libgomp/target.c
1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_c
51 #include "splay-tree.h"
54 typedef uintptr_t *hash_entry_type;
55 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
56 static inline void htab_free (void *ptr) { free (ptr); }
57 #include "hashtab.h"
59 ialias_redirect (GOMP_task)
61 static inline hashval_t
62 htab_hash (hash_entry_type element)
64 return hash_pointer ((void *) element);
67 static inline bool
68 htab_eq (hash_entry_type x, hash_entry_type y)
70 return x == y;
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
75 static void gomp_target_init (void);
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock;
83 /* This structure describes an offload image.
84 It contains type of the target device, pointer to host table descriptor, and
85 pointer to target data. */
86 struct offload_image_descr {
87 unsigned version;
88 enum offload_target_type type;
89 const void *host_table;
90 const void *target_data;
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr *offload_images;
96 /* Total number of offload images. */
97 static int num_offload_images;
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr *devices;
102 /* Total number of available devices. */
103 static int num_devices;
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp;
108 /* Mask of requirements from OpenMP 'requires' directives. */
109 static int omp_requires_mask;
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
113 static void *
114 gomp_realloc_unlock (void *old, size_t size)
116 void *ret = realloc (old, size);
117 if (ret == NULL)
119 gomp_mutex_unlock (&register_lock);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
122 return ret;
125 attribute_hidden void
126 gomp_init_targets_once (void)
128 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
131 attribute_hidden int
132 gomp_get_num_devices (void)
134 gomp_init_targets_once ();
135 return num_devices_openmp;
138 static struct gomp_device_descr *
139 resolve_device (int device_id, bool remapped)
141 if (remapped && device_id == GOMP_DEVICE_ICV)
143 struct gomp_task_icv *icv = gomp_icv (false);
144 device_id = icv->default_device_var;
145 remapped = false;
148 if (device_id < 0)
150 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device))
152 return NULL;
153 if (device_id == omp_invalid_device)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
159 return NULL;
161 else if (device_id >= gomp_get_num_devices ())
163 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id != num_devices_openmp)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
168 return NULL;
171 gomp_mutex_lock (&devices[device_id].lock);
172 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
173 gomp_init_device (&devices[device_id]);
174 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
176 gomp_mutex_unlock (&devices[device_id].lock);
178 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
182 return NULL;
184 gomp_mutex_unlock (&devices[device_id].lock);
186 return &devices[device_id];
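/* Illustrative sketch, not part of libgomp: how callers typically use
   resolve_device.  A target construct without a device clause passes
   GOMP_DEVICE_ICV with REMAPPED set, so the default-device-var ICV is
   consulted; a NULL result means "execute on the host instead":

     struct gomp_device_descr *devicep = resolve_device (GOMP_DEVICE_ICV, true);
     if (devicep == NULL)
       {
         // host fallback path
       }
*/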
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
193 if (key->host_start != key->host_end)
194 return splay_tree_lookup (mem_map, key);
196 key->host_end++;
197 splay_tree_key n = splay_tree_lookup (mem_map, key);
198 key->host_end--;
199 if (n)
200 return n;
201 key->host_start--;
202 n = splay_tree_lookup (mem_map, key);
203 key->host_start++;
204 if (n)
205 return n;
206 return splay_tree_lookup (mem_map, key);
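/* Illustrative sketch, not part of libgomp: for a zero-length key
   (host_start == host_end == P) the function above probes [P, P+1),
   then [P-1, P), then the empty range itself, so a zero-length array
   section matches a region that merely begins or ends at P.  A
   hypothetical caller:

     struct splay_tree_key_s k;
     k.host_start = (uintptr_t) ptr;
     k.host_end = (uintptr_t) ptr;      // zero-length section
     splay_tree_key n = gomp_map_lookup (&devicep->mem_map, &k);
     // n != NULL if PTR touches or lies inside a mapped region.
*/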
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
212 return reverse_splay_tree_lookup (mem_map_rev, key);
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
218 if (key->host_start != key->host_end)
219 return splay_tree_lookup (mem_map, key);
221 key->host_end++;
222 splay_tree_key n = splay_tree_lookup (mem_map, key);
223 key->host_end--;
224 return n;
227 static inline void
228 gomp_device_copy (struct gomp_device_descr *devicep,
229 bool (*copy_func) (int, void *, const void *, size_t),
230 const char *dst, void *dstaddr,
231 const char *src, const void *srcaddr,
232 size_t size)
234 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
236 gomp_mutex_unlock (&devicep->lock);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
242 static inline void
243 goacc_device_copy_async (struct gomp_device_descr *devicep,
244 bool (*copy_func) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue *),
246 const char *dst, void *dstaddr,
247 const char *src, const void *srcaddr,
248 const void *srcaddr_orig,
249 size_t size, struct goacc_asyncqueue *aq)
251 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
253 gomp_mutex_unlock (&devicep->lock);
254 if (srcaddr_orig && srcaddr_orig != srcaddr)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src, srcaddr_orig, srcaddr_orig + size,
259 src, srcaddr, srcaddr + size,
260 dst, dstaddr, dstaddr + size);
261 else
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src, srcaddr, srcaddr + size,
265 dst, dstaddr, dstaddr + size);
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
272 struct gomp_coalesce_chunk
274 /* The starting and ending point of a coalesced chunk of memory. */
275 size_t start, end;
278 struct gomp_coalesce_buf
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
282 void *buf;
283 struct target_mem_desc *tgt;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk *chunks;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
289 be performed. */
290 long chunk_cnt;
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
294 long use_cnt;
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
309 /* Add region with device tgt_start relative offset and length to CBUF.
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
313 example).  The exception is EPHEMERAL data, which we know is already
314 available "by construction". */
316 static inline void
317 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
319 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
320 return;
321 if (cbuf->chunk_cnt)
323 if (cbuf->chunk_cnt < 0)
324 return;
325 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
327 cbuf->chunk_cnt = -1;
328 return;
330 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
332 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
333 cbuf->use_cnt++;
334 return;
336 /* If the last chunk is only used by one mapping, discard it,
337 as it will be one host to device copy anyway and
338 memcpying it around will only waste cycles. */
339 if (cbuf->use_cnt == 1)
340 cbuf->chunk_cnt--;
342 cbuf->chunks[cbuf->chunk_cnt].start = start;
343 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
344 cbuf->chunk_cnt++;
345 cbuf->use_cnt = 1;
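/* Illustrative sketch, not part of libgomp: how successive calls grow and
   split chunks, assuming hypothetical offsets.  Gaps smaller than
   MAX_COALESCE_BUF_GAP extend the current chunk, larger gaps start a new
   one, and regions above MAX_COALESCE_BUF_SIZE (or of zero length) are
   skipped and copied directly instead:

     struct gomp_coalesce_chunk chunks[4];
     struct gomp_coalesce_buf cb
       = { .buf = NULL, .tgt = NULL, .chunks = chunks, .chunk_cnt = 0,
           .use_cnt = 0 };
     gomp_coalesce_buf_add (&cb, 0, 256);     // chunk 0: [0, 256)
     gomp_coalesce_buf_add (&cb, 512, 128);   // gap < 4 KiB: chunk 0 -> [0, 640)
     gomp_coalesce_buf_add (&cb, 65536, 8);   // gap > 4 KiB: new chunk 1
*/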
348 /* Return true for mapping kinds which need to copy data from the
349 host to device for regions that weren't previously mapped. */
351 static inline bool
352 gomp_to_device_kind_p (int kind)
354 switch (kind)
356 case GOMP_MAP_ALLOC:
357 case GOMP_MAP_FROM:
358 case GOMP_MAP_FORCE_ALLOC:
359 case GOMP_MAP_FORCE_FROM:
360 case GOMP_MAP_ALWAYS_FROM:
361 return false;
362 default:
363 return true;
367 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
368 non-NULL), when the source data is on the stack or may otherwise be deallocated
369 before the asynchronous copy takes place, EPHEMERAL must be passed as
370 TRUE. */
372 attribute_hidden void
373 gomp_copy_host2dev (struct gomp_device_descr *devicep,
374 struct goacc_asyncqueue *aq,
375 void *d, const void *h, size_t sz,
376 bool ephemeral, struct gomp_coalesce_buf *cbuf)
378 if (cbuf)
380 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
381 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
383 long first = 0;
384 long last = cbuf->chunk_cnt - 1;
385 while (first <= last)
387 long middle = (first + last) >> 1;
388 if (cbuf->chunks[middle].end <= doff)
389 first = middle + 1;
390 else if (cbuf->chunks[middle].start <= doff)
392 if (doff + sz > cbuf->chunks[middle].end)
394 gomp_mutex_unlock (&devicep->lock);
395 gomp_fatal ("internal libgomp cbuf error");
398 /* In an asynchronous context, verify that CBUF isn't used
399 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
400 if (__builtin_expect (aq != NULL, 0))
401 assert (ephemeral);
403 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
404 h, sz);
405 return;
407 else
408 last = middle - 1;
413 if (__builtin_expect (aq != NULL, 0))
415 void *h_buf = (void *) h;
416 if (ephemeral)
418 /* We're queueing up an asynchronous copy from data that may
419 disappear before the transfer takes place (i.e. because it is a
420 stack local in a function that is no longer executing). As we've
421 not been able to use CBUF, make a copy of the data into a
422 temporary buffer. */
423 h_buf = gomp_malloc (sz);
424 memcpy (h_buf, h, sz);
426 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
427 "dev", d, "host", h_buf, h, sz, aq);
428 if (ephemeral)
429 /* Free once the transfer has completed. */
430 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
432 else
433 gomp_device_copy (devicep, devicep->host2dev_func,
434 "dev", d, "host", h, sz);
437 attribute_hidden void
438 gomp_copy_dev2host (struct gomp_device_descr *devicep,
439 struct goacc_asyncqueue *aq,
440 void *h, const void *d, size_t sz)
442 if (__builtin_expect (aq != NULL, 0))
443 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
444 "host", h, "dev", d, NULL, sz, aq);
445 else
446 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
449 static void
450 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
452 if (!devicep->free_func (devicep->target_id, devptr))
454 gomp_mutex_unlock (&devicep->lock);
455 gomp_fatal ("error in freeing device memory block at %p", devptr);
459 /* Increment reference count of a splay_tree_key region K by 1.
460 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
461 increment the value if refcount is not yet contained in the set (used for
462 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
463 once for each construct). */
465 static inline void
466 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
468 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
469 return;
471 uintptr_t *refcount_ptr = &k->refcount;
473 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
474 refcount_ptr = &k->structelem_refcount;
475 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
476 refcount_ptr = k->structelem_refcount_ptr;
478 if (refcount_set)
480 if (htab_find (*refcount_set, refcount_ptr))
481 return;
482 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
483 *slot = refcount_ptr;
486 *refcount_ptr += 1;
487 return;
490 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
491 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
492 track already seen refcounts, and only adjust the value if refcount is not
493 yet contained in the set (like gomp_increment_refcount).
495 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
496 it is already zero and we know we decremented it earlier. This signals that
497 associated maps should be copied back to host.
499 *DO_REMOVE is set to true when this is the first handling of this refcount
500 and we are setting it to zero. This signals a removal of this key from the
501 splay-tree map.
503 Copy and removal are separated due to cases like handling of structure
504 elements, e.g. each map of a structure element representing a possible copy
505 out of a structure field has to be handled individually, but we only signal
506 removal for one (the first encountered) sibling map. */
508 static inline void
509 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
510 bool *do_copy, bool *do_remove)
512 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
514 *do_copy = *do_remove = false;
515 return;
518 uintptr_t *refcount_ptr = &k->refcount;
520 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
521 refcount_ptr = &k->structelem_refcount;
522 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
523 refcount_ptr = k->structelem_refcount_ptr;
525 bool new_encountered_refcount;
526 bool set_to_zero = false;
527 bool is_zero = false;
529 uintptr_t orig_refcount = *refcount_ptr;
531 if (refcount_set)
533 if (htab_find (*refcount_set, refcount_ptr))
535 new_encountered_refcount = false;
536 goto end;
539 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
540 *slot = refcount_ptr;
541 new_encountered_refcount = true;
543 else
544 /* If no refcount_set being used, assume all keys are being decremented
545 for the first time. */
546 new_encountered_refcount = true;
548 if (delete_p)
549 *refcount_ptr = 0;
550 else if (*refcount_ptr > 0)
551 *refcount_ptr -= 1;
553 end:
554 if (*refcount_ptr == 0)
556 if (orig_refcount > 0)
557 set_to_zero = true;
559 is_zero = true;
562 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
563 *do_remove = (new_encountered_refcount && set_to_zero);
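/* Illustrative sketch, not part of libgomp: the REFCOUNT_SET protocol.
   OpenMP 5.0 requires that a region's refcount is adjusted at most once per
   construct, even if the construct names the region several times, so a
   hypothetical caller groups its increments under one set:

     htab_t set = htab_create (mapnum);
     gomp_increment_refcount (k, &set);   // adjusts K once
     gomp_increment_refcount (k, &set);   // same refcount already seen: no-op
     htab_free (set);
*/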
566 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
567 gomp_map_0len_lookup found oldn for newn.
568 Helper function of gomp_map_vars. */
570 static inline void
571 gomp_map_vars_existing (struct gomp_device_descr *devicep,
572 struct goacc_asyncqueue *aq, splay_tree_key oldn,
573 splay_tree_key newn, struct target_var_desc *tgt_var,
574 unsigned char kind, bool always_to_flag, bool implicit,
575 struct gomp_coalesce_buf *cbuf,
576 htab_t *refcount_set)
578 assert (kind != GOMP_MAP_ATTACH
579 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
581 tgt_var->key = oldn;
582 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
583 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
584 tgt_var->is_attach = false;
585 tgt_var->offset = newn->host_start - oldn->host_start;
587 /* For implicit maps, old contained in new is valid. */
588 bool implicit_subset = (implicit
589 && newn->host_start <= oldn->host_start
590 && oldn->host_end <= newn->host_end);
591 if (implicit_subset)
592 tgt_var->length = oldn->host_end - oldn->host_start;
593 else
594 tgt_var->length = newn->host_end - newn->host_start;
596 if ((kind & GOMP_MAP_FLAG_FORCE)
597 /* For implicit maps, old contained in new is valid. */
598 || !(implicit_subset
599 /* Otherwise, new contained inside old is considered valid. */
600 || (oldn->host_start <= newn->host_start
601 && newn->host_end <= oldn->host_end)))
603 gomp_mutex_unlock (&devicep->lock);
604 gomp_fatal ("Trying to map into device [%p..%p) object when "
605 "[%p..%p) is already mapped",
606 (void *) newn->host_start, (void *) newn->host_end,
607 (void *) oldn->host_start, (void *) oldn->host_end);
610 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
612 /* Implicit + always should not happen. If this does occur, the
613 address/length adjustment below is a TODO. */
614 assert (!implicit_subset);
616 if (oldn->aux && oldn->aux->attach_count)
618 /* We have to be careful not to overwrite still attached pointers
619 during the copyback to host. */
620 uintptr_t addr = newn->host_start;
621 while (addr < newn->host_end)
623 size_t i = (addr - oldn->host_start) / sizeof (void *);
624 if (oldn->aux->attach_count[i] == 0)
625 gomp_copy_host2dev (devicep, aq,
626 (void *) (oldn->tgt->tgt_start
627 + oldn->tgt_offset
628 + addr - oldn->host_start),
629 (void *) addr,
630 sizeof (void *), false, cbuf);
631 addr += sizeof (void *);
634 else
635 gomp_copy_host2dev (devicep, aq,
636 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
637 + newn->host_start - oldn->host_start),
638 (void *) newn->host_start,
639 newn->host_end - newn->host_start, false, cbuf);
642 gomp_increment_refcount (oldn, refcount_set);
645 static int
646 get_kind (bool short_mapkind, void *kinds, int idx)
648 if (!short_mapkind)
649 return ((unsigned char *) kinds)[idx];
651 int val = ((unsigned short *) kinds)[idx];
652 if (GOMP_MAP_IMPLICIT_P (val))
653 val &= ~GOMP_MAP_IMPLICIT;
654 return val;
658 static bool
659 get_implicit (bool short_mapkind, void *kinds, int idx)
661 if (!short_mapkind)
662 return false;
664 int val = ((unsigned short *) kinds)[idx];
665 return GOMP_MAP_IMPLICIT_P (val);
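/* Illustrative sketch, not part of libgomp: decoding one entry of a
   "short" KINDS array, matching the (kind & typemask) and (kind >> rshift)
   uses below (typemask == 0xff, rshift == 8 for short map kinds):

     unsigned short raw = ((unsigned short *) kinds)[i];
     bool implicit = GOMP_MAP_IMPLICIT_P (raw);    // compiler-added map?
     int kind = get_kind (true, kinds, i);         // GOMP_MAP_IMPLICIT stripped
     int type = kind & 0xff;                       // e.g. GOMP_MAP_TOFROM
     size_t align = (size_t) 1 << (kind >> 8);     // requested alignment
*/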
668 static void
669 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
670 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
671 struct gomp_coalesce_buf *cbuf,
672 bool allow_zero_length_array_sections)
674 struct gomp_device_descr *devicep = tgt->device_descr;
675 struct splay_tree_s *mem_map = &devicep->mem_map;
676 struct splay_tree_key_s cur_node;
678 cur_node.host_start = host_ptr;
679 if (cur_node.host_start == (uintptr_t) NULL)
681 cur_node.tgt_offset = (uintptr_t) NULL;
682 gomp_copy_host2dev (devicep, aq,
683 (void *) (tgt->tgt_start + target_offset),
684 (void *) &cur_node.tgt_offset, sizeof (void *),
685 true, cbuf);
686 return;
688 /* Add bias to the pointer value. */
689 cur_node.host_start += bias;
690 cur_node.host_end = cur_node.host_start;
691 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
692 if (n == NULL)
694 if (allow_zero_length_array_sections)
695 cur_node.tgt_offset = 0;
696 else
698 gomp_mutex_unlock (&devicep->lock);
699 gomp_fatal ("Pointer target of array section wasn't mapped");
702 else
704 cur_node.host_start -= n->host_start;
705 cur_node.tgt_offset
706 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
707 /* At this point tgt_offset is target address of the
708 array section. Now subtract bias to get what we want
709 to initialize the pointer with. */
710 cur_node.tgt_offset -= bias;
712 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
713 (void *) &cur_node.tgt_offset, sizeof (void *),
714 true, cbuf);
717 static void
718 gomp_map_fields_existing (struct target_mem_desc *tgt,
719 struct goacc_asyncqueue *aq, splay_tree_key n,
720 size_t first, size_t i, void **hostaddrs,
721 size_t *sizes, void *kinds,
722 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
724 struct gomp_device_descr *devicep = tgt->device_descr;
725 struct splay_tree_s *mem_map = &devicep->mem_map;
726 struct splay_tree_key_s cur_node;
727 int kind;
728 bool implicit;
729 const bool short_mapkind = true;
730 const int typemask = short_mapkind ? 0xff : 0x7;
732 cur_node.host_start = (uintptr_t) hostaddrs[i];
733 cur_node.host_end = cur_node.host_start + sizes[i];
734 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
735 kind = get_kind (short_mapkind, kinds, i);
736 implicit = get_implicit (short_mapkind, kinds, i);
737 if (n2
738 && n2->tgt == n->tgt
739 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
741 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
742 kind & typemask, false, implicit, cbuf,
743 refcount_set);
744 return;
746 if (sizes[i] == 0)
748 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
750 cur_node.host_start--;
751 n2 = splay_tree_lookup (mem_map, &cur_node);
752 cur_node.host_start++;
753 if (n2
754 && n2->tgt == n->tgt
755 && n2->host_start - n->host_start
756 == n2->tgt_offset - n->tgt_offset)
758 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
759 kind & typemask, false, implicit, cbuf,
760 refcount_set);
761 return;
764 cur_node.host_end++;
765 n2 = splay_tree_lookup (mem_map, &cur_node);
766 cur_node.host_end--;
767 if (n2
768 && n2->tgt == n->tgt
769 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
771 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
772 kind & typemask, false, implicit, cbuf,
773 refcount_set);
774 return;
777 gomp_mutex_unlock (&devicep->lock);
778 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
779 "other mapped elements from the same structure weren't mapped "
780 "together with it", (void *) cur_node.host_start,
781 (void *) cur_node.host_end);
784 attribute_hidden void
785 gomp_attach_pointer (struct gomp_device_descr *devicep,
786 struct goacc_asyncqueue *aq, splay_tree mem_map,
787 splay_tree_key n, uintptr_t attach_to, size_t bias,
788 struct gomp_coalesce_buf *cbufp,
789 bool allow_zero_length_array_sections)
791 struct splay_tree_key_s s;
792 size_t size, idx;
794 if (n == NULL)
796 gomp_mutex_unlock (&devicep->lock);
797 gomp_fatal ("enclosing struct not mapped for attach");
800 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
801 /* We might have a pointer in a packed struct: however we cannot have more
802 than one such pointer in each pointer-sized portion of the struct, so
803 this is safe. */
804 idx = (attach_to - n->host_start) / sizeof (void *);
806 if (!n->aux)
807 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
809 if (!n->aux->attach_count)
810 n->aux->attach_count
811 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
813 if (n->aux->attach_count[idx] < UINTPTR_MAX)
814 n->aux->attach_count[idx]++;
815 else
817 gomp_mutex_unlock (&devicep->lock);
818 gomp_fatal ("attach count overflow");
821 if (n->aux->attach_count[idx] == 1)
823 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
824 - n->host_start;
825 uintptr_t target = (uintptr_t) *(void **) attach_to;
826 splay_tree_key tn;
827 uintptr_t data;
829 if ((void *) target == NULL)
831 gomp_mutex_unlock (&devicep->lock);
832 gomp_fatal ("attempt to attach null pointer");
835 s.host_start = target + bias;
836 s.host_end = s.host_start + 1;
837 tn = splay_tree_lookup (mem_map, &s);
839 if (!tn)
841 if (allow_zero_length_array_sections)
842 /* When allowing attachment to zero-length array sections, we
843 allow attaching to NULL pointers when the target region is not
844 mapped. */
845 data = 0;
846 else
848 gomp_mutex_unlock (&devicep->lock);
849 gomp_fatal ("pointer target not mapped for attach");
852 else
853 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
855 gomp_debug (1,
856 "%s: attaching host %p, target %p (struct base %p) to %p\n",
857 __FUNCTION__, (void *) attach_to, (void *) devptr,
858 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
860 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
861 sizeof (void *), true, cbufp);
863 else
864 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
865 (void *) attach_to, (int) n->aux->attach_count[idx]);
868 attribute_hidden void
869 gomp_detach_pointer (struct gomp_device_descr *devicep,
870 struct goacc_asyncqueue *aq, splay_tree_key n,
871 uintptr_t detach_from, bool finalize,
872 struct gomp_coalesce_buf *cbufp)
874 size_t idx;
876 if (n == NULL)
878 gomp_mutex_unlock (&devicep->lock);
879 gomp_fatal ("enclosing struct not mapped for detach");
882 idx = (detach_from - n->host_start) / sizeof (void *);
884 if (!n->aux || !n->aux->attach_count)
886 gomp_mutex_unlock (&devicep->lock);
887 gomp_fatal ("no attachment counters for struct");
890 if (finalize)
891 n->aux->attach_count[idx] = 1;
893 if (n->aux->attach_count[idx] == 0)
895 gomp_mutex_unlock (&devicep->lock);
896 gomp_fatal ("attach count underflow");
898 else
899 n->aux->attach_count[idx]--;
901 if (n->aux->attach_count[idx] == 0)
903 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
904 - n->host_start;
905 uintptr_t target = (uintptr_t) *(void **) detach_from;
907 gomp_debug (1,
908 "%s: detaching host %p, target %p (struct base %p) to %p\n",
909 __FUNCTION__, (void *) detach_from, (void *) devptr,
910 (void *) (n->tgt->tgt_start + n->tgt_offset),
911 (void *) target);
913 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
914 sizeof (void *), true, cbufp);
916 else
917 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
918 (void *) detach_from, (int) n->aux->attach_count[idx]);
921 attribute_hidden uintptr_t
922 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
924 if (tgt->list[i].key != NULL)
925 return tgt->list[i].key->tgt->tgt_start
926 + tgt->list[i].key->tgt_offset
927 + tgt->list[i].offset;
929 switch (tgt->list[i].offset)
931 case OFFSET_INLINED:
932 return (uintptr_t) hostaddrs[i];
934 case OFFSET_POINTER:
935 return 0;
937 case OFFSET_STRUCT:
938 return tgt->list[i + 1].key->tgt->tgt_start
939 + tgt->list[i + 1].key->tgt_offset
940 + tgt->list[i + 1].offset
941 + (uintptr_t) hostaddrs[i]
942 - (uintptr_t) hostaddrs[i + 1];
944 default:
945 return tgt->tgt_start + tgt->list[i].offset;
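/* Illustrative sketch, not part of libgomp: gomp_map_val yields the device
   address to pass for entry I.  Mapped entries resolve through their splay
   tree key; OFFSET_INLINED passes hostaddrs[i] through unchanged (already a
   device address or an inlined value); OFFSET_STRUCT derives the address
   from the following sibling entry.  The GOMP_MAP_VARS_TARGET loop further
   below stores these values into the target argument block:

     uintptr_t devaddr = gomp_map_val (tgt, hostaddrs, i);
*/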
949 static inline __attribute__((always_inline)) struct target_mem_desc *
950 gomp_map_vars_internal (struct gomp_device_descr *devicep,
951 struct goacc_asyncqueue *aq, size_t mapnum,
952 void **hostaddrs, void **devaddrs, size_t *sizes,
953 void *kinds, bool short_mapkind,
954 htab_t *refcount_set,
955 enum gomp_map_vars_kind pragma_kind)
957 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
958 bool has_firstprivate = false;
959 bool has_always_ptrset = false;
960 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
961 const int rshift = short_mapkind ? 8 : 3;
962 const int typemask = short_mapkind ? 0xff : 0x7;
963 struct splay_tree_s *mem_map = &devicep->mem_map;
964 struct splay_tree_key_s cur_node;
965 struct target_mem_desc *tgt
966 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
967 tgt->list_count = mapnum;
968 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
969 tgt->device_descr = devicep;
970 tgt->prev = NULL;
971 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
973 if (mapnum == 0)
975 tgt->tgt_start = 0;
976 tgt->tgt_end = 0;
977 return tgt;
980 tgt_align = sizeof (void *);
981 tgt_size = 0;
982 cbuf.chunks = NULL;
983 cbuf.chunk_cnt = -1;
984 cbuf.use_cnt = 0;
985 cbuf.buf = NULL;
986 if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
988 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
989 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
990 cbuf.chunk_cnt = 0;
992 if (pragma_kind & GOMP_MAP_VARS_TARGET)
994 size_t align = 4 * sizeof (void *);
995 tgt_align = align;
996 tgt_size = mapnum * sizeof (void *);
997 cbuf.chunk_cnt = 1;
998 cbuf.use_cnt = 1 + (mapnum > 1);
999 cbuf.chunks[0].start = 0;
1000 cbuf.chunks[0].end = tgt_size;
1003 gomp_mutex_lock (&devicep->lock);
1004 if (devicep->state == GOMP_DEVICE_FINALIZED)
1006 gomp_mutex_unlock (&devicep->lock);
1007 free (tgt);
1008 return NULL;
1011 for (i = 0; i < mapnum; i++)
1013 int kind = get_kind (short_mapkind, kinds, i);
1014 bool implicit = get_implicit (short_mapkind, kinds, i);
1015 if (hostaddrs[i] == NULL
1016 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1018 tgt->list[i].key = NULL;
1019 tgt->list[i].offset = OFFSET_INLINED;
1020 continue;
1022 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1023 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1025 tgt->list[i].key = NULL;
1026 if (!not_found_cnt)
1028 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1029 on a separate construct prior to using use_device_{addr,ptr}.
1030 In OpenMP 5.0, map directives need to be ordered by the
1031 middle-end before the use_device_* clauses. If
1032 !not_found_cnt, all mappings requested (if any) are already
1033 mapped, so use_device_{addr,ptr} can be resolved right away.
1034 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1035 now but would succeed after performing the mappings in the
1036 following loop. We can't defer this always to the second
1037 loop, because it is not even invoked when !not_found_cnt
1038 after the first loop. */
1039 cur_node.host_start = (uintptr_t) hostaddrs[i];
1040 cur_node.host_end = cur_node.host_start;
1041 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1042 if (n != NULL)
1044 cur_node.host_start -= n->host_start;
1045 hostaddrs[i]
1046 = (void *) (n->tgt->tgt_start + n->tgt_offset
1047 + cur_node.host_start);
1049 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1051 gomp_mutex_unlock (&devicep->lock);
1052 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1054 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1055 /* If not present, continue using the host address. */
1057 else
1058 __builtin_unreachable ();
1059 tgt->list[i].offset = OFFSET_INLINED;
1061 else
1062 tgt->list[i].offset = 0;
1063 continue;
1065 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1067 size_t first = i + 1;
1068 size_t last = i + sizes[i];
1069 cur_node.host_start = (uintptr_t) hostaddrs[i];
1070 cur_node.host_end = (uintptr_t) hostaddrs[last]
1071 + sizes[last];
1072 tgt->list[i].key = NULL;
1073 tgt->list[i].offset = OFFSET_STRUCT;
1074 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1075 if (n == NULL)
1077 size_t align = (size_t) 1 << (kind >> rshift);
1078 if (tgt_align < align)
1079 tgt_align = align;
1080 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1081 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1082 tgt_size += cur_node.host_end - cur_node.host_start;
1083 not_found_cnt += last - i;
1084 for (i = first; i <= last; i++)
1086 tgt->list[i].key = NULL;
1087 if (!aq
1088 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1089 & typemask))
1090 gomp_coalesce_buf_add (&cbuf,
1091 tgt_size - cur_node.host_end
1092 + (uintptr_t) hostaddrs[i],
1093 sizes[i]);
1095 i--;
1096 continue;
1098 for (i = first; i <= last; i++)
1099 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1100 sizes, kinds, NULL, refcount_set);
1101 i--;
1102 continue;
1104 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1106 tgt->list[i].key = NULL;
1107 tgt->list[i].offset = OFFSET_POINTER;
1108 has_firstprivate = true;
1109 continue;
1111 else if ((kind & typemask) == GOMP_MAP_ATTACH
1112 || ((kind & typemask)
1113 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1115 tgt->list[i].key = NULL;
1116 has_firstprivate = true;
1117 continue;
1119 cur_node.host_start = (uintptr_t) hostaddrs[i];
1120 if (!GOMP_MAP_POINTER_P (kind & typemask))
1121 cur_node.host_end = cur_node.host_start + sizes[i];
1122 else
1123 cur_node.host_end = cur_node.host_start + sizeof (void *);
1124 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1126 tgt->list[i].key = NULL;
1128 size_t align = (size_t) 1 << (kind >> rshift);
1129 if (tgt_align < align)
1130 tgt_align = align;
1131 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1132 if (!aq)
1133 gomp_coalesce_buf_add (&cbuf, tgt_size,
1134 cur_node.host_end - cur_node.host_start);
1135 tgt_size += cur_node.host_end - cur_node.host_start;
1136 has_firstprivate = true;
1137 continue;
1139 splay_tree_key n;
1140 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1142 n = gomp_map_0len_lookup (mem_map, &cur_node);
1143 if (!n)
1145 tgt->list[i].key = NULL;
1146 tgt->list[i].offset = OFFSET_POINTER;
1147 continue;
1150 else
1151 n = splay_tree_lookup (mem_map, &cur_node);
1152 if (n && n->refcount != REFCOUNT_LINK)
1154 int always_to_cnt = 0;
1155 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1157 bool has_nullptr = false;
1158 size_t j;
1159 for (j = 0; j < n->tgt->list_count; j++)
1160 if (n->tgt->list[j].key == n)
1162 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1163 break;
1165 if (n->tgt->list_count == 0)
1167 /* 'declare target'; assume has_nullptr; it could also be a
1168 statically assigned pointer, but then it should point to
1169 the equivalent variable on the host. */
1170 assert (n->refcount == REFCOUNT_INFINITY);
1171 has_nullptr = true;
1173 else
1174 assert (j < n->tgt->list_count);
1175 /* Re-map the data if there is an 'always' modifier, or if a null
1176 pointer was there before and a non-null one has now been found; that
1177 permits transparent re-mapping for Fortran array descriptors
1178 which were previously mapped unallocated. */
1179 for (j = i + 1; j < mapnum; j++)
1181 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1182 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1183 && (!has_nullptr
1184 || !GOMP_MAP_POINTER_P (ptr_kind)
1185 || *(void **) hostaddrs[j] == NULL))
1186 break;
1187 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1188 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1189 > cur_node.host_end))
1190 break;
1191 else
1193 has_always_ptrset = true;
1194 ++always_to_cnt;
1198 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1199 kind & typemask, always_to_cnt > 0, implicit,
1200 NULL, refcount_set);
1201 i += always_to_cnt;
1203 else
1205 tgt->list[i].key = NULL;
1207 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1209 /* Not present, hence skip the entry, including its MAP_POINTER
1210 if there is one. */
1211 tgt->list[i].offset = OFFSET_INLINED;
1212 if (i + 1 < mapnum
1213 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1214 == GOMP_MAP_POINTER))
1216 ++i;
1217 tgt->list[i].key = NULL;
1218 tgt->list[i].offset = 0;
1220 continue;
1222 size_t align = (size_t) 1 << (kind >> rshift);
1223 not_found_cnt++;
1224 if (tgt_align < align)
1225 tgt_align = align;
1226 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1227 if (!aq
1228 && gomp_to_device_kind_p (kind & typemask))
1229 gomp_coalesce_buf_add (&cbuf, tgt_size,
1230 cur_node.host_end - cur_node.host_start);
1231 tgt_size += cur_node.host_end - cur_node.host_start;
1232 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1234 size_t j;
1235 int kind;
1236 for (j = i + 1; j < mapnum; j++)
1237 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1238 kinds, j)) & typemask))
1239 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1240 break;
1241 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1242 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1243 > cur_node.host_end))
1244 break;
1245 else
1247 tgt->list[j].key = NULL;
1248 i++;
1254 if (devaddrs)
1256 if (mapnum != 1)
1258 gomp_mutex_unlock (&devicep->lock);
1259 gomp_fatal ("unexpected aggregation");
1261 tgt->to_free = devaddrs[0];
1262 tgt->tgt_start = (uintptr_t) tgt->to_free;
1263 tgt->tgt_end = tgt->tgt_start + sizes[0];
1265 else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
1267 /* Allocate tgt_align aligned tgt_size block of memory. */
1268 /* FIXME: Perhaps change interface to allocate properly aligned
1269 memory. */
1270 tgt->to_free = devicep->alloc_func (devicep->target_id,
1271 tgt_size + tgt_align - 1);
1272 if (!tgt->to_free)
1274 gomp_mutex_unlock (&devicep->lock);
1275 gomp_fatal ("device memory allocation fail");
1278 tgt->tgt_start = (uintptr_t) tgt->to_free;
1279 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1280 tgt->tgt_end = tgt->tgt_start + tgt_size;
1282 if (cbuf.use_cnt == 1)
1283 cbuf.chunk_cnt--;
1284 if (cbuf.chunk_cnt > 0)
1286 cbuf.buf
1287 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1288 if (cbuf.buf)
1290 cbuf.tgt = tgt;
1291 cbufp = &cbuf;
1295 else
1297 tgt->to_free = NULL;
1298 tgt->tgt_start = 0;
1299 tgt->tgt_end = 0;
1302 tgt_size = 0;
1303 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1304 tgt_size = mapnum * sizeof (void *);
1306 tgt->array = NULL;
1307 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1309 if (not_found_cnt)
1310 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1311 splay_tree_node array = tgt->array;
1312 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1313 uintptr_t field_tgt_base = 0;
1314 splay_tree_key field_tgt_structelem_first = NULL;
1316 for (i = 0; i < mapnum; i++)
1317 if (has_always_ptrset
1318 && tgt->list[i].key
1319 && (get_kind (short_mapkind, kinds, i) & typemask)
1320 == GOMP_MAP_TO_PSET)
1322 splay_tree_key k = tgt->list[i].key;
1323 bool has_nullptr = false;
1324 size_t j;
1325 for (j = 0; j < k->tgt->list_count; j++)
1326 if (k->tgt->list[j].key == k)
1328 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1329 break;
1331 if (k->tgt->list_count == 0)
1332 has_nullptr = true;
1333 else
1334 assert (j < k->tgt->list_count);
1336 tgt->list[i].has_null_ptr_assoc = false;
1337 for (j = i + 1; j < mapnum; j++)
1339 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1340 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1341 && (!has_nullptr
1342 || !GOMP_MAP_POINTER_P (ptr_kind)
1343 || *(void **) hostaddrs[j] == NULL))
1344 break;
1345 else if ((uintptr_t) hostaddrs[j] < k->host_start
1346 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1347 > k->host_end))
1348 break;
1349 else
1351 if (*(void **) hostaddrs[j] == NULL)
1352 tgt->list[i].has_null_ptr_assoc = true;
1353 tgt->list[j].key = k;
1354 tgt->list[j].copy_from = false;
1355 tgt->list[j].always_copy_from = false;
1356 tgt->list[j].is_attach = false;
1357 gomp_increment_refcount (k, refcount_set);
1358 gomp_map_pointer (k->tgt, aq,
1359 (uintptr_t) *(void **) hostaddrs[j],
1360 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1361 - k->host_start),
1362 sizes[j], cbufp, false);
1365 i = j - 1;
1367 else if (tgt->list[i].key == NULL)
1369 int kind = get_kind (short_mapkind, kinds, i);
1370 bool implicit = get_implicit (short_mapkind, kinds, i);
1371 if (hostaddrs[i] == NULL)
1372 continue;
1373 switch (kind & typemask)
1375 size_t align, len, first, last;
1376 splay_tree_key n;
1377 case GOMP_MAP_FIRSTPRIVATE:
1378 align = (size_t) 1 << (kind >> rshift);
1379 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1380 tgt->list[i].offset = tgt_size;
1381 len = sizes[i];
1382 gomp_copy_host2dev (devicep, aq,
1383 (void *) (tgt->tgt_start + tgt_size),
1384 (void *) hostaddrs[i], len, false, cbufp);
1385 /* Save the device address in hostaddrs to make it available later
1386 when doing a deep firstprivate with pointer attach. */
1387 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1388 tgt_size += len;
1390 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1391 firstprivate to hostaddrs[i+1], which is assumed to contain a
1392 device address. */
1393 if (i + 1 < mapnum
1394 && (GOMP_MAP_ATTACH
1395 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1397 uintptr_t target = (uintptr_t) hostaddrs[i];
1398 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1399 /* Per
1400 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1401 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1402 this probably needs revision for 'aq' usage. */
1403 assert (!aq);
1404 gomp_copy_host2dev (devicep, aq, devptr, &target,
1405 sizeof (void *), false, cbufp);
1406 ++i;
1408 continue;
1409 case GOMP_MAP_FIRSTPRIVATE_INT:
1410 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1411 continue;
1412 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1413 /* The OpenACC 'host_data' construct only allows 'use_device'
1414 "mapping" clauses, so in the first loop, 'not_found_cnt'
1415 must always have been zero, so all OpenACC 'use_device'
1416 clauses have already been handled. (We can only easily test
1417 'use_device' with 'if_present' clause here.) */
1418 assert (tgt->list[i].offset == OFFSET_INLINED);
1419 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1420 code conceptually simple, similar to the first loop. */
1421 case GOMP_MAP_USE_DEVICE_PTR:
1422 if (tgt->list[i].offset == 0)
1424 cur_node.host_start = (uintptr_t) hostaddrs[i];
1425 cur_node.host_end = cur_node.host_start;
1426 n = gomp_map_lookup (mem_map, &cur_node);
1427 if (n != NULL)
1429 cur_node.host_start -= n->host_start;
1430 hostaddrs[i]
1431 = (void *) (n->tgt->tgt_start + n->tgt_offset
1432 + cur_node.host_start);
1434 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1436 gomp_mutex_unlock (&devicep->lock);
1437 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1439 else if ((kind & typemask)
1440 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1441 /* If not present, continue using the host address. */
1443 else
1444 __builtin_unreachable ();
1445 tgt->list[i].offset = OFFSET_INLINED;
1447 continue;
1448 case GOMP_MAP_STRUCT:
1449 first = i + 1;
1450 last = i + sizes[i];
1451 cur_node.host_start = (uintptr_t) hostaddrs[i];
1452 cur_node.host_end = (uintptr_t) hostaddrs[last]
1453 + sizes[last];
1454 if (tgt->list[first].key != NULL)
1455 continue;
1456 n = splay_tree_lookup (mem_map, &cur_node);
1457 if (n == NULL)
1459 size_t align = (size_t) 1 << (kind >> rshift);
1460 tgt_size -= (uintptr_t) hostaddrs[first]
1461 - (uintptr_t) hostaddrs[i];
1462 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1463 tgt_size += (uintptr_t) hostaddrs[first]
1464 - (uintptr_t) hostaddrs[i];
1465 field_tgt_base = (uintptr_t) hostaddrs[first];
1466 field_tgt_offset = tgt_size;
1467 field_tgt_clear = last;
1468 field_tgt_structelem_first = NULL;
1469 tgt_size += cur_node.host_end
1470 - (uintptr_t) hostaddrs[first];
1471 continue;
1473 for (i = first; i <= last; i++)
1474 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1475 sizes, kinds, cbufp, refcount_set);
1476 i--;
1477 continue;
1478 case GOMP_MAP_ALWAYS_POINTER:
1479 cur_node.host_start = (uintptr_t) hostaddrs[i];
1480 cur_node.host_end = cur_node.host_start + sizeof (void *);
1481 n = splay_tree_lookup (mem_map, &cur_node);
1482 if (n == NULL
1483 || n->host_start > cur_node.host_start
1484 || n->host_end < cur_node.host_end)
1486 gomp_mutex_unlock (&devicep->lock);
1487 gomp_fatal ("always pointer not mapped");
1489 if (i > 0
1490 && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1491 != GOMP_MAP_ALWAYS_POINTER))
1492 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1493 if (cur_node.tgt_offset)
1494 cur_node.tgt_offset -= sizes[i];
1495 gomp_copy_host2dev (devicep, aq,
1496 (void *) (n->tgt->tgt_start
1497 + n->tgt_offset
1498 + cur_node.host_start
1499 - n->host_start),
1500 (void *) &cur_node.tgt_offset,
1501 sizeof (void *), true, cbufp);
1502 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1503 + cur_node.host_start - n->host_start;
1504 continue;
1505 case GOMP_MAP_IF_PRESENT:
1506 /* Not present - otherwise handled above. Skip over its
1507 MAP_POINTER as well. */
1508 if (i + 1 < mapnum
1509 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1510 == GOMP_MAP_POINTER))
1511 ++i;
1512 continue;
1513 case GOMP_MAP_ATTACH:
1514 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1516 cur_node.host_start = (uintptr_t) hostaddrs[i];
1517 cur_node.host_end = cur_node.host_start + sizeof (void *);
1518 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1519 if (n != NULL)
1521 tgt->list[i].key = n;
1522 tgt->list[i].offset = cur_node.host_start - n->host_start;
1523 tgt->list[i].length = n->host_end - n->host_start;
1524 tgt->list[i].copy_from = false;
1525 tgt->list[i].always_copy_from = false;
1526 tgt->list[i].is_attach = true;
1527 /* OpenACC 'attach'/'detach' doesn't affect
1528 structured/dynamic reference counts ('n->refcount',
1529 'n->dynamic_refcount'). */
1531 bool zlas
1532 = ((kind & typemask)
1533 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1534 gomp_attach_pointer (devicep, aq, mem_map, n,
1535 (uintptr_t) hostaddrs[i], sizes[i],
1536 cbufp, zlas);
1538 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1540 gomp_mutex_unlock (&devicep->lock);
1541 gomp_fatal ("outer struct not mapped for attach");
1543 continue;
1545 default:
1546 break;
1548 splay_tree_key k = &array->key;
1549 k->host_start = (uintptr_t) hostaddrs[i];
1550 if (!GOMP_MAP_POINTER_P (kind & typemask))
1551 k->host_end = k->host_start + sizes[i];
1552 else
1553 k->host_end = k->host_start + sizeof (void *);
1554 splay_tree_key n = splay_tree_lookup (mem_map, k);
1555 if (n && n->refcount != REFCOUNT_LINK)
1556 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1557 kind & typemask, false, implicit, cbufp,
1558 refcount_set);
1559 else
1561 k->aux = NULL;
1562 if (n && n->refcount == REFCOUNT_LINK)
1564 /* Replace target address of the pointer with target address
1565 of mapped object in the splay tree. */
1566 splay_tree_remove (mem_map, n);
1567 k->aux
1568 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1569 k->aux->link_key = n;
1571 size_t align = (size_t) 1 << (kind >> rshift);
1572 tgt->list[i].key = k;
1573 k->tgt = tgt;
1574 k->refcount = 0;
1575 k->dynamic_refcount = 0;
1576 if (field_tgt_clear != FIELD_TGT_EMPTY)
1578 k->tgt_offset = k->host_start - field_tgt_base
1579 + field_tgt_offset;
1580 if (openmp_p)
1582 k->refcount = REFCOUNT_STRUCTELEM;
1583 if (field_tgt_structelem_first == NULL)
1585 /* Set to first structure element of sequence. */
1586 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1587 field_tgt_structelem_first = k;
1589 else
1590 /* Point to refcount of leading element, but do not
1591 increment again. */
1592 k->structelem_refcount_ptr
1593 = &field_tgt_structelem_first->structelem_refcount;
1595 if (i == field_tgt_clear)
1597 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1598 field_tgt_structelem_first = NULL;
1601 if (i == field_tgt_clear)
1602 field_tgt_clear = FIELD_TGT_EMPTY;
1604 else
1606 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1607 k->tgt_offset = tgt_size;
1608 tgt_size += k->host_end - k->host_start;
1610 /* First increment, from 0 to 1. gomp_increment_refcount
1611 encapsulates the different increment cases, so use this
1612 instead of directly setting 1 during initialization. */
1613 gomp_increment_refcount (k, refcount_set);
1615 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1616 tgt->list[i].always_copy_from
1617 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1618 tgt->list[i].is_attach = false;
1619 tgt->list[i].offset = 0;
1620 tgt->list[i].length = k->host_end - k->host_start;
1621 tgt->refcount++;
1622 array->left = NULL;
1623 array->right = NULL;
1624 splay_tree_insert (mem_map, array);
1625 switch (kind & typemask)
1627 case GOMP_MAP_ALLOC:
1628 case GOMP_MAP_FROM:
1629 case GOMP_MAP_FORCE_ALLOC:
1630 case GOMP_MAP_FORCE_FROM:
1631 case GOMP_MAP_ALWAYS_FROM:
1632 break;
1633 case GOMP_MAP_TO:
1634 case GOMP_MAP_TOFROM:
1635 case GOMP_MAP_FORCE_TO:
1636 case GOMP_MAP_FORCE_TOFROM:
1637 case GOMP_MAP_ALWAYS_TO:
1638 case GOMP_MAP_ALWAYS_TOFROM:
1639 gomp_copy_host2dev (devicep, aq,
1640 (void *) (tgt->tgt_start
1641 + k->tgt_offset),
1642 (void *) k->host_start,
1643 k->host_end - k->host_start,
1644 false, cbufp);
1645 break;
1646 case GOMP_MAP_POINTER:
1647 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1648 gomp_map_pointer
1649 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1650 k->tgt_offset, sizes[i], cbufp,
1651 ((kind & typemask)
1652 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1653 break;
1654 case GOMP_MAP_TO_PSET:
1655 gomp_copy_host2dev (devicep, aq,
1656 (void *) (tgt->tgt_start
1657 + k->tgt_offset),
1658 (void *) k->host_start,
1659 k->host_end - k->host_start,
1660 false, cbufp);
1661 tgt->list[i].has_null_ptr_assoc = false;
1663 for (j = i + 1; j < mapnum; j++)
1665 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1666 & typemask);
1667 if (!GOMP_MAP_POINTER_P (ptr_kind)
1668 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1669 break;
1670 else if ((uintptr_t) hostaddrs[j] < k->host_start
1671 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1672 > k->host_end))
1673 break;
1674 else
1676 tgt->list[j].key = k;
1677 tgt->list[j].copy_from = false;
1678 tgt->list[j].always_copy_from = false;
1679 tgt->list[j].is_attach = false;
1680 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1681 /* For OpenMP, the use of refcount_sets causes
1682 errors if we set k->refcount = 1 above but also
1683 increment it again here, because the decrement
1684 would then not match: we decrement only once
1685 for each key's refcount. Therefore avoid this
1686 increment for OpenMP constructs. */
1687 if (!openmp_p)
1688 gomp_increment_refcount (k, refcount_set);
1689 gomp_map_pointer (tgt, aq,
1690 (uintptr_t) *(void **) hostaddrs[j],
1691 k->tgt_offset
1692 + ((uintptr_t) hostaddrs[j]
1693 - k->host_start),
1694 sizes[j], cbufp, false);
1697 i = j - 1;
1698 break;
1699 case GOMP_MAP_FORCE_PRESENT:
1701 /* We already looked up the memory region above and it
1702 was missing. */
1703 size_t size = k->host_end - k->host_start;
1704 gomp_mutex_unlock (&devicep->lock);
1705 #ifdef HAVE_INTTYPES_H
1706 gomp_fatal ("present clause: !acc_is_present (%p, "
1707 "%"PRIu64" (0x%"PRIx64"))",
1708 (void *) k->host_start,
1709 (uint64_t) size, (uint64_t) size);
1710 #else
1711 gomp_fatal ("present clause: !acc_is_present (%p, "
1712 "%lu (0x%lx))", (void *) k->host_start,
1713 (unsigned long) size, (unsigned long) size);
1714 #endif
1716 break;
1717 case GOMP_MAP_FORCE_DEVICEPTR:
1718 assert (k->host_end - k->host_start == sizeof (void *));
1719 gomp_copy_host2dev (devicep, aq,
1720 (void *) (tgt->tgt_start
1721 + k->tgt_offset),
1722 (void *) k->host_start,
1723 sizeof (void *), false, cbufp);
1724 break;
1725 default:
1726 gomp_mutex_unlock (&devicep->lock);
1727 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1728 kind);
1731 if (k->aux && k->aux->link_key)
1733 /* Set link pointer on target to the device address of the
1734 mapped object. */
1735 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1736 /* We intentionally do not use coalescing here, as it's not
1737 data allocated by the current call to this function. */
1738 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1739 &tgt_addr, sizeof (void *), true, NULL);
1741 array++;
1746 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1748 for (i = 0; i < mapnum; i++)
1750 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1751 gomp_copy_host2dev (devicep, aq,
1752 (void *) (tgt->tgt_start + i * sizeof (void *)),
1753 (void *) &cur_node.tgt_offset, sizeof (void *),
1754 true, cbufp);
1758 if (cbufp)
1760 long c = 0;
1761 for (c = 0; c < cbuf.chunk_cnt; ++c)
1762 gomp_copy_host2dev (devicep, aq,
1763 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1764 (char *) cbuf.buf + (cbuf.chunks[c].start
1765 - cbuf.chunks[0].start),
1766 cbuf.chunks[c].end - cbuf.chunks[c].start,
1767 false, NULL);
1768 if (aq)
1769 /* Free once the transfer has completed. */
1770 devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
1771 else
1772 free (cbuf.buf);
1773 cbuf.buf = NULL;
1774 cbufp = NULL;
1777 /* If the variable from "omp target enter data" map-list was already mapped,
1778 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1779 gomp_exit_data. */
1780 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1782 free (tgt);
1783 tgt = NULL;
1786 gomp_mutex_unlock (&devicep->lock);
1787 return tgt;
1790 static struct target_mem_desc *
1791 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1792 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1793 bool short_mapkind, htab_t *refcount_set,
1794 enum gomp_map_vars_kind pragma_kind)
1796 /* This management of a local refcount_set is for convenience of callers
1797 who do not share a refcount_set over multiple map/unmap uses. */
1798 htab_t local_refcount_set = NULL;
1799 if (refcount_set == NULL)
1801 local_refcount_set = htab_create (mapnum);
1802 refcount_set = &local_refcount_set;
1805 struct target_mem_desc *tgt;
1806 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1807 sizes, kinds, short_mapkind, refcount_set,
1808 pragma_kind);
1809 if (local_refcount_set)
1810 htab_free (local_refcount_set);
1812 return tgt;
1815 attribute_hidden struct target_mem_desc *
1816 goacc_map_vars (struct gomp_device_descr *devicep,
1817 struct goacc_asyncqueue *aq, size_t mapnum,
1818 void **hostaddrs, void **devaddrs, size_t *sizes,
1819 void *kinds, bool short_mapkind,
1820 enum gomp_map_vars_kind pragma_kind)
1822 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1823 sizes, kinds, short_mapkind, NULL,
1824 GOMP_MAP_VARS_OPENACC | pragma_kind);
1827 static void
1828 gomp_unmap_tgt (struct target_mem_desc *tgt)
1830 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1831 if (tgt->tgt_end)
1832 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1834 free (tgt->array);
1835 free (tgt);
1838 static bool
1839 gomp_unref_tgt (void *ptr)
1841 bool is_tgt_unmapped = false;
1843 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1845 if (tgt->refcount > 1)
1846 tgt->refcount--;
1847 else
1849 gomp_unmap_tgt (tgt);
1850 is_tgt_unmapped = true;
1853 return is_tgt_unmapped;
1856 static void
1857 gomp_unref_tgt_void (void *ptr)
1859 (void) gomp_unref_tgt (ptr);
1862 static void
1863 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1865 splay_tree_remove (sp, k);
1866 if (k->aux)
1868 if (k->aux->link_key)
1869 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1870 if (k->aux->attach_count)
1871 free (k->aux->attach_count);
1872 free (k->aux);
1873 k->aux = NULL;
1877 static inline __attribute__((always_inline)) bool
1878 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1879 struct goacc_asyncqueue *aq)
1881 bool is_tgt_unmapped = false;
1883 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1885 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1886 /* Infer the splay_tree_key of the first structelem key using the
1887 pointer to the first structelem_refcount. */
1888 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1889 - offsetof (struct splay_tree_key_s,
1890 structelem_refcount));
1891 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1893 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1894 with the splay_tree_keys embedded inside. */
1895 splay_tree_node node =
1896 (splay_tree_node) ((char *) k
1897 - offsetof (struct splay_tree_node_s, key));
1898 while (true)
1900 /* Start from the _FIRST key and continue for all following
1901 sibling keys. */
1902 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1903 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1904 break;
1905 else
1906 k = &(++node)->key;
1909 else
1910 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1912 if (aq)
1913 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1914 (void *) k->tgt);
1915 else
1916 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1917 return is_tgt_unmapped;
1920 attribute_hidden bool
1921 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1923 return gomp_remove_var_internal (devicep, k, NULL);
1926 /* Remove a variable asynchronously. This actually removes the variable
1927 mapping immediately, but retains the linked target_mem_desc until the
1928 asynchronous operation has completed (as it may still refer to target
1929 memory). The device lock must be held before entry, and remains locked on
1930 exit. */
1932 attribute_hidden void
1933 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1934 struct goacc_asyncqueue *aq)
1936 (void) gomp_remove_var_internal (devicep, k, aq);
1939 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1940 variables back from device to host; if it is false, it is assumed that this
1941 has been done already. */
1943 static inline __attribute__((always_inline)) void
1944 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1945 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1947 struct gomp_device_descr *devicep = tgt->device_descr;
1949 if (tgt->list_count == 0)
1951 free (tgt);
1952 return;
1955 gomp_mutex_lock (&devicep->lock);
1956 if (devicep->state == GOMP_DEVICE_FINALIZED)
1958 gomp_mutex_unlock (&devicep->lock);
1959 free (tgt->array);
1960 free (tgt);
1961 return;
1964 size_t i;
1966 /* We must perform detachments before any copies back to the host. */
1967 for (i = 0; i < tgt->list_count; i++)
1969 splay_tree_key k = tgt->list[i].key;
1971 if (k != NULL && tgt->list[i].is_attach)
1972 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1973 + tgt->list[i].offset,
1974 false, NULL);
1977 for (i = 0; i < tgt->list_count; i++)
1979 splay_tree_key k = tgt->list[i].key;
1980 if (k == NULL)
1981 continue;
1983 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1984 counts ('n->refcount', 'n->dynamic_refcount'). */
1985 if (tgt->list[i].is_attach)
1986 continue;
1988 bool do_copy, do_remove;
1989 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1991 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1992 || tgt->list[i].always_copy_from)
1993 gomp_copy_dev2host (devicep, aq,
1994 (void *) (k->host_start + tgt->list[i].offset),
1995 (void *) (k->tgt->tgt_start + k->tgt_offset
1996 + tgt->list[i].offset),
1997 tgt->list[i].length);
1998 if (do_remove)
2000 struct target_mem_desc *k_tgt = k->tgt;
2001 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2002 /* It would be bad if TGT got unmapped while we're still iterating
2003 over its LIST_COUNT, and also expect to use it in the following
2004 code. */
2005 assert (!is_tgt_unmapped
2006 || k_tgt != tgt);
2010 if (aq)
2011 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2012 (void *) tgt);
2013 else
2014 gomp_unref_tgt ((void *) tgt);
2016 gomp_mutex_unlock (&devicep->lock);
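/* Unmap the variables described by TGT, creating a temporary refcount set
   when the caller does not share one across map/unmap uses.  */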
2019 static void
2020 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2021 htab_t *refcount_set)
2023 /* This management of a local refcount_set is for convenience of callers
2024 who do not share a refcount_set over multiple map/unmap uses. */
2025 htab_t local_refcount_set = NULL;
2026 if (refcount_set == NULL)
2028 local_refcount_set = htab_create (tgt->list_count);
2029 refcount_set = &local_refcount_set;
2032 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2034 if (local_refcount_set)
2035 htab_free (local_refcount_set);
2038 attribute_hidden void
2039 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2040 struct goacc_asyncqueue *aq)
2042 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
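/* Implement 'omp target update': copy each listed host range to and/or from
   DEVICEP according to KINDS, taking care not to overwrite pointers that are
   still attached.  */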
2045 static void
2046 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2047 size_t *sizes, void *kinds, bool short_mapkind)
2049 size_t i;
2050 struct splay_tree_key_s cur_node;
2051 const int typemask = short_mapkind ? 0xff : 0x7;
2053 if (!devicep)
2054 return;
2056 if (mapnum == 0)
2057 return;
2059 gomp_mutex_lock (&devicep->lock);
2060 if (devicep->state == GOMP_DEVICE_FINALIZED)
2062 gomp_mutex_unlock (&devicep->lock);
2063 return;
2066 for (i = 0; i < mapnum; i++)
2067 if (sizes[i])
2069 cur_node.host_start = (uintptr_t) hostaddrs[i];
2070 cur_node.host_end = cur_node.host_start + sizes[i];
2071 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2072 if (n)
2074 int kind = get_kind (short_mapkind, kinds, i);
2075 if (n->host_start > cur_node.host_start
2076 || n->host_end < cur_node.host_end)
2078 gomp_mutex_unlock (&devicep->lock);
2079 gomp_fatal ("Trying to update [%p..%p) object when "
2080 "only [%p..%p) is mapped",
2081 (void *) cur_node.host_start,
2082 (void *) cur_node.host_end,
2083 (void *) n->host_start,
2084 (void *) n->host_end);
2087 if (n->aux && n->aux->attach_count)
2089 uintptr_t addr = cur_node.host_start;
2090 while (addr < cur_node.host_end)
2092 /* We have to be careful not to overwrite still attached
2093 pointers during host<->device updates. */
2094 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2095 if (n->aux->attach_count[i] == 0)
2097 void *devaddr = (void *) (n->tgt->tgt_start
2098 + n->tgt_offset
2099 + addr - n->host_start);
2100 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2101 gomp_copy_host2dev (devicep, NULL,
2102 devaddr, (void *) addr,
2103 sizeof (void *), false, NULL);
2104 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2105 gomp_copy_dev2host (devicep, NULL,
2106 (void *) addr, devaddr,
2107 sizeof (void *));
2109 addr += sizeof (void *);
2112 else
2114 void *hostaddr = (void *) cur_node.host_start;
2115 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2116 + cur_node.host_start
2117 - n->host_start);
2118 size_t size = cur_node.host_end - cur_node.host_start;
2120 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2121 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2122 false, NULL);
2123 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2124 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2128 gomp_mutex_unlock (&devicep->lock);
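/* Return the cached offload-ICV list item for device DEV_NUM, or NULL if
   none has been created yet.  */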
2131 static struct gomp_offload_icv_list *
2132 gomp_get_offload_icv_item (int dev_num)
2134 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2135 while (l != NULL && l->device_num != dev_num)
2136 l = l->next;
2138 return l;
2141 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2142 depending on the device num and the variable hierarchy
2143 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2144 device and thus no item with that device number is contained in
2145 gomp_offload_icv_list, then a new item is created and added to the list. */
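/* For illustration (using the NTEAMS ICV and assuming the usual environment
   variable spelling): a value from OMP_NUM_TEAMS_DEV_42 wins for device 42
   over one from OMP_NUM_TEAMS_DEV, which in turn wins over OMP_NUM_TEAMS_ALL;
   if none of those was set, the library default is used. This mirrors the
   dev_x / dev / all / default chain in the function below.  */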
2147 static struct gomp_offload_icvs *
2148 get_gomp_offload_icvs (int dev_num)
2150 struct gomp_icv_list *dev
2151 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2152 struct gomp_icv_list *all
2153 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2154 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2155 struct gomp_offload_icv_list *offload_icvs
2156 = gomp_get_offload_icv_item (dev_num);
2158 if (offload_icvs != NULL)
2159 return &offload_icvs->icvs;
2161 struct gomp_offload_icv_list *new
2162 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2164 new->device_num = dev_num;
2165 new->icvs.device_num = dev_num;
2166 new->next = gomp_offload_icv_list;
2168 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2169 new->icvs.nteams = dev_x->icvs.nteams_var;
2170 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2171 new->icvs.nteams = dev->icvs.nteams_var;
2172 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2173 new->icvs.nteams = all->icvs.nteams_var;
2174 else
2175 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2177 if (dev_x != NULL
2178 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2179 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2180 else if (dev != NULL
2181 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2182 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2183 else if (all != NULL
2184 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2185 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2186 else
2187 new->icvs.teams_thread_limit
2188 = gomp_default_icv_values.teams_thread_limit_var;
2190 if (dev_x != NULL
2191 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2192 new->icvs.default_device = dev_x->icvs.default_device_var;
2193 else if (dev != NULL
2194 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2195 new->icvs.default_device = dev->icvs.default_device_var;
2196 else if (all != NULL
2197 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2198 new->icvs.default_device = all->icvs.default_device_var;
2199 else
2200 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2202 gomp_offload_icv_list = new;
2203 return &new->icvs;
2206 /* Load the image pointed to by TARGET_DATA to the device specified by DEVICEP,
2207 and insert into the splay tree the mapping between addresses from HOST_TABLE
2208 and from the loaded target image. We rely on the host and device compilers
2209 emitting variables and functions in the same order. */
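/* Illustrative sketch of the layout assumed by the code below (not a
   normative ABI statement): HOST_TABLE points to four pointers,

     void *host_table[] = { funcs_begin, funcs_end, vars_begin, vars_end };

   where funcs_begin..funcs_end is an array of host function addresses and
   vars_begin..vars_end holds (address, size) pairs, one pair per variable.  */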
2211 static void
2212 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2213 const void *host_table, const void *target_data,
2214 bool is_register_lock)
2216 void **host_func_table = ((void ***) host_table)[0];
2217 void **host_funcs_end = ((void ***) host_table)[1];
2218 void **host_var_table = ((void ***) host_table)[2];
2219 void **host_vars_end = ((void ***) host_table)[3];
2221 /* The func table contains only addresses, the var table contains addresses
2222 and corresponding sizes. */
2223 int num_funcs = host_funcs_end - host_func_table;
2224 int num_vars = (host_vars_end - host_var_table) / 2;
2226 /* Load image to device and get target addresses for the image. */
2227 struct addr_pair *target_table = NULL;
2228 uint64_t *rev_target_fn_table = NULL;
2229 int i, num_target_entries;
2231 /* With reverse offload, insert also target-host addresses. */
2232 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2234 num_target_entries
2235 = devicep->load_image_func (devicep->target_id, version,
2236 target_data, &target_table,
2237 rev_lookup ? &rev_target_fn_table : NULL);
2239 if (num_target_entries != num_funcs + num_vars
2240 /* "+1" due to the additional ICV struct. */
2241 && num_target_entries != num_funcs + num_vars + 1)
2243 gomp_mutex_unlock (&devicep->lock);
2244 if (is_register_lock)
2245 gomp_mutex_unlock (&register_lock);
2246 gomp_fatal ("Cannot map target functions or variables"
2247 " (expected %u, have %u)", num_funcs + num_vars,
2248 num_target_entries);
2251 /* Insert host-target address mapping into splay tree. */
2252 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2253 /* "+1" due to the additional ICV struct. */
2254 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2255 * sizeof (*tgt->array));
2256 if (rev_target_fn_table)
2257 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2258 else
2259 tgt->rev_array = NULL;
2260 tgt->refcount = REFCOUNT_INFINITY;
2261 tgt->tgt_start = 0;
2262 tgt->tgt_end = 0;
2263 tgt->to_free = NULL;
2264 tgt->prev = NULL;
2265 tgt->list_count = 0;
2266 tgt->device_descr = devicep;
2267 splay_tree_node array = tgt->array;
2268 reverse_splay_tree_node rev_array = tgt->rev_array;
2270 for (i = 0; i < num_funcs; i++)
2272 splay_tree_key k = &array->key;
2273 k->host_start = (uintptr_t) host_func_table[i];
2274 k->host_end = k->host_start + 1;
2275 k->tgt = tgt;
2276 k->tgt_offset = target_table[i].start;
2277 k->refcount = REFCOUNT_INFINITY;
2278 k->dynamic_refcount = 0;
2279 k->aux = NULL;
2280 array->left = NULL;
2281 array->right = NULL;
2282 splay_tree_insert (&devicep->mem_map, array);
2283 if (rev_target_fn_table)
2285 reverse_splay_tree_key k2 = &rev_array->key;
2286 k2->dev = rev_target_fn_table[i];
2287 k2->k = k;
2288 rev_array->left = NULL;
2289 rev_array->right = NULL;
2290 if (k2->dev != 0)
2291 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2292 rev_array++;
2294 array++;
2297 /* Most significant bit of the size in host and target tables marks
2298 "omp declare target link" variables. */
2299 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2300 const uintptr_t size_mask = ~link_bit;
2302 for (i = 0; i < num_vars; i++)
2304 struct addr_pair *target_var = &target_table[num_funcs + i];
2305 uintptr_t target_size = target_var->end - target_var->start;
2306 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2308 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2310 gomp_mutex_unlock (&devicep->lock);
2311 if (is_register_lock)
2312 gomp_mutex_unlock (&register_lock);
2313 gomp_fatal ("Cannot map target variables (size mismatch)");
2316 splay_tree_key k = &array->key;
2317 k->host_start = (uintptr_t) host_var_table[i * 2];
2318 k->host_end
2319 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2320 k->tgt = tgt;
2321 k->tgt_offset = target_var->start;
2322 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2323 k->dynamic_refcount = 0;
2324 k->aux = NULL;
2325 array->left = NULL;
2326 array->right = NULL;
2327 splay_tree_insert (&devicep->mem_map, array);
2328 array++;
2331 /* The last entry is for the ICVs variable.
2332 Tolerate the case where the plugin does not return that entry. */
2333 if (num_funcs + num_vars < num_target_entries)
2335 struct addr_pair *var = &target_table[num_funcs + num_vars];
2337 /* Start address will be non-zero for the ICVs variable if
2338 the variable was found in this image. */
2339 if (var->start != 0)
2341 /* The index of the devicep within devices[] is regarded as its
2342 'device number', which is different from the per-device type
2343 devicep->target_id. */
2344 int dev_num = (int) (devicep - &devices[0]);
2345 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2346 size_t var_size = var->end - var->start;
2347 if (var_size != sizeof (struct gomp_offload_icvs))
2349 gomp_mutex_unlock (&devicep->lock);
2350 if (is_register_lock)
2351 gomp_mutex_unlock (&register_lock);
2352 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2353 "format");
2355 /* Copy the ICVs variable to its place in device memory, thereby
2356 actually putting its device number into effect. */
2357 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2358 var_size, false, NULL);
2359 splay_tree_key k = &array->key;
2360 k->host_start = (uintptr_t) icvs;
2361 k->host_end =
2362 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2363 k->tgt = tgt;
2364 k->tgt_offset = var->start;
2365 k->refcount = REFCOUNT_INFINITY;
2366 k->dynamic_refcount = 0;
2367 k->aux = NULL;
2368 array->left = NULL;
2369 array->right = NULL;
2370 splay_tree_insert (&devicep->mem_map, array);
2371 array++;
2375 free (target_table);
2378 /* Unload the mappings described by target_data from device DEVICE_P.
2379 The device must be locked. */
2381 static void
2382 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2383 unsigned version,
2384 const void *host_table, const void *target_data)
2386 void **host_func_table = ((void ***) host_table)[0];
2387 void **host_funcs_end = ((void ***) host_table)[1];
2388 void **host_var_table = ((void ***) host_table)[2];
2389 void **host_vars_end = ((void ***) host_table)[3];
2391 /* The func table contains only addresses, the var table contains addresses
2392 and corresponding sizes. */
2393 int num_funcs = host_funcs_end - host_func_table;
2394 int num_vars = (host_vars_end - host_var_table) / 2;
2396 struct splay_tree_key_s k;
2397 splay_tree_key node = NULL;
2399 /* Find the mapping at the start of the node array. */
2400 if (num_funcs || num_vars)
2402 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2403 : (uintptr_t) host_var_table[0]);
2404 k.host_end = k.host_start + 1;
2405 node = splay_tree_lookup (&devicep->mem_map, &k);
2408 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2410 gomp_mutex_unlock (&devicep->lock);
2411 gomp_fatal ("image unload fail");
2413 if (devicep->mem_map_rev.root)
2415 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2416 real allocation. */
2417 assert (node && node->tgt && node->tgt->rev_array);
2418 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2419 free (node->tgt->rev_array);
2420 devicep->mem_map_rev.root = NULL;
2423 /* Remove mappings from splay tree. */
2424 int i;
2425 for (i = 0; i < num_funcs; i++)
2427 k.host_start = (uintptr_t) host_func_table[i];
2428 k.host_end = k.host_start + 1;
2429 splay_tree_remove (&devicep->mem_map, &k);
2432 /* Most significant bit of the size in host and target tables marks
2433 "omp declare target link" variables. */
2434 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2435 const uintptr_t size_mask = ~link_bit;
2436 bool is_tgt_unmapped = false;
2438 for (i = 0; i < num_vars; i++)
2440 k.host_start = (uintptr_t) host_var_table[i * 2];
2441 k.host_end
2442 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2444 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2445 splay_tree_remove (&devicep->mem_map, &k);
2446 else
2448 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2449 is_tgt_unmapped = gomp_remove_var (devicep, n);
2453 if (node && !is_tgt_unmapped)
2455 free (node->tgt);
2456 free (node);
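/* Format the 'omp requires' clauses present in REQUIRES_MASK as a
   comma-separated list in BUF (of SIZE bytes), for diagnostics.  */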
2460 static void
2461 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2463 char *end = buf + size, *p = buf;
2464 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2465 p += snprintf (p, end - p, "unified_address");
2466 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2467 p += snprintf (p, end - p, "%sunified_shared_memory",
2468 (p == buf ? "" : ", "));
2469 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2470 p += snprintf (p, end - p, "%sreverse_offload",
2471 (p == buf ? "" : ", "));
2474 /* This function should be called from every offload image while loading.
2475 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2476 the target, and DATA. */
2478 void
2479 GOMP_offload_register_ver (unsigned version, const void *host_table,
2480 int target_type, const void *data)
2482 int i;
2484 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2485 gomp_fatal ("Library too old for offload (version %u < %u)",
2486 GOMP_VERSION, GOMP_VERSION_LIB (version));
2488 int omp_req;
2489 const void *target_data;
2490 if (GOMP_VERSION_LIB (version) > 1)
2492 omp_req = (int) (size_t) ((void **) data)[0];
2493 target_data = &((void **) data)[1];
2495 else
2497 omp_req = 0;
2498 target_data = data;
2501 gomp_mutex_lock (&register_lock);
2503 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2505 char buf1[sizeof ("unified_address, unified_shared_memory, "
2506 "reverse_offload")];
2507 char buf2[sizeof ("unified_address, unified_shared_memory, "
2508 "reverse_offload")];
2509 gomp_requires_to_name (buf2, sizeof (buf2),
2510 omp_req != GOMP_REQUIRES_TARGET_USED
2511 ? omp_req : omp_requires_mask);
2512 if (omp_req != GOMP_REQUIRES_TARGET_USED
2513 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2515 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2516 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2517 "in multiple compilation units: '%s' vs. '%s'",
2518 buf1, buf2);
2520 else
2521 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2522 "some compilation units", buf2);
2524 omp_requires_mask = omp_req;
2526 /* Load image to all initialized devices. */
2527 for (i = 0; i < num_devices; i++)
2529 struct gomp_device_descr *devicep = &devices[i];
2530 gomp_mutex_lock (&devicep->lock);
2531 if (devicep->type == target_type
2532 && devicep->state == GOMP_DEVICE_INITIALIZED)
2533 gomp_load_image_to_device (devicep, version,
2534 host_table, target_data, true);
2535 gomp_mutex_unlock (&devicep->lock);
2538 /* Insert image to array of pending images. */
2539 offload_images
2540 = gomp_realloc_unlock (offload_images,
2541 (num_offload_images + 1)
2542 * sizeof (struct offload_image_descr));
2543 offload_images[num_offload_images].version = version;
2544 offload_images[num_offload_images].type = target_type;
2545 offload_images[num_offload_images].host_table = host_table;
2546 offload_images[num_offload_images].target_data = target_data;
2548 num_offload_images++;
2549 gomp_mutex_unlock (&register_lock);
2552 /* Legacy entry point. */
2554 void
2555 GOMP_offload_register (const void *host_table, int target_type,
2556 const void *target_data)
2558 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2561 /* This function should be called from every offload image while unloading.
2562 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2563 the target, and DATA. */
2565 void
2566 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2567 int target_type, const void *data)
2569 int i;
2571 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2572 gomp_fatal ("Library too old for offload (version %u < %u)",
2573 GOMP_VERSION, GOMP_VERSION_LIB (version));
2575 const void *target_data;
2576 if (GOMP_VERSION_LIB (version) > 1)
2577 target_data = &((void **) data)[1];
2578 else
2579 target_data = data;
2581 gomp_mutex_lock (&register_lock);
2583 /* Unload image from all initialized devices. */
2584 for (i = 0; i < num_devices; i++)
2586 struct gomp_device_descr *devicep = &devices[i];
2587 gomp_mutex_lock (&devicep->lock);
2588 if (devicep->type == target_type
2589 && devicep->state == GOMP_DEVICE_INITIALIZED)
2590 gomp_unload_image_from_device (devicep, version,
2591 host_table, target_data);
2592 gomp_mutex_unlock (&devicep->lock);
2595 /* Remove image from array of pending images. */
2596 for (i = 0; i < num_offload_images; i++)
2597 if (offload_images[i].target_data == target_data)
2599 offload_images[i] = offload_images[--num_offload_images];
2600 break;
2603 gomp_mutex_unlock (&register_lock);
2606 /* Legacy entry point. */
2608 void
2609 GOMP_offload_unregister (const void *host_table, int target_type,
2610 const void *target_data)
2612 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2615 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2616 must be locked on entry, and remains locked on return. */
2618 attribute_hidden void
2619 gomp_init_device (struct gomp_device_descr *devicep)
2621 int i;
2622 if (!devicep->init_device_func (devicep->target_id))
2624 gomp_mutex_unlock (&devicep->lock);
2625 gomp_fatal ("device initialization failed");
2628 /* Load to the device all images registered so far. */
2629 for (i = 0; i < num_offload_images; i++)
2631 struct offload_image_descr *image = &offload_images[i];
2632 if (image->type == devicep->type)
2633 gomp_load_image_to_device (devicep, image->version,
2634 image->host_table, image->target_data,
2635 false);
2638 /* Initialize OpenACC asynchronous queues. */
2639 goacc_init_asyncqueues (devicep);
2641 devicep->state = GOMP_DEVICE_INITIALIZED;
2644 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2645 must be locked on entry, and remains locked on return. */
2647 attribute_hidden bool
2648 gomp_fini_device (struct gomp_device_descr *devicep)
2650 bool ret = goacc_fini_asyncqueues (devicep);
2651 ret &= devicep->fini_device_func (devicep->target_id);
2652 devicep->state = GOMP_DEVICE_FINALIZED;
2653 return ret;
2656 attribute_hidden void
2657 gomp_unload_device (struct gomp_device_descr *devicep)
2659 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2661 unsigned i;
2663 /* Unload from the device all currently registered images. */
2664 for (i = 0; i < num_offload_images; i++)
2666 struct offload_image_descr *image = &offload_images[i];
2667 if (image->type == devicep->type)
2668 gomp_unload_image_from_device (devicep, image->version,
2669 image->host_table,
2670 image->target_data);
2675 /* Host fallback for GOMP_target{,_ext} routines. */
2677 static void
2678 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2679 struct gomp_device_descr *devicep, void **args)
2681 struct gomp_thread old_thr, *thr = gomp_thread ();
2683 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2684 && devicep != NULL)
2685 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2686 "be used for offloading");
2688 old_thr = *thr;
2689 memset (thr, '\0', sizeof (*thr));
2690 if (gomp_places_list)
2692 thr->place = old_thr.place;
2693 thr->ts.place_partition_len = gomp_places_list_len;
2695 if (args)
2696 while (*args)
2698 intptr_t id = (intptr_t) *args++, val;
2699 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2700 val = (intptr_t) *args++;
2701 else
2702 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2703 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2704 continue;
2705 id &= GOMP_TARGET_ARG_ID_MASK;
2706 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2707 continue;
2708 val = val > INT_MAX ? INT_MAX : val;
2709 if (val)
2710 gomp_icv (true)->thread_limit_var = val;
2711 break;
2714 fn (hostaddrs);
2715 gomp_free_thread (thr);
2716 *thr = old_thr;
2719 /* Calculate alignment and size requirements of a private copy of data shared
2720 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2722 static inline void
2723 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2724 unsigned short *kinds, size_t *tgt_align,
2725 size_t *tgt_size)
2727 size_t i;
2728 for (i = 0; i < mapnum; i++)
2729 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2731 size_t align = (size_t) 1 << (kinds[i] >> 8);
2732 if (*tgt_align < align)
2733 *tgt_align = align;
2734 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2735 *tgt_size += sizes[i];
2739 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2741 static inline void
2742 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2743 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2744 size_t tgt_size)
2746 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2747 if (al)
2748 tgt += tgt_align - al;
2749 tgt_size = 0;
2750 size_t i;
2751 for (i = 0; i < mapnum; i++)
2752 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2754 size_t align = (size_t) 1 << (kinds[i] >> 8);
2755 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2756 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2757 hostaddrs[i] = tgt + tgt_size;
2758 tgt_size = tgt_size + sizes[i];
2759 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2761 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2762 ++i;
2767 /* Helper function of GOMP_target{,_ext} routines. */
2769 static void *
2770 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2771 void (*host_fn) (void *))
2773 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2774 return (void *) host_fn;
2775 else
2777 gomp_mutex_lock (&devicep->lock);
2778 if (devicep->state == GOMP_DEVICE_FINALIZED)
2780 gomp_mutex_unlock (&devicep->lock);
2781 return NULL;
2784 struct splay_tree_key_s k;
2785 k.host_start = (uintptr_t) host_fn;
2786 k.host_end = k.host_start + 1;
2787 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2788 gomp_mutex_unlock (&devicep->lock);
2789 if (tgt_fn == NULL)
2790 return NULL;
2792 return (void *) tgt_fn->tgt_offset;
2796 /* Called when encountering a target directive. If DEVICE
2797 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2798 GOMP_DEVICE_HOST_FALLBACK (or any value
2799 larger than last available hw device), use host fallback.
2800 FN is address of host code, UNUSED is part of the current ABI, but
2801 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2802 with MAPNUM entries, with addresses of the host objects,
2803 sizes of the host objects (resp. for pointer kind pointer bias
2804 and assumed sizeof (void *) size) and kinds. */
2806 void
2807 GOMP_target (int device, void (*fn) (void *), const void *unused,
2808 size_t mapnum, void **hostaddrs, size_t *sizes,
2809 unsigned char *kinds)
2811 struct gomp_device_descr *devicep = resolve_device (device, true);
2813 void *fn_addr;
2814 if (devicep == NULL
2815 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2816 /* All shared memory devices should use the GOMP_target_ext function. */
2817 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2818 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2819 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2821 htab_t refcount_set = htab_create (mapnum);
2822 struct target_mem_desc *tgt_vars
2823 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2824 &refcount_set, GOMP_MAP_VARS_TARGET);
2825 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2826 NULL);
2827 htab_clear (refcount_set);
2828 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2829 htab_free (refcount_set);
2832 static inline unsigned int
2833 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2835 /* If we cannot run asynchronously, simply ignore nowait. */
2836 if (devicep != NULL && devicep->async_run_func == NULL)
2837 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2839 return flags;
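/* Copy the ICV block of DEVICE back from the device to its host copy, if
   that block has been mapped.  */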
2842 static void
2843 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2845 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2846 if (item == NULL)
2847 return;
2849 void *host_ptr = &item->icvs;
2850 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2851 if (dev_ptr != NULL)
2852 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2853 sizeof (struct gomp_offload_icvs));
2856 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2857 and several arguments have been added:
2858 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2859 DEPEND is array of dependencies, see GOMP_task for details.
2861 ARGS is a pointer to an array consisting of a variable number of both
2862 device-independent and device-specific arguments, which can take one or two
2863 elements where the first specifies for which device it is intended, the type
2864 and optionally also the value. If the value is not present in the first
2865 one, the whole second element is the actual value. The last element of the
2866 array is a single NULL. Among the device-independent ones are, for example,
2867 NUM_TEAMS and THREAD_LIMIT.
2869 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2870 that value, or 1 if the teams construct is not present, or 0 if the
2871 teams construct does not have a num_teams clause and so the choice is
2872 implementation defined, and -1 if it can't be determined on the host
2873 what value GOMP_teams will have on the device.
2874 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2875 body with that value, or 0 if the teams construct does not have a
2876 thread_limit clause or the teams construct is not present, or -1 if it
2877 can't be determined on the host what value GOMP_teams will have on the device. */
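/* Hedged illustration of the encoding consumed below (see the decode loop
   and gomp-constants.h for the authoritative definition): a small
   THREAD_LIMIT value fits into a single element,

     (void *) (((intptr_t) val << GOMP_TARGET_ARG_VALUE_SHIFT)
               | GOMP_TARGET_ARG_THREAD_LIMIT)

   whereas a large value uses two elements, the first carrying
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM and the second the raw value.  */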
2879 void
2880 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2881 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2882 unsigned int flags, void **depend, void **args)
2884 struct gomp_device_descr *devicep = resolve_device (device, true);
2885 size_t tgt_align = 0, tgt_size = 0;
2886 bool fpc_done = false;
2888 /* Obtain the original TEAMS and THREADS values from ARGS. */
2889 intptr_t orig_teams = 1, orig_threads = 0;
2890 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2891 void **tmpargs = args;
2892 while (*tmpargs)
2894 intptr_t id = (intptr_t) *tmpargs++, val;
2895 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2897 val = (intptr_t) *tmpargs++;
2898 len = 2;
2900 else
2902 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2903 len = 1;
2905 num_args += len;
2906 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2907 continue;
2908 val = val > INT_MAX ? INT_MAX : val;
2909 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2911 orig_teams = val;
2912 teams_len = len;
2914 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2916 orig_threads = val;
2917 threads_len = len;
2921 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2922 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2923 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2924 value could not be determined. No change.
2925 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2926 Set device-specific value.
2927 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2928 No change. */
2929 if (orig_teams == -2)
2930 new_teams = 1;
2931 else if (orig_teams == 0)
2933 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2934 if (item != NULL)
2935 new_teams = item->icvs.nteams;
2937 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2938 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2939 e.g. a THREAD_LIMIT clause. */
2940 if (orig_teams > -2 && orig_threads == 0)
2942 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2943 if (item != NULL)
2944 new_threads = item->icvs.teams_thread_limit;
2947 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2948 updated. */
2949 void **new_args = args;
2950 if (orig_teams != new_teams || orig_threads != new_threads)
2952 size_t tms_len = (orig_teams == new_teams
2953 ? teams_len
2954 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
2955 ? 1 : 2));
2956 size_t ths_len = (orig_threads == new_threads
2957 ? threads_len
2958 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
2959 ? 1 : 2));
2960 /* One additional item after the last arg must be NULL. */
2961 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
2962 + ths_len + 1;
2963 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
2965 tmpargs = args;
2966 void **tmp_new_args = new_args;
2967 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
2968 too if they have not been changed and skipped otherwise. */
2969 while (*tmpargs)
2971 intptr_t id = (intptr_t) *tmpargs;
2972 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
2973 && orig_teams != new_teams)
2974 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
2975 && orig_threads != new_threads))
2977 tmpargs++;
2978 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2979 tmpargs++;
2981 else
2983 *tmp_new_args++ = *tmpargs++;
2984 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2985 *tmp_new_args++ = *tmpargs++;
2989 /* Add the new TEAMS arg to the new args list if it has been changed. */
2990 if (orig_teams != new_teams)
2992 intptr_t new_val = new_teams;
2993 if (tms_len == 1)
2995 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
2996 | GOMP_TARGET_ARG_NUM_TEAMS;
2997 *tmp_new_args++ = (void *) new_val;
2999 else
3001 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3002 | GOMP_TARGET_ARG_NUM_TEAMS);
3003 *tmp_new_args++ = (void *) new_val;
3007 /* Add the new THREADS arg to the new args list if it has been changed. */
3008 if (orig_threads != new_threads)
3010 intptr_t new_val = new_threads;
3011 if (ths_len == 1)
3013 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3014 | GOMP_TARGET_ARG_THREAD_LIMIT;
3015 *tmp_new_args++ = (void *) new_val;
3017 else
3019 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3020 | GOMP_TARGET_ARG_THREAD_LIMIT);
3021 *tmp_new_args++ = (void *) new_val;
3025 *tmp_new_args = NULL;
3028 flags = clear_unsupported_flags (devicep, flags);
3030 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3032 struct gomp_thread *thr = gomp_thread ();
3033 /* Create a team if we don't have any around, as nowait
3034 target tasks make sense to run asynchronously even when
3035 outside of any parallel. */
3036 if (__builtin_expect (thr->ts.team == NULL, 0))
3038 struct gomp_team *team = gomp_new_team (1);
3039 struct gomp_task *task = thr->task;
3040 struct gomp_task **implicit_task = &task;
3041 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3042 team->prev_ts = thr->ts;
3043 thr->ts.team = team;
3044 thr->ts.team_id = 0;
3045 thr->ts.work_share = &team->work_shares[0];
3046 thr->ts.last_work_share = NULL;
3047 #ifdef HAVE_SYNC_BUILTINS
3048 thr->ts.single_count = 0;
3049 #endif
3050 thr->ts.static_trip = 0;
3051 thr->task = &team->implicit_task[0];
3052 gomp_init_task (thr->task, NULL, icv);
3053 while (*implicit_task
3054 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3055 implicit_task = &(*implicit_task)->parent;
3056 if (*implicit_task)
3058 thr->task = *implicit_task;
3059 gomp_end_task ();
3060 free (*implicit_task);
3061 thr->task = &team->implicit_task[0];
3063 else
3064 pthread_setspecific (gomp_thread_destructor, thr);
3065 if (implicit_task != &task)
3067 *implicit_task = thr->task;
3068 thr->task = task;
3071 if (thr->ts.team
3072 && !thr->task->final_task)
3074 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3075 sizes, kinds, flags, depend, new_args,
3076 GOMP_TARGET_TASK_BEFORE_MAP);
3077 return;
3081 /* If there are depend clauses, but nowait is not present
3082 (or we are in a final task), block the parent task until the
3083 dependencies are resolved and then just continue with the rest
3084 of the function as if it is a merged task. */
3085 if (depend != NULL)
3087 struct gomp_thread *thr = gomp_thread ();
3088 if (thr->task && thr->task->depend_hash)
3090 /* If we might need to wait, copy firstprivate now. */
3091 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3092 &tgt_align, &tgt_size);
3093 if (tgt_align)
3095 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3096 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3097 tgt_align, tgt_size);
3099 fpc_done = true;
3100 gomp_task_maybe_wait_for_dependencies (depend);
3104 void *fn_addr;
3105 if (devicep == NULL
3106 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3107 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3108 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3110 if (!fpc_done)
3112 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3113 &tgt_align, &tgt_size);
3114 if (tgt_align)
3116 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3117 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3118 tgt_align, tgt_size);
3121 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3122 return;
3125 struct target_mem_desc *tgt_vars;
3126 htab_t refcount_set = NULL;
3128 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3130 if (!fpc_done)
3132 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3133 &tgt_align, &tgt_size);
3134 if (tgt_align)
3136 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3137 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3138 tgt_align, tgt_size);
3141 tgt_vars = NULL;
3143 else
3145 refcount_set = htab_create (mapnum);
3146 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3147 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3149 devicep->run_func (devicep->target_id, fn_addr,
3150 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3151 new_args);
3152 if (tgt_vars)
3154 htab_clear (refcount_set);
3155 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3157 if (refcount_set)
3158 htab_free (refcount_set);
3160 /* Copy back ICVs from device to host.
3161 HOST_PTR is expected to exist since it was added in
3162 gomp_load_image_to_device if not already available. */
3163 gomp_copy_back_icvs (devicep, device);
3168 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3169 keeping track of all variable handling, assuming that reverse offload occurs
3170 only very rarely. The downside is that the reverse search is slow. */
3172 struct gomp_splay_tree_rev_lookup_data {
3173 uintptr_t tgt_start;
3174 uintptr_t tgt_end;
3175 splay_tree_key key;
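/* Callback used during the reverse lookup: if KEY's device address range
   overlaps the [tgt_start, tgt_end) range recorded in D, remember KEY in D
   and return 1; otherwise return 0.  */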
3178 static int
3179 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3181 struct gomp_splay_tree_rev_lookup_data *data;
3182 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3183 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3185 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3186 return 0;
3188 size_t j;
3189 for (j = 0; j < key->tgt->list_count; j++)
3190 if (key->tgt->list[j].key == key)
3191 break;
3192 assert (j < key->tgt->list_count);
3193 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3195 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3196 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3198 data->key = key;
3199 return 1;
3201 return 0;
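/* Walk MEM_MAP looking for a mapping that overlaps the device address range
   [TGT_START, TGT_END); for zero-sized ranges, retry with the range widened
   first one byte forward and then one byte backward.  */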
3204 static inline splay_tree_key
3205 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3206 bool zero_len)
3208 struct gomp_splay_tree_rev_lookup_data data;
3209 data.key = NULL;
3210 data.tgt_start = tgt_start;
3211 data.tgt_end = tgt_end;
3213 if (tgt_start != tgt_end)
3215 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3216 return data.key;
3219 data.tgt_end++;
3220 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3221 if (data.key != NULL || zero_len)
3222 return data.key;
3223 data.tgt_end--;
3225 data.tgt_start--;
3226 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3227 return data.key;
3230 struct cpy_data
3232 uint64_t devaddr;
3233 bool present, aligned;
3237 /* Search just mapped reverse-offload data; returns index if found,
3238 otherwise >= n. */
3240 static inline int
3241 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3242 unsigned short *kinds, uint64_t *sizes, size_t n,
3243 uint64_t tgt_start, uint64_t tgt_end)
3245 const bool short_mapkind = true;
3246 const int typemask = short_mapkind ? 0xff : 0x7;
3247 size_t i;
3248 for (i = 0; i < n; i++)
3250 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3251 == GOMP_MAP_STRUCT);
3252 uint64_t dev_end;
3253 if (!is_struct)
3254 dev_end = d[i].devaddr + sizes[i];
3255 else
3257 if (i + sizes[i] < n)
3258 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3259 else
3260 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3262 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3263 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3264 break;
3265 if (is_struct)
3266 i += sizes[i];
3268 return i;
3271 static inline int
3272 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3273 unsigned short *kinds, uint64_t *sizes,
3274 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3275 bool zero_len)
3277 size_t i;
3278 if (tgt_start != tgt_end)
3279 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3280 tgt_start, tgt_end);
3281 tgt_end++;
3282 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3283 tgt_start, tgt_end);
3284 if (i < n || zero_len)
3285 return i;
3286 tgt_end--;
3288 tgt_start--;
3289 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3290 tgt_start, tgt_end);
3293 /* Handle reverse offload. This is called by the device plugins for a
3294 reverse offload; it is not called if the outer target runs on the host.
3295 The mapping is simplified: device-affecting constructs (except for target
3296 with device(ancestor:1)) must not be encountered; in particular, not
3297 target (enter/exit) data. */
3299 void
3300 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3301 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3302 void (*dev_to_host_cpy) (void *, const void *, size_t, void*),
3303 void (*host_to_dev_cpy) (void *, const void *, size_t, void*),
3304 void *token)
3306 /* Return early if there is no offload code. */
3307 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3308 return;
3309 /* Currently, this fails because of calculate_firstprivate_requirements
3310 below; it could be fixed, but additional code would need to be updated to
3311 handle 32-bit hosts; thus, it is not worthwhile. */
3312 if (sizeof (void *) != sizeof (uint64_t))
3313 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3315 struct cpy_data *cdata = NULL;
3316 uint64_t *devaddrs;
3317 uint64_t *sizes;
3318 unsigned short *kinds;
3319 const bool short_mapkind = true;
3320 const int typemask = short_mapkind ? 0xff : 0x7;
3321 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3323 reverse_splay_tree_key n;
3324 struct reverse_splay_tree_key_s k;
3325 k.dev = fn_ptr;
3327 gomp_mutex_lock (&devicep->lock);
3328 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3329 gomp_mutex_unlock (&devicep->lock);
3331 if (n == NULL)
3332 gomp_fatal ("Cannot find reverse-offload function");
3333 void (*host_fn)() = (void (*)()) n->k->host_start;
3335 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3337 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3338 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3339 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3341 else
3343 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3344 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3345 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3346 if (dev_to_host_cpy)
3348 dev_to_host_cpy (devaddrs, (const void *) (uintptr_t) devaddrs_ptr,
3349 mapnum * sizeof (uint64_t), token);
3350 dev_to_host_cpy (sizes, (const void *) (uintptr_t) sizes_ptr,
3351 mapnum * sizeof (uint64_t), token);
3352 dev_to_host_cpy (kinds, (const void *) (uintptr_t) kinds_ptr,
3353 mapnum * sizeof (unsigned short), token);
3355 else
3357 gomp_copy_dev2host (devicep, NULL, devaddrs,
3358 (const void *) (uintptr_t) devaddrs_ptr,
3359 mapnum * sizeof (uint64_t));
3360 gomp_copy_dev2host (devicep, NULL, sizes,
3361 (const void *) (uintptr_t) sizes_ptr,
3362 mapnum * sizeof (uint64_t));
3363 gomp_copy_dev2host (devicep, NULL, kinds, (const void *) (uintptr_t) kinds_ptr,
3364 mapnum * sizeof (unsigned short));
3368 size_t tgt_align = 0, tgt_size = 0;
3370 /* If actually executed on 32bit systems, the casts lead to wrong code;
3371 but 32bit with offloading is not supported; see top of this function. */
3372 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3373 (void *) (uintptr_t) kinds,
3374 &tgt_align, &tgt_size);
3376 if (tgt_align)
3378 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3379 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3380 if (al)
3381 tgt += tgt_align - al;
3382 tgt_size = 0;
3383 for (uint64_t i = 0; i < mapnum; i++)
3384 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3385 && devaddrs[i] != 0)
3387 size_t align = (size_t) 1 << (kinds[i] >> 8);
3388 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3389 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3390 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3391 (size_t) sizes[i]);
3392 else if (dev_to_host_cpy)
3393 dev_to_host_cpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3394 (size_t) sizes[i], token);
3395 else
3396 gomp_copy_dev2host (devicep, NULL, tgt + tgt_size,
3397 (void *) (uintptr_t) devaddrs[i],
3398 (size_t) sizes[i]);
3399 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3400 tgt_size = tgt_size + sizes[i];
3401 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3402 && i + 1 < mapnum
3403 && ((get_kind (short_mapkind, kinds, i) & typemask)
3404 == GOMP_MAP_ATTACH))
3406 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3407 = (uint64_t) devaddrs[i];
3408 ++i;
3413 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3415 size_t j, struct_cpy = 0;
3416 splay_tree_key n2;
3417 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3418 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3419 gomp_mutex_lock (&devicep->lock);
3420 for (uint64_t i = 0; i < mapnum; i++)
3422 if (devaddrs[i] == 0)
3423 continue;
3424 n = NULL;
3425 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3426 switch (kind)
3428 case GOMP_MAP_FIRSTPRIVATE:
3429 case GOMP_MAP_FIRSTPRIVATE_INT:
3430 continue;
3432 case GOMP_MAP_DELETE:
3433 case GOMP_MAP_RELEASE:
3434 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3435 /* Assume it is present; look it up - but ignore otherwise. */
3436 case GOMP_MAP_ALLOC:
3437 case GOMP_MAP_FROM:
3438 case GOMP_MAP_FORCE_ALLOC:
3439 case GOMP_MAP_FORCE_FROM:
3440 case GOMP_MAP_ALWAYS_FROM:
3441 case GOMP_MAP_TO:
3442 case GOMP_MAP_TOFROM:
3443 case GOMP_MAP_FORCE_TO:
3444 case GOMP_MAP_FORCE_TOFROM:
3445 case GOMP_MAP_ALWAYS_TO:
3446 case GOMP_MAP_ALWAYS_TOFROM:
3447 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3448 cdata[i].devaddr = devaddrs[i];
3449 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3450 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3451 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3452 devaddrs[i],
3453 devaddrs[i] + sizes[i], zero_len);
3454 if (j < i)
3456 n2 = NULL;
3457 cdata[i].present = true;
3458 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3460 else
3462 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3463 devaddrs[i],
3464 devaddrs[i] + sizes[i], zero_len);
3465 cdata[i].present = n2 != NULL;
3467 if (!cdata[i].present
3468 && kind != GOMP_MAP_DELETE
3469 && kind != GOMP_MAP_RELEASE
3470 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3472 cdata[i].aligned = true;
3473 size_t align = (size_t) 1 << (kinds[i] >> 8);
3474 devaddrs[i]
3475 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3476 sizes[i]);
3478 else if (n2 != NULL)
3479 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3480 - (n2->tgt->tgt_start + n2->tgt_offset));
3481 if (((!cdata[i].present || struct_cpy)
3482 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3483 || kind == GOMP_MAP_FORCE_TO
3484 || kind == GOMP_MAP_FORCE_TOFROM
3485 || kind == GOMP_MAP_ALWAYS_TO
3486 || kind == GOMP_MAP_ALWAYS_TOFROM)
3488 if (dev_to_host_cpy)
3489 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3490 (void *) (uintptr_t) cdata[i].devaddr,
3491 sizes[i], token);
3492 else
3493 gomp_copy_dev2host (devicep, NULL,
3494 (void *) (uintptr_t) devaddrs[i],
3495 (void *) (uintptr_t) cdata[i].devaddr,
3496 sizes[i]);
3498 if (struct_cpy)
3499 struct_cpy--;
3500 break;
3501 case GOMP_MAP_ATTACH:
3502 case GOMP_MAP_POINTER:
3503 case GOMP_MAP_ALWAYS_POINTER:
3504 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3505 devaddrs[i] + sizes[i],
3506 devaddrs[i] + sizes[i]
3507 + sizeof (void*), false);
3508 cdata[i].present = n2 != NULL;
3509 cdata[i].devaddr = devaddrs[i];
3510 if (n2)
3511 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3512 - (n2->tgt->tgt_start + n2->tgt_offset));
3513 else
3515 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3516 devaddrs[i] + sizes[i],
3517 devaddrs[i] + sizes[i]
3518 + sizeof (void*), false);
3519 if (j < i)
3521 cdata[i].present = true;
3522 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3523 - cdata[j].devaddr);
3526 if (!cdata[i].present)
3527 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3528 /* Assume that when present, the pointer is already correct. */
3529 if (!n2)
3530 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3531 = devaddrs[i-1];
3532 break;
3533 case GOMP_MAP_TO_PSET:
3534 /* Assume that when present, the pointers are fine and no 'to:'
3535 is required. */
3536 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3537 devaddrs[i], devaddrs[i] + sizes[i],
3538 false);
3539 cdata[i].present = n2 != NULL;
3540 cdata[i].devaddr = devaddrs[i];
3541 if (n2)
3542 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3543 - (n2->tgt->tgt_start + n2->tgt_offset));
3544 else
3546 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3547 devaddrs[i],
3548 devaddrs[i] + sizes[i], false);
3549 if (j < i)
3551 cdata[i].present = true;
3552 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3553 - cdata[j].devaddr);
3556 if (!cdata[i].present)
3558 cdata[i].aligned = true;
3559 size_t align = (size_t) 1 << (kinds[i] >> 8);
3560 devaddrs[i]
3561 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3562 sizes[i]);
3563 if (dev_to_host_cpy)
3564 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3565 (void *) (uintptr_t) cdata[i].devaddr,
3566 sizes[i], token);
3567 else
3568 gomp_copy_dev2host (devicep, NULL,
3569 (void *) (uintptr_t) devaddrs[i],
3570 (void *) (uintptr_t) cdata[i].devaddr,
3571 sizes[i]);
3573 for (j = i + 1; j < mapnum; j++)
3575 kind = get_kind (short_mapkind, kinds, j) & typemask;
3576 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3577 && !GOMP_MAP_POINTER_P (kind))
3578 break;
3579 if (devaddrs[j] < devaddrs[i])
3580 break;
3581 if (cdata[i].present)
3582 continue;
3583 if (devaddrs[j] == 0)
3585 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3586 continue;
3588 int k;
3589 n2 = NULL;
3590 /* Dereference devaddrs[j] to get the device addr. */
3591 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3592 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3593 + sizes[j]);
3594 cdata[j].present = true;
3595 cdata[j].devaddr = devaddrs[j];
3596 if (devaddrs[j] == 0)
3597 continue;
3598 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3599 devaddrs[j],
3600 devaddrs[j] + sizeof (void*),
3601 false);
3602 if (k < j)
3603 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3604 - cdata[k].devaddr);
3605 else
3607 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3608 devaddrs[j],
3609 devaddrs[j] + sizeof (void*),
3610 false);
3611 if (n2 == NULL)
3613 gomp_mutex_unlock (&devicep->lock);
3614 gomp_fatal ("Pointer target wasn't mapped");
3616 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3617 - (n2->tgt->tgt_start + n2->tgt_offset));
3619 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3620 = (void *) (uintptr_t) devaddrs[j];
3622 i = j - 1;
3623 break;
3624 case GOMP_MAP_STRUCT:
3625 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3626 devaddrs[i + sizes[i]]
3627 + sizes[i + sizes[i]], false);
3628 cdata[i].present = n2 != NULL;
3629 cdata[i].devaddr = devaddrs[i];
3630 struct_cpy = cdata[i].present ? 0 : sizes[i];
3631 if (!n2)
3633 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3634 - devaddrs[i+1]
3635 + sizes[i + sizes[i]]);
3636 size_t align = (size_t) 1 << (kinds[i] >> 8);
3637 cdata[i].aligned = true;
3638 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3639 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3641 else
3642 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3643 - (n2->tgt->tgt_start + n2->tgt_offset));
3644 break;
3645 default:
3646 gomp_mutex_unlock (&devicep->lock);
3647 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3650 gomp_mutex_unlock (&devicep->lock);
3653 host_fn (devaddrs);
3655 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3657 uint64_t struct_cpy = 0;
3658 bool clean_struct = false;
3659 for (uint64_t i = 0; i < mapnum; i++)
3661 if (cdata[i].devaddr == 0)
3662 continue;
3663 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3664 bool copy = !cdata[i].present || struct_cpy;
3665 switch (kind)
3667 case GOMP_MAP_FORCE_FROM:
3668 case GOMP_MAP_FORCE_TOFROM:
3669 case GOMP_MAP_ALWAYS_FROM:
3670 case GOMP_MAP_ALWAYS_TOFROM:
3671 copy = true;
3672 /* FALLTHRU */
3673 case GOMP_MAP_FROM:
3674 case GOMP_MAP_TOFROM:
3675 if (copy && host_to_dev_cpy)
3676 host_to_dev_cpy ((void *) (uintptr_t) cdata[i].devaddr,
3677 (void *) (uintptr_t) devaddrs[i],
3678 sizes[i], token);
3679 else if (copy)
3680 gomp_copy_host2dev (devicep, NULL,
3681 (void *) (uintptr_t) cdata[i].devaddr,
3682 (void *) (uintptr_t) devaddrs[i],
3683 sizes[i], false, NULL);
3684 default:
3685 break;
3687 if (struct_cpy)
3689 struct_cpy--;
3690 continue;
3692 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3694 clean_struct = true;
3695 struct_cpy = sizes[i];
3697 else if (!cdata[i].present && cdata[i].aligned)
3698 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3699 else if (!cdata[i].present)
3700 free ((void *) (uintptr_t) devaddrs[i]);
3702 if (clean_struct)
3703 for (uint64_t i = 0; i < mapnum; i++)
3704 if (!cdata[i].present
3705 && ((get_kind (short_mapkind, kinds, i) & typemask)
3706 == GOMP_MAP_STRUCT))
3708 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3709 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3712 free (devaddrs);
3713 free (sizes);
3714 free (kinds);
3718 /* Host fallback for GOMP_target_data{,_ext} routines. */
3720 static void
3721 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3723 struct gomp_task_icv *icv = gomp_icv (false);
3725 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3726 && devicep != NULL)
3727 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3728 "be used for offloading");
3730 if (icv->target_data)
3732 /* Even when doing a host fallback, if there are any active
3733 #pragma omp target data constructs, we need to remember the
3734 new #pragma omp target data, otherwise GOMP_target_end_data
3735 would get out of sync. */
3736 struct target_mem_desc *tgt
3737 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3738 NULL, GOMP_MAP_VARS_DATA);
3739 tgt->prev = icv->target_data;
3740 icv->target_data = tgt;
3744 void
3745 GOMP_target_data (int device, const void *unused, size_t mapnum,
3746 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3748 struct gomp_device_descr *devicep = resolve_device (device, true);
3750 if (devicep == NULL
3751 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3752 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3753 return gomp_target_data_fallback (devicep);
3755 struct target_mem_desc *tgt
3756 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3757 NULL, GOMP_MAP_VARS_DATA);
3758 struct gomp_task_icv *icv = gomp_icv (true);
3759 tgt->prev = icv->target_data;
3760 icv->target_data = tgt;
3763 void
3764 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3765 size_t *sizes, unsigned short *kinds)
3767 struct gomp_device_descr *devicep = resolve_device (device, true);
3769 if (devicep == NULL
3770 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3771 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3772 return gomp_target_data_fallback (devicep);
3774 struct target_mem_desc *tgt
3775 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3776 NULL, GOMP_MAP_VARS_DATA);
3777 struct gomp_task_icv *icv = gomp_icv (true);
3778 tgt->prev = icv->target_data;
3779 icv->target_data = tgt;
3782 void
3783 GOMP_target_end_data (void)
3785 struct gomp_task_icv *icv = gomp_icv (false);
3786 if (icv->target_data)
3788 struct target_mem_desc *tgt = icv->target_data;
3789 icv->target_data = tgt->prev;
3790 gomp_unmap_vars (tgt, true, NULL);
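/* A rough sketch (not taken from the compiler) of how '#pragma omp target
   data' can be lowered onto the GOMP_target_data_ext / GOMP_target_end_data
   pair above.  The concrete arrays GCC emits, and any extra bits packed into
   the kind bytes, are simplified away here, so treat this only as an
   illustration of the calling convention:

       double x[64];
       void *hostaddrs[1] = { x };
       size_t sizes[1] = { sizeof (x) };
       unsigned short kinds[1] = { GOMP_MAP_TOFROM };  // from gomp-constants.h

       GOMP_target_data_ext (device_num, 1, hostaddrs, sizes, kinds);
       // ... structured block; target regions here see 'x' as mapped ...
       GOMP_target_end_data ();  */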
3794 void
3795 GOMP_target_update (int device, const void *unused, size_t mapnum,
3796 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3798 struct gomp_device_descr *devicep = resolve_device (device, true);
3800 if (devicep == NULL
3801 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3802 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3803 return;
3805 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3808 void
3809 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3810 size_t *sizes, unsigned short *kinds,
3811 unsigned int flags, void **depend)
3813 struct gomp_device_descr *devicep = resolve_device (device, true);
3815 /* If there are depend clauses, but nowait is not present,
3816 block the parent task until the dependencies are resolved
3817 and then just continue with the rest of the function as if it
3818 were a merged task.  Until we are able to schedule tasks during
3819 variable mapping or unmapping, ignore nowait if depend clauses
3820 are not present. */
3821 if (depend != NULL)
3823 struct gomp_thread *thr = gomp_thread ();
3824 if (thr->task && thr->task->depend_hash)
3826 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3827 && thr->ts.team
3828 && !thr->task->final_task)
3830 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3831 mapnum, hostaddrs, sizes, kinds,
3832 flags | GOMP_TARGET_FLAG_UPDATE,
3833 depend, NULL, GOMP_TARGET_TASK_DATA))
3834 return;
3836 else
3838 struct gomp_team *team = thr->ts.team;
3839 /* If parallel or taskgroup has been cancelled, don't start new
3840 tasks. */
3841 if (__builtin_expect (gomp_cancel_var, 0) && team)
3843 if (gomp_team_barrier_cancelled (&team->barrier))
3844 return;
3845 if (thr->task->taskgroup)
3847 if (thr->task->taskgroup->cancelled)
3848 return;
3849 if (thr->task->taskgroup->workshare
3850 && thr->task->taskgroup->prev
3851 && thr->task->taskgroup->prev->cancelled)
3852 return;
3856 gomp_task_maybe_wait_for_dependencies (depend);
3861 if (devicep == NULL
3862 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3863 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3864 return;
3866 struct gomp_thread *thr = gomp_thread ();
3867 struct gomp_team *team = thr->ts.team;
3868 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3869 if (__builtin_expect (gomp_cancel_var, 0) && team)
3871 if (gomp_team_barrier_cancelled (&team->barrier))
3872 return;
3873 if (thr->task->taskgroup)
3875 if (thr->task->taskgroup->cancelled)
3876 return;
3877 if (thr->task->taskgroup->workshare
3878 && thr->task->taskgroup->prev
3879 && thr->task->taskgroup->prev->cancelled)
3880 return;
3884 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3887 static void
3888 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3889 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3890 htab_t *refcount_set)
3892 const int typemask = 0xff;
3893 size_t i;
3894 gomp_mutex_lock (&devicep->lock);
3895 if (devicep->state == GOMP_DEVICE_FINALIZED)
3897 gomp_mutex_unlock (&devicep->lock);
3898 return;
3901 for (i = 0; i < mapnum; i++)
3902 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3904 struct splay_tree_key_s cur_node;
3905 cur_node.host_start = (uintptr_t) hostaddrs[i];
3906 cur_node.host_end = cur_node.host_start + sizeof (void *);
3907 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3909 if (n)
3910 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3911 false, NULL);
3914 int nrmvars = 0;
3915 splay_tree_key remove_vars[mapnum];
3917 for (i = 0; i < mapnum; i++)
3919 struct splay_tree_key_s cur_node;
3920 unsigned char kind = kinds[i] & typemask;
3921 switch (kind)
3923 case GOMP_MAP_FROM:
3924 case GOMP_MAP_ALWAYS_FROM:
3925 case GOMP_MAP_DELETE:
3926 case GOMP_MAP_RELEASE:
3927 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3928 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3929 cur_node.host_start = (uintptr_t) hostaddrs[i];
3930 cur_node.host_end = cur_node.host_start + sizes[i];
3931 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3932 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
3933 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
3934 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3935 if (!k)
3936 continue;
3938 bool delete_p = (kind == GOMP_MAP_DELETE
3939 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
3940 bool do_copy, do_remove;
3941 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
3942 &do_remove);
3944 if ((kind == GOMP_MAP_FROM && do_copy)
3945 || kind == GOMP_MAP_ALWAYS_FROM)
3947 if (k->aux && k->aux->attach_count)
3949 /* We have to be careful not to overwrite still-attached
3950 pointers during the copy back to host. */
3951 uintptr_t addr = k->host_start;
3952 while (addr < k->host_end)
3954 size_t i = (addr - k->host_start) / sizeof (void *);
3955 if (k->aux->attach_count[i] == 0)
3956 gomp_copy_dev2host (devicep, NULL, (void *) addr,
3957 (void *) (k->tgt->tgt_start
3958 + k->tgt_offset
3959 + addr - k->host_start),
3960 sizeof (void *));
3961 addr += sizeof (void *);
3964 else
3965 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3966 (void *) (k->tgt->tgt_start + k->tgt_offset
3967 + cur_node.host_start
3968 - k->host_start),
3969 cur_node.host_end - cur_node.host_start);
3972 /* Structure element lists are removed all at once, which may cause
3973 immediate deallocation of the target_mem_desc and hence errors if we
3974 still have following element siblings to copy back.
3975 While we're at it, it also seems more disciplined to simply
3976 queue all removals together for processing below.
3978 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3979 not have this problem, since it maintains an additional
3980 tgt->refcount = 1 reference to the target_mem_desc to start with.  */
3982 if (do_remove)
3983 remove_vars[nrmvars++] = k;
3984 break;
3986 case GOMP_MAP_DETACH:
3987 break;
3988 default:
3989 gomp_mutex_unlock (&devicep->lock);
3990 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3991 kind);
3995 for (int i = 0; i < nrmvars; i++)
3996 gomp_remove_var (devicep, remove_vars[i]);
3998 gomp_mutex_unlock (&devicep->lock);
4001 void
4002 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4003 size_t *sizes, unsigned short *kinds,
4004 unsigned int flags, void **depend)
4006 struct gomp_device_descr *devicep = resolve_device (device, true);
4008 /* If there are depend clauses, but nowait is not present,
4009 block the parent task until the dependencies are resolved
4010 and then just continue with the rest of the function as if it
4011 were a merged task.  Until we are able to schedule tasks during
4012 variable mapping or unmapping, ignore nowait if depend clauses
4013 are not present. */
4014 if (depend != NULL)
4016 struct gomp_thread *thr = gomp_thread ();
4017 if (thr->task && thr->task->depend_hash)
4019 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4020 && thr->ts.team
4021 && !thr->task->final_task)
4023 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4024 mapnum, hostaddrs, sizes, kinds,
4025 flags, depend, NULL,
4026 GOMP_TARGET_TASK_DATA))
4027 return;
4029 else
4031 struct gomp_team *team = thr->ts.team;
4032 /* If parallel or taskgroup has been cancelled, don't start new
4033 tasks. */
4034 if (__builtin_expect (gomp_cancel_var, 0) && team)
4036 if (gomp_team_barrier_cancelled (&team->barrier))
4037 return;
4038 if (thr->task->taskgroup)
4040 if (thr->task->taskgroup->cancelled)
4041 return;
4042 if (thr->task->taskgroup->workshare
4043 && thr->task->taskgroup->prev
4044 && thr->task->taskgroup->prev->cancelled)
4045 return;
4049 gomp_task_maybe_wait_for_dependencies (depend);
4054 if (devicep == NULL
4055 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4056 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4057 return;
4059 struct gomp_thread *thr = gomp_thread ();
4060 struct gomp_team *team = thr->ts.team;
4061 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4062 if (__builtin_expect (gomp_cancel_var, 0) && team)
4064 if (gomp_team_barrier_cancelled (&team->barrier))
4065 return;
4066 if (thr->task->taskgroup)
4068 if (thr->task->taskgroup->cancelled)
4069 return;
4070 if (thr->task->taskgroup->workshare
4071 && thr->task->taskgroup->prev
4072 && thr->task->taskgroup->prev->cancelled)
4073 return;
4077 htab_t refcount_set = htab_create (mapnum);
4079 /* The variables are mapped separately such that they can be released
4080 independently. */
4081 size_t i, j;
4082 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4083 for (i = 0; i < mapnum; i++)
4084 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4086 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4087 &kinds[i], true, &refcount_set,
4088 GOMP_MAP_VARS_ENTER_DATA);
4089 i += sizes[i];
4091 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4093 for (j = i + 1; j < mapnum; j++)
4094 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4095 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4096 break;
4097 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4098 &kinds[i], true, &refcount_set,
4099 GOMP_MAP_VARS_ENTER_DATA);
4100 i += j - i - 1;
4102 else if (i + 1 < mapnum
4103 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4104 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4105 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4107 /* An attach operation must be processed together with the mapped
4108 base-pointer list item. */
4109 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4110 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4111 i += 1;
4113 else
4114 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4115 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4116 else
4117 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4118 htab_free (refcount_set);
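/* A sketch (assumed, not the exact compiler output) of how the user-level
   directives reach this entry point; the kind values come from
   gomp-constants.h, but the flag bits and array layout are simplified:

       #pragma omp target enter data map(to: a[0:n])
         -> GOMP_target_enter_exit_data (dev, 1, hostaddrs, sizes, kinds,
                                         0, NULL);
            // with kinds[0] = GOMP_MAP_TO

       #pragma omp target exit data map(from: a[0:n])
         -> GOMP_target_enter_exit_data (dev, 1, hostaddrs, sizes, kinds,
                                         GOMP_TARGET_FLAG_EXIT_DATA, NULL);
            // with kinds[0] = GOMP_MAP_FROM  */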
4121 bool
4122 gomp_target_task_fn (void *data)
4124 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4125 struct gomp_device_descr *devicep = ttask->devicep;
4127 if (ttask->fn != NULL)
4129 void *fn_addr;
4130 if (devicep == NULL
4131 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4132 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4133 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4135 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4136 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4137 ttask->args);
4138 return false;
4141 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4143 if (ttask->tgt)
4144 gomp_unmap_vars (ttask->tgt, true, NULL);
4145 return false;
4148 void *actual_arguments;
4149 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4151 ttask->tgt = NULL;
4152 actual_arguments = ttask->hostaddrs;
4154 else
4156 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4157 NULL, ttask->sizes, ttask->kinds, true,
4158 NULL, GOMP_MAP_VARS_TARGET);
4159 actual_arguments = (void *) ttask->tgt->tgt_start;
4161 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4163 assert (devicep->async_run_func);
4164 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4165 ttask->args, (void *) ttask);
4166 return true;
4168 else if (devicep == NULL
4169 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4170 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4171 return false;
4173 size_t i;
4174 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4175 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4176 ttask->kinds, true);
4177 else
4179 htab_t refcount_set = htab_create (ttask->mapnum);
4180 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4181 for (i = 0; i < ttask->mapnum; i++)
4182 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4184 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4185 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4186 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4187 i += ttask->sizes[i];
4189 else
4190 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4191 &ttask->kinds[i], true, &refcount_set,
4192 GOMP_MAP_VARS_ENTER_DATA);
4193 else
4194 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4195 ttask->kinds, &refcount_set);
4196 htab_free (refcount_set);
4198 return false;
4201 void
4202 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4204 if (thread_limit)
4206 struct gomp_task_icv *icv = gomp_icv (true);
4207 icv->thread_limit_var
4208 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4210 (void) num_teams;
4213 bool
4214 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4215 unsigned int thread_limit, bool first)
4217 struct gomp_thread *thr = gomp_thread ();
4218 if (first)
4220 if (thread_limit)
4222 struct gomp_task_icv *icv = gomp_icv (true);
4223 icv->thread_limit_var
4224 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4226 (void) num_teams_high;
4227 if (num_teams_low == 0)
4228 num_teams_low = 1;
4229 thr->num_teams = num_teams_low - 1;
4230 thr->team_num = 0;
4232 else if (thr->team_num == thr->num_teams)
4233 return false;
4234 else
4235 ++thr->team_num;
4236 return true;
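/* GOMP_teams4 is meant to be driven from a loop in the code generated for a
   host-fallback 'target teams' region: the first call (FIRST set) records
   the team count and thread limit and runs team 0, and each later call
   advances thr->team_num until every logical team has run.  Schematically
   (the exact shape of the generated loop is an assumption):

       for (bool first = true;
            GOMP_teams4 (num_teams_lo, num_teams_hi, thread_limit, first);
            first = false)
         {
           // body of the teams region, executed once per logical team
         }
 */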
4239 void *
4240 omp_target_alloc (size_t size, int device_num)
4242 if (device_num == omp_initial_device
4243 || device_num == gomp_get_num_devices ())
4244 return malloc (size);
4246 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4247 if (devicep == NULL)
4248 return NULL;
4250 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4251 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4252 return malloc (size);
4254 gomp_mutex_lock (&devicep->lock);
4255 void *ret = devicep->alloc_func (devicep->target_id, size);
4256 gomp_mutex_unlock (&devicep->lock);
4257 return ret;
4260 void
4261 omp_target_free (void *device_ptr, int device_num)
4263 if (device_num == omp_initial_device
4264 || device_num == gomp_get_num_devices ())
4266 free (device_ptr);
4267 return;
4270 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4271 if (devicep == NULL || device_ptr == NULL)
4272 return;
4274 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4275 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4277 free (device_ptr);
4278 return;
4281 gomp_mutex_lock (&devicep->lock);
4282 gomp_free_device_memory (devicep, device_ptr);
4283 gomp_mutex_unlock (&devicep->lock);
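/* Minimal usage sketch for omp_target_alloc / omp_target_free (host side;
   error handling and the actual use of the memory omitted):

       #include <omp.h>
       #include <stdlib.h>

       int dev = omp_get_default_device ();
       void *dmem = omp_target_alloc (1024, dev);  // 1 KiB of device memory
       if (dmem == NULL)
         abort ();                                 // allocation failed
       // ... use dmem via omp_target_memcpy or is_device_ptr(...) clauses ...
       omp_target_free (dmem, dev);  */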
4286 int
4287 omp_target_is_present (const void *ptr, int device_num)
4289 if (device_num == omp_initial_device
4290 || device_num == gomp_get_num_devices ())
4291 return 1;
4293 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4294 if (devicep == NULL)
4295 return 0;
4297 if (ptr == NULL)
4298 return 1;
4300 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4301 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4302 return 1;
4304 gomp_mutex_lock (&devicep->lock);
4305 struct splay_tree_s *mem_map = &devicep->mem_map;
4306 struct splay_tree_key_s cur_node;
4308 cur_node.host_start = (uintptr_t) ptr;
4309 cur_node.host_end = cur_node.host_start;
4310 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4311 int ret = n != NULL;
4312 gomp_mutex_unlock (&devicep->lock);
4313 return ret;
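/* Usage sketch: after mapping an array with 'target enter data', the host
   can ask whether it is present on a device.  'buf', 'n' and 'dev' are
   placeholders; the call returns nonzero for the host device, for NULL
   pointers and for shared-memory devices, as implemented above:

       #pragma omp target enter data map(to: buf[0:n]) device(dev)
       if (omp_target_is_present (buf, dev))
         {
           // buf already has device storage; no remapping needed
         }
 */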
4316 static int
4317 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4318 struct gomp_device_descr **dst_devicep,
4319 struct gomp_device_descr **src_devicep)
4321 if (dst_device_num != gomp_get_num_devices ()
4322 /* Above gomp_get_num_devices has to be called unconditionally. */
4323 && dst_device_num != omp_initial_device)
4325 *dst_devicep = resolve_device (dst_device_num, false);
4326 if (*dst_devicep == NULL)
4327 return EINVAL;
4329 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4330 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4331 *dst_devicep = NULL;
4334 if (src_device_num != num_devices_openmp
4335 && src_device_num != omp_initial_device)
4337 *src_devicep = resolve_device (src_device_num, false);
4338 if (*src_devicep == NULL)
4339 return EINVAL;
4341 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4342 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4343 *src_devicep = NULL;
4346 return 0;
4349 static int
4350 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4351 size_t dst_offset, size_t src_offset,
4352 struct gomp_device_descr *dst_devicep,
4353 struct gomp_device_descr *src_devicep)
4355 bool ret;
4356 if (src_devicep == NULL && dst_devicep == NULL)
4358 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4359 return 0;
4361 if (src_devicep == NULL)
4363 gomp_mutex_lock (&dst_devicep->lock);
4364 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4365 (char *) dst + dst_offset,
4366 (char *) src + src_offset, length);
4367 gomp_mutex_unlock (&dst_devicep->lock);
4368 return (ret ? 0 : EINVAL);
4370 if (dst_devicep == NULL)
4372 gomp_mutex_lock (&src_devicep->lock);
4373 ret = src_devicep->dev2host_func (src_devicep->target_id,
4374 (char *) dst + dst_offset,
4375 (char *) src + src_offset, length);
4376 gomp_mutex_unlock (&src_devicep->lock);
4377 return (ret ? 0 : EINVAL);
4379 if (src_devicep == dst_devicep)
4381 gomp_mutex_lock (&src_devicep->lock);
4382 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4383 (char *) dst + dst_offset,
4384 (char *) src + src_offset, length);
4385 gomp_mutex_unlock (&src_devicep->lock);
4386 return (ret ? 0 : EINVAL);
4388 return EINVAL;
4391 int
4392 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4393 size_t src_offset, int dst_device_num, int src_device_num)
4395 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4396 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4397 &dst_devicep, &src_devicep);
4399 if (ret)
4400 return ret;
4402 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4403 dst_devicep, src_devicep);
4405 return ret;
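/* Usage sketch: copying 'len' bytes from a host buffer into device memory
   obtained from omp_target_alloc ('hostbuf' and 'len' are placeholders).
   The host is addressed via omp_get_initial_device (); the offsets are
   byte offsets:

       int dev  = omp_get_default_device ();
       int host = omp_get_initial_device ();
       void *dmem = omp_target_alloc (len, dev);
       int rc = omp_target_memcpy (dmem, hostbuf, len,
                                   0, 0,          // dst_offset, src_offset
                                   dev, host);
       // rc is 0 on success, EINVAL otherwise (see the helpers above)  */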
4408 typedef struct
4410 void *dst;
4411 const void *src;
4412 size_t length;
4413 size_t dst_offset;
4414 size_t src_offset;
4415 struct gomp_device_descr *dst_devicep;
4416 struct gomp_device_descr *src_devicep;
4417 } omp_target_memcpy_data;
4419 static void
4420 omp_target_memcpy_async_helper (void *args)
4422 omp_target_memcpy_data *a = args;
4423 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4424 a->src_offset, a->dst_devicep, a->src_devicep))
4425 gomp_fatal ("omp_target_memcpy failed");
4428 int
4429 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4430 size_t dst_offset, size_t src_offset,
4431 int dst_device_num, int src_device_num,
4432 int depobj_count, omp_depend_t *depobj_list)
4434 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4435 unsigned int flags = 0;
4436 void *depend[depobj_count + 5];
4437 int i;
4438 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4439 &dst_devicep, &src_devicep);
4441 omp_target_memcpy_data s = {
4442 .dst = dst,
4443 .src = src,
4444 .length = length,
4445 .dst_offset = dst_offset,
4446 .src_offset = src_offset,
4447 .dst_devicep = dst_devicep,
4448 .src_devicep = src_devicep
4451 if (check)
4452 return check;
4454 if (depobj_count > 0 && depobj_list != NULL)
4456 flags |= GOMP_TASK_FLAG_DEPEND;
4457 depend[0] = 0;
4458 depend[1] = (void *) (uintptr_t) depobj_count;
4459 depend[2] = depend[3] = depend[4] = 0;
4460 for (i = 0; i < depobj_count; ++i)
4461 depend[i + 5] = &depobj_list[i];
4464 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4465 __alignof__ (s), true, flags, depend, 0, NULL);
4467 return 0;
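/* Usage sketch for the asynchronous variant: with no depend objects the
   copy is carried out by a generated task, so the caller has to synchronize
   (e.g. via taskwait) before relying on the destination contents; 'dmem',
   'hostbuf', 'len', 'dev' and 'host' are placeholders:

       int rc = omp_target_memcpy_async (dmem, hostbuf, len, 0, 0,
                                         dev, host, 0, NULL);
       #pragma omp taskwait   // wait for the copy task created above  */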
4470 static int
4471 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4472 int num_dims, const size_t *volume,
4473 const size_t *dst_offsets,
4474 const size_t *src_offsets,
4475 const size_t *dst_dimensions,
4476 const size_t *src_dimensions,
4477 struct gomp_device_descr *dst_devicep,
4478 struct gomp_device_descr *src_devicep)
4480 size_t dst_slice = element_size;
4481 size_t src_slice = element_size;
4482 size_t j, dst_off, src_off, length;
4483 int i, ret;
4485 if (num_dims == 1)
4487 if (__builtin_mul_overflow (element_size, volume[0], &length)
4488 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4489 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4490 return EINVAL;
4491 if (dst_devicep == NULL && src_devicep == NULL)
4493 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4494 length);
4495 ret = 1;
4497 else if (src_devicep == NULL)
4498 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4499 (char *) dst + dst_off,
4500 (const char *) src + src_off,
4501 length);
4502 else if (dst_devicep == NULL)
4503 ret = src_devicep->dev2host_func (src_devicep->target_id,
4504 (char *) dst + dst_off,
4505 (const char *) src + src_off,
4506 length);
4507 else if (src_devicep == dst_devicep)
4508 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4509 (char *) dst + dst_off,
4510 (const char *) src + src_off,
4511 length);
4512 else
4513 ret = 0;
4514 return ret ? 0 : EINVAL;
4517 /* FIXME: it would be nice to have some plugin function to handle
4518 num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
4519 counts can be handled by the generic recursion below, and for
4520 host-to-host copies that recursion should be used for any num_dims >= 2. */
4522 for (i = 1; i < num_dims; i++)
4523 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4524 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4525 return EINVAL;
4526 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4527 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4528 return EINVAL;
4529 for (j = 0; j < volume[0]; j++)
4531 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4532 (const char *) src + src_off,
4533 element_size, num_dims - 1,
4534 volume + 1, dst_offsets + 1,
4535 src_offsets + 1, dst_dimensions + 1,
4536 src_dimensions + 1, dst_devicep,
4537 src_devicep);
4538 if (ret)
4539 return ret;
4540 dst_off += dst_slice;
4541 src_off += src_slice;
4543 return 0;
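/* Worked example of the slice arithmetic above for a 2-D copy with
   element_size = 8, src_dimensions = {4, 5} and src_offsets = {1, 2}:
   src_slice becomes 8 * 5 = 40 bytes (one full row), the outer level adds
   src_off = 1 * 40 = 40, and the recursive 1-D step adds 2 * 8 = 16, so the
   first element copied sits at byte offset 56, i.e. element (1,2) of the
   source array, as expected.  */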
4546 static int
4547 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4548 int src_device_num,
4549 struct gomp_device_descr **dst_devicep,
4550 struct gomp_device_descr **src_devicep)
4552 if (!dst && !src)
4553 return INT_MAX;
4555 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4556 dst_devicep, src_devicep);
4557 if (ret)
4558 return ret;
4560 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
4561 return EINVAL;
4563 return 0;
4566 static int
4567 omp_target_memcpy_rect_copy (void *dst, const void *src,
4568 size_t element_size, int num_dims,
4569 const size_t *volume, const size_t *dst_offsets,
4570 const size_t *src_offsets,
4571 const size_t *dst_dimensions,
4572 const size_t *src_dimensions,
4573 struct gomp_device_descr *dst_devicep,
4574 struct gomp_device_descr *src_devicep)
4576 if (src_devicep)
4577 gomp_mutex_lock (&src_devicep->lock);
4578 else if (dst_devicep)
4579 gomp_mutex_lock (&dst_devicep->lock);
4580 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4581 volume, dst_offsets, src_offsets,
4582 dst_dimensions, src_dimensions,
4583 dst_devicep, src_devicep);
4584 if (src_devicep)
4585 gomp_mutex_unlock (&src_devicep->lock);
4586 else if (dst_devicep)
4587 gomp_mutex_unlock (&dst_devicep->lock);
4589 return ret;
4592 int
4593 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4594 int num_dims, const size_t *volume,
4595 const size_t *dst_offsets,
4596 const size_t *src_offsets,
4597 const size_t *dst_dimensions,
4598 const size_t *src_dimensions,
4599 int dst_device_num, int src_device_num)
4601 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4603 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4604 src_device_num, &dst_devicep,
4605 &src_devicep);
4607 if (check)
4608 return check;
4610 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4611 volume, dst_offsets, src_offsets,
4612 dst_dimensions, src_dimensions,
4613 dst_devicep, src_devicep);
4615 return ret;
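/* Usage sketch: copying a 2 x 3 block of doubles, starting at row 1 and
   column 1, from a 4 x 5 host array into the same position of a 4 x 5
   device allocation ('ddst', 'hsrc' and 'dev' are placeholders); all
   offsets and dimensions are counted in elements, not bytes:

       size_t volume[2]  = { 2, 3 };
       size_t offsets[2] = { 1, 1 };
       size_t dims[2]    = { 4, 5 };
       int rc = omp_target_memcpy_rect (ddst, hsrc, sizeof (double), 2,
                                        volume, offsets, offsets, dims, dims,
                                        dev, omp_get_initial_device ());  */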
4618 typedef struct
4620 void *dst;
4621 const void *src;
4622 size_t element_size;
4623 const size_t *volume;
4624 const size_t *dst_offsets;
4625 const size_t *src_offsets;
4626 const size_t *dst_dimensions;
4627 const size_t *src_dimensions;
4628 struct gomp_device_descr *dst_devicep;
4629 struct gomp_device_descr *src_devicep;
4630 int num_dims;
4631 } omp_target_memcpy_rect_data;
4633 static void
4634 omp_target_memcpy_rect_async_helper (void *args)
4636 omp_target_memcpy_rect_data *a = args;
4637 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4638 a->num_dims, a->volume, a->dst_offsets,
4639 a->src_offsets, a->dst_dimensions,
4640 a->src_dimensions, a->dst_devicep,
4641 a->src_devicep);
4642 if (ret)
4643 gomp_fatal ("omp_target_memcpy_rect failed");
4646 int
4647 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4648 int num_dims, const size_t *volume,
4649 const size_t *dst_offsets,
4650 const size_t *src_offsets,
4651 const size_t *dst_dimensions,
4652 const size_t *src_dimensions,
4653 int dst_device_num, int src_device_num,
4654 int depobj_count, omp_depend_t *depobj_list)
4656 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4657 unsigned flags = 0;
4658 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4659 src_device_num, &dst_devicep,
4660 &src_devicep);
4661 void *depend[depobj_count + 5];
4662 int i;
4664 omp_target_memcpy_rect_data s = {
4665 .dst = dst,
4666 .src = src,
4667 .element_size = element_size,
4668 .num_dims = num_dims,
4669 .volume = volume,
4670 .dst_offsets = dst_offsets,
4671 .src_offsets = src_offsets,
4672 .dst_dimensions = dst_dimensions,
4673 .src_dimensions = src_dimensions,
4674 .dst_devicep = dst_devicep,
4675 .src_devicep = src_devicep
4678 if (check)
4679 return check;
4681 if (depobj_count > 0 && depobj_list != NULL)
4683 flags |= GOMP_TASK_FLAG_DEPEND;
4684 depend[0] = 0;
4685 depend[1] = (void *) (uintptr_t) depobj_count;
4686 depend[2] = depend[3] = depend[4] = 0;
4687 for (i = 0; i < depobj_count; ++i)
4688 depend[i + 5] = &depobj_list[i];
4691 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4692 __alignof__ (s), true, flags, depend, 0, NULL);
4694 return 0;
4697 int
4698 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4699 size_t size, size_t device_offset, int device_num)
4701 if (device_num == omp_initial_device
4702 || device_num == gomp_get_num_devices ())
4703 return EINVAL;
4705 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4706 if (devicep == NULL)
4707 return EINVAL;
4709 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4710 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4711 return EINVAL;
4713 gomp_mutex_lock (&devicep->lock);
4715 struct splay_tree_s *mem_map = &devicep->mem_map;
4716 struct splay_tree_key_s cur_node;
4717 int ret = EINVAL;
4719 cur_node.host_start = (uintptr_t) host_ptr;
4720 cur_node.host_end = cur_node.host_start + size;
4721 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4722 if (n)
4724 if (n->tgt->tgt_start + n->tgt_offset
4725 == (uintptr_t) device_ptr + device_offset
4726 && n->host_start <= cur_node.host_start
4727 && n->host_end >= cur_node.host_end)
4728 ret = 0;
4730 else
4732 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4733 tgt->array = gomp_malloc (sizeof (*tgt->array));
4734 tgt->refcount = 1;
4735 tgt->tgt_start = 0;
4736 tgt->tgt_end = 0;
4737 tgt->to_free = NULL;
4738 tgt->prev = NULL;
4739 tgt->list_count = 0;
4740 tgt->device_descr = devicep;
4741 splay_tree_node array = tgt->array;
4742 splay_tree_key k = &array->key;
4743 k->host_start = cur_node.host_start;
4744 k->host_end = cur_node.host_end;
4745 k->tgt = tgt;
4746 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4747 k->refcount = REFCOUNT_INFINITY;
4748 k->dynamic_refcount = 0;
4749 k->aux = NULL;
4750 array->left = NULL;
4751 array->right = NULL;
4752 splay_tree_insert (&devicep->mem_map, array);
4753 ret = 0;
4755 gomp_mutex_unlock (&devicep->lock);
4756 return ret;
4759 int
4760 omp_target_disassociate_ptr (const void *ptr, int device_num)
4762 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4763 if (devicep == NULL)
4764 return EINVAL;
4766 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4767 return EINVAL;
4769 gomp_mutex_lock (&devicep->lock);
4771 struct splay_tree_s *mem_map = &devicep->mem_map;
4772 struct splay_tree_key_s cur_node;
4773 int ret = EINVAL;
4775 cur_node.host_start = (uintptr_t) ptr;
4776 cur_node.host_end = cur_node.host_start;
4777 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4778 if (n
4779 && n->host_start == cur_node.host_start
4780 && n->refcount == REFCOUNT_INFINITY
4781 && n->tgt->tgt_start == 0
4782 && n->tgt->to_free == NULL
4783 && n->tgt->refcount == 1
4784 && n->tgt->list_count == 0)
4786 splay_tree_remove (&devicep->mem_map, n);
4787 gomp_unmap_tgt (n->tgt);
4788 ret = 0;
4791 gomp_mutex_unlock (&devicep->lock);
4792 return ret;
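/* Usage sketch for the associate / disassociate pair above: device memory
   allocated up front is attached to a host pointer, so later target
   constructs and copy routines treat that pointer as already present
   ('n' and 'dev' are placeholders):

       double *h = malloc (n * sizeof (double));
       void   *d = omp_target_alloc (n * sizeof (double), dev);
       omp_target_associate_ptr (h, d, n * sizeof (double), 0, dev);
       // '#pragma omp target map(tofrom: h[0:n])' now reuses 'd' instead
       // of allocating fresh device storage
       omp_target_disassociate_ptr (h, dev);
       omp_target_free (d, dev);
       free (h);  */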
4795 void *
4796 omp_get_mapped_ptr (const void *ptr, int device_num)
4798 if (device_num == omp_initial_device
4799 || device_num == omp_get_initial_device ())
4800 return (void *) ptr;
4802 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4803 if (devicep == NULL)
4804 return NULL;
4806 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4807 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4808 return (void *) ptr;
4810 gomp_mutex_lock (&devicep->lock);
4812 struct splay_tree_s *mem_map = &devicep->mem_map;
4813 struct splay_tree_key_s cur_node;
4814 void *ret = NULL;
4816 cur_node.host_start = (uintptr_t) ptr;
4817 cur_node.host_end = cur_node.host_start;
4818 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4820 if (n)
4822 uintptr_t offset = cur_node.host_start - n->host_start;
4823 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
4826 gomp_mutex_unlock (&devicep->lock);
4828 return ret;
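/* Usage sketch: once a host pointer is mapped (or associated as above),
   the corresponding device address can be queried directly:

       void *dptr = omp_get_mapped_ptr (h, dev);
       // dptr is NULL if 'h' has no mapping on 'dev'  */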
4831 int
4832 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
4834 if (device_num == omp_initial_device
4835 || device_num == gomp_get_num_devices ())
4836 return true;
4838 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4839 if (devicep == NULL)
4840 return false;
4842 /* TODO: Unified shared memory must be handled when available. */
4844 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
4847 int
4848 omp_pause_resource (omp_pause_resource_t kind, int device_num)
4850 (void) kind;
4851 if (device_num == omp_initial_device
4852 || device_num == gomp_get_num_devices ())
4853 return gomp_pause_host ();
4855 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4856 if (devicep == NULL)
4857 return -1;
4859 /* Do nothing for target devices for now. */
4860 return 0;
4863 int
4864 omp_pause_resource_all (omp_pause_resource_t kind)
4866 (void) kind;
4867 if (gomp_pause_host ())
4868 return -1;
4869 /* Do nothing for target devices for now. */
4870 return 0;
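/* Usage sketch: releasing runtime resources on every device, e.g. ahead of
   a fork or at a phase boundary in the program:

       if (omp_pause_resource_all (omp_pause_soft) != 0)
         {
           // the runtime could not release its resources
         }
 */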
4873 ialias (omp_pause_resource)
4874 ialias (omp_pause_resource_all)
4876 #ifdef PLUGIN_SUPPORT
4878 /* This function tries to load a plugin for DEVICE.  The name of the plugin
4879 is passed in PLUGIN_NAME.
4880 The handles of the found functions are stored in the corresponding fields
4881 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4883 static bool
4884 gomp_load_plugin_for_device (struct gomp_device_descr *device,
4885 const char *plugin_name)
4887 const char *err = NULL, *last_missing = NULL;
4889 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
4890 if (!plugin_handle)
4891 #if OFFLOAD_DEFAULTED
4892 return 0;
4893 #else
4894 goto dl_fail;
4895 #endif
4897 /* Check if all required functions are available in the plugin and store
4898 their handles.  None of the symbols can legitimately be NULL,
4899 so we don't need to check dlerror all the time. */
4900 #define DLSYM(f) \
4901 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4902 goto dl_fail
4903 /* Similar, but missing functions are not an error. Return false if
4904 failed, true otherwise. */
4905 #define DLSYM_OPT(f, n) \
4906 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4907 || (last_missing = #n, 0))
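/* For example, the 'DLSYM (alloc);' invocation below expands to

       if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
         goto dl_fail;

   i.e. each mandatory plugin entry point is looked up under its
   "GOMP_OFFLOAD_"-prefixed name and a missing symbol aborts the load.  */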
4909 DLSYM (version);
4910 if (device->version_func () != GOMP_VERSION)
4912 err = "plugin version mismatch";
4913 goto fail;
4916 DLSYM (get_name);
4917 DLSYM (get_caps);
4918 DLSYM (get_type);
4919 DLSYM (get_num_devices);
4920 DLSYM (init_device);
4921 DLSYM (fini_device);
4922 DLSYM (load_image);
4923 DLSYM (unload_image);
4924 DLSYM (alloc);
4925 DLSYM (free);
4926 DLSYM (dev2host);
4927 DLSYM (host2dev);
4928 device->capabilities = device->get_caps_func ();
4929 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4931 DLSYM (run);
4932 DLSYM_OPT (async_run, async_run);
4933 DLSYM_OPT (can_run, can_run);
4934 DLSYM (dev2dev);
4936 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4938 if (!DLSYM_OPT (openacc.exec, openacc_exec)
4939 || !DLSYM_OPT (openacc.create_thread_data,
4940 openacc_create_thread_data)
4941 || !DLSYM_OPT (openacc.destroy_thread_data,
4942 openacc_destroy_thread_data)
4943 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
4944 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
4945 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
4946 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
4947 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
4948 || !DLSYM_OPT (openacc.async.queue_callback,
4949 openacc_async_queue_callback)
4950 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
4951 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
4952 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
4953 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
4955 /* Require all the OpenACC handlers if we have
4956 GOMP_OFFLOAD_CAP_OPENACC_200. */
4957 err = "plugin missing OpenACC handler function";
4958 goto fail;
4961 unsigned cuda = 0;
4962 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
4963 openacc_cuda_get_current_device);
4964 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
4965 openacc_cuda_get_current_context);
4966 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
4967 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
4968 if (cuda && cuda != 4)
4970 /* Make sure all the CUDA functions are there if any of them are. */
4971 err = "plugin missing OpenACC CUDA handler function";
4972 goto fail;
4975 #undef DLSYM
4976 #undef DLSYM_OPT
4978 return 1;
4980 dl_fail:
4981 err = dlerror ();
4982 fail:
4983 gomp_error ("while loading %s: %s", plugin_name, err);
4984 if (last_missing)
4985 gomp_error ("missing function was %s", last_missing);
4986 if (plugin_handle)
4987 dlclose (plugin_handle);
4989 return 0;
4992 /* This function finalizes all initialized devices. */
4994 static void
4995 gomp_target_fini (void)
4997 int i;
4998 for (i = 0; i < num_devices; i++)
5000 bool ret = true;
5001 struct gomp_device_descr *devicep = &devices[i];
5002 gomp_mutex_lock (&devicep->lock);
5003 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5004 ret = gomp_fini_device (devicep);
5005 gomp_mutex_unlock (&devicep->lock);
5006 if (!ret)
5007 gomp_fatal ("device finalization failed");
5011 /* This function initializes the runtime for offloading.
5012 It parses the list of offload plugins and tries to load them.
5013 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5014 will be set, and the array DEVICES initialized, containing descriptors for
5015 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
5016 followed by the others. */
5018 static void
5019 gomp_target_init (void)
5021 const char *prefix = "libgomp-plugin-";
5022 const char *suffix = SONAME_SUFFIX (1);
5023 const char *cur, *next;
5024 char *plugin_name;
5025 int i, new_num_devs;
5026 int num_devs = 0, num_devs_openmp;
5027 struct gomp_device_descr *devs = NULL;
5029 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5030 return;
5032 cur = OFFLOAD_PLUGINS;
5033 if (*cur)
5034 do
5036 struct gomp_device_descr current_device;
5037 size_t prefix_len, suffix_len, cur_len;
5039 next = strchr (cur, ',');
5041 prefix_len = strlen (prefix);
5042 cur_len = next ? next - cur : strlen (cur);
5043 suffix_len = strlen (suffix);
5045 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5046 if (!plugin_name)
5048 num_devs = 0;
5049 break;
5052 memcpy (plugin_name, prefix, prefix_len);
5053 memcpy (plugin_name + prefix_len, cur, cur_len);
5054 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5056 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5058 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5059 new_num_devs = current_device.get_num_devices_func (omp_req);
5060 if (gomp_debug_var > 0 && new_num_devs < 0)
5062 bool found = false;
5063 int type = current_device.get_type_func ();
5064 for (int img = 0; img < num_offload_images; img++)
5065 if (type == offload_images[img].type)
5066 found = true;
5067 if (found)
5069 char buf[sizeof ("unified_address, unified_shared_memory, "
5070 "reverse_offload")];
5071 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5072 char *name = (char *) malloc (cur_len + 1);
5073 memcpy (name, cur, cur_len);
5074 name[cur_len] = '\0';
5075 gomp_debug (1,
5076 "%s devices present but 'omp requires %s' "
5077 "cannot be fulfilled\n", name, buf);
5078 free (name);
5081 else if (new_num_devs >= 1)
5083 /* Augment DEVICES and NUM_DEVICES. */
5085 devs = realloc (devs, (num_devs + new_num_devs)
5086 * sizeof (struct gomp_device_descr));
5087 if (!devs)
5089 num_devs = 0;
5090 free (plugin_name);
5091 break;
5094 current_device.name = current_device.get_name_func ();
5095 /* current_device.capabilities has already been set. */
5096 current_device.type = current_device.get_type_func ();
5097 current_device.mem_map.root = NULL;
5098 current_device.mem_map_rev.root = NULL;
5099 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5100 for (i = 0; i < new_num_devs; i++)
5102 current_device.target_id = i;
5103 devs[num_devs] = current_device;
5104 gomp_mutex_init (&devs[num_devs].lock);
5105 num_devs++;
5110 free (plugin_name);
5111 cur = next + 1;
5113 while (next);
5115 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5116 NUM_DEVICES_OPENMP. */
5117 struct gomp_device_descr *devs_s
5118 = malloc (num_devs * sizeof (struct gomp_device_descr));
5119 if (!devs_s)
5121 num_devs = 0;
5122 free (devs);
5123 devs = NULL;
5125 num_devs_openmp = 0;
5126 for (i = 0; i < num_devs; i++)
5127 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5128 devs_s[num_devs_openmp++] = devs[i];
5129 int num_devs_after_openmp = num_devs_openmp;
5130 for (i = 0; i < num_devs; i++)
5131 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5132 devs_s[num_devs_after_openmp++] = devs[i];
5133 free (devs);
5134 devs = devs_s;
5136 for (i = 0; i < num_devs; i++)
5138 /* The 'devices' array can be moved (by the realloc call) until we have
5139 found all the plugins, so registering with the OpenACC runtime (which
5140 takes a copy of the pointer argument) must be delayed until now. */
5141 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5142 goacc_register (&devs[i]);
5145 num_devices = num_devs;
5146 num_devices_openmp = num_devs_openmp;
5147 devices = devs;
5148 if (atexit (gomp_target_fini) != 0)
5149 gomp_fatal ("atexit failed");
5152 #else /* PLUGIN_SUPPORT */
5153 /* If dlfcn.h is unavailable we always fall back to host execution.
5154 GOMP_target* routines are just stubs for this case. */
5155 static void
5156 gomp_target_init (void)
5159 #endif /* PLUGIN_SUPPORT */