[official-gcc.git] / libgomp / target.c
1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_c
51 #include "splay-tree.h"
54 typedef uintptr_t *hash_entry_type;
55 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
56 static inline void htab_free (void *ptr) { free (ptr); }
57 #include "hashtab.h"
59 ialias_redirect (GOMP_task)
61 static inline hashval_t
62 htab_hash (hash_entry_type element)
64 return hash_pointer ((void *) element);
67 static inline bool
68 htab_eq (hash_entry_type x, hash_entry_type y)
70 return x == y;
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
75 static void gomp_target_init (void);
77 /* The whole initialization code for offloading plugins is only run once.  */
78 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock;
83 /* This structure describes an offload image.
84 It contains the type of the target device, a pointer to the host table
85 descriptor, and a pointer to the target data. */
86 struct offload_image_descr {
87 unsigned version;
88 enum offload_target_type type;
89 const void *host_table;
90 const void *target_data;
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr *offload_images;
96 /* Total number of offload images. */
97 static int num_offload_images;
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr *devices;
102 /* Total number of available devices. */
103 static int num_devices;
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp;
108 /* Mask of OpenMP 'requires' directive clauses seen so far. */
109 static int omp_requires_mask;
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
113 static void *
114 gomp_realloc_unlock (void *old, size_t size)
116 void *ret = realloc (old, size);
117 if (ret == NULL)
119 gomp_mutex_unlock (&register_lock);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
122 return ret;
125 attribute_hidden void
126 gomp_init_targets_once (void)
128 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
131 attribute_hidden int
132 gomp_get_num_devices (void)
134 gomp_init_targets_once ();
135 return num_devices_openmp;
138 static struct gomp_device_descr *
139 resolve_device (int device_id, bool remapped)
141 if (remapped && device_id == GOMP_DEVICE_ICV)
143 struct gomp_task_icv *icv = gomp_icv (false);
144 device_id = icv->default_device_var;
145 remapped = false;
148 if (device_id < 0)
150 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device))
152 return NULL;
153 if (device_id == omp_invalid_device)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
159 return NULL;
161 else if (device_id >= gomp_get_num_devices ())
163 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id != num_devices_openmp)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
168 return NULL;
171 gomp_mutex_lock (&devices[device_id].lock);
172 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
173 gomp_init_device (&devices[device_id]);
174 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
176 gomp_mutex_unlock (&devices[device_id].lock);
178 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
182 return NULL;
184 gomp_mutex_unlock (&devices[device_id].lock);
186 return &devices[device_id];
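/* Illustrative sketch, not part of libgomp: how a caller such as the
   GOMP_target* entry points might use resolve_device and then check for
   offloading support, falling back to host execution when NULL is returned.
   Assumes the 'capabilities' field declared in libgomp.h.  */
#if 0
static bool
example_can_offload (int device_id)
{
  struct gomp_device_descr *devicep = resolve_device (device_id, true);
  return (devicep != NULL
          && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400));
}
#endif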
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
193 if (key->host_start != key->host_end)
194 return splay_tree_lookup (mem_map, key);
196 key->host_end++;
197 splay_tree_key n = splay_tree_lookup (mem_map, key);
198 key->host_end--;
199 if (n)
200 return n;
201 key->host_start--;
202 n = splay_tree_lookup (mem_map, key);
203 key->host_start++;
204 if (n)
205 return n;
206 return splay_tree_lookup (mem_map, key);
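/* Illustrative sketch, not part of libgomp: looking up the mapping that
   contains a single host address by using a zero-length key, so that the
   widening done by gomp_map_lookup above also matches mappings whose start
   or end coincides with the address.  The device lock must be held.  */
#if 0
static splay_tree_key
example_lookup_containing (splay_tree mem_map, void *hostaddr)
{
  struct splay_tree_key_s key;
  key.host_start = (uintptr_t) hostaddr;
  key.host_end = key.host_start;  /* Zero-length key.  */
  return gomp_map_lookup (mem_map, &key);
}
#endif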
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
212 return reverse_splay_tree_lookup (mem_map_rev, key);
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
218 if (key->host_start != key->host_end)
219 return splay_tree_lookup (mem_map, key);
221 key->host_end++;
222 splay_tree_key n = splay_tree_lookup (mem_map, key);
223 key->host_end--;
224 return n;
227 static inline void
228 gomp_device_copy (struct gomp_device_descr *devicep,
229 bool (*copy_func) (int, void *, const void *, size_t),
230 const char *dst, void *dstaddr,
231 const char *src, const void *srcaddr,
232 size_t size)
234 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
236 gomp_mutex_unlock (&devicep->lock);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
242 static inline void
243 goacc_device_copy_async (struct gomp_device_descr *devicep,
244 bool (*copy_func) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue *),
246 const char *dst, void *dstaddr,
247 const char *src, const void *srcaddr,
248 const void *srcaddr_orig,
249 size_t size, struct goacc_asyncqueue *aq)
251 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
253 gomp_mutex_unlock (&devicep->lock);
254 if (srcaddr_orig && srcaddr_orig != srcaddr)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src, srcaddr_orig, srcaddr_orig + size,
259 src, srcaddr, srcaddr + size,
260 dst, dstaddr, dstaddr + size);
261 else
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src, srcaddr, srcaddr + size,
265 dst, dstaddr, dstaddr + size);
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
272 struct gomp_coalesce_chunk
274 /* The starting and ending point of a coalesced chunk of memory. */
275 size_t start, end;
278 struct gomp_coalesce_buf
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
282 void *buf;
283 struct target_mem_desc *tgt;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk *chunks;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
289 be performed. */
290 long chunk_cnt;
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
294 long use_cnt;
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
309 /* Add region with device tgt_start relative offset and length to CBUF.
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
313 example). The exception is for EPHEMERAL data, which we know is available
314 already "by construction". */
316 static inline void
317 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
319 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
320 return;
321 if (cbuf->chunk_cnt)
323 if (cbuf->chunk_cnt < 0)
324 return;
325 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
327 cbuf->chunk_cnt = -1;
328 return;
330 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
332 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
333 cbuf->use_cnt++;
334 return;
336 /* If the last chunk is only used by one mapping, discard it,
337 as it will be one host to device copy anyway and
338 memcpying it around will only waste cycles. */
339 if (cbuf->use_cnt == 1)
340 cbuf->chunk_cnt--;
342 cbuf->chunks[cbuf->chunk_cnt].start = start;
343 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
344 cbuf->chunk_cnt++;
345 cbuf->use_cnt = 1;
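/* Illustrative sketch, not part of libgomp: how chunks accumulate in a
   coalesce buffer.  The offsets are made up for the example; CBUF->chunks
   must have been allocated and CBUF->chunk_cnt set to 0 beforehand, as
   gomp_map_vars_internal does.  */
#if 0
static void
example_coalesce (struct gomp_coalesce_buf *cbuf)
{
  gomp_coalesce_buf_add (cbuf, 0, 1024);     /* New chunk [0, 1024).  */
  gomp_coalesce_buf_add (cbuf, 1280, 512);   /* Gap of 256 bytes is below
                                                MAX_COALESCE_BUF_GAP, so the
                                                chunk grows to [0, 1792).  */
  gomp_coalesce_buf_add (cbuf, 65536, 256);  /* Gap too large: new chunk.  */
}
#endif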
348 /* Return true for mapping kinds which need to copy data from the
349 host to device for regions that weren't previously mapped. */
351 static inline bool
352 gomp_to_device_kind_p (int kind)
354 switch (kind)
356 case GOMP_MAP_ALLOC:
357 case GOMP_MAP_FROM:
358 case GOMP_MAP_FORCE_ALLOC:
359 case GOMP_MAP_FORCE_FROM:
360 case GOMP_MAP_ALWAYS_FROM:
361 return false;
362 default:
363 return true;
367 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
368 non-NULL), when the source data is on the stack or may otherwise be deallocated
369 before the asynchronous copy takes place, EPHEMERAL must be passed as
370 TRUE. */
372 attribute_hidden void
373 gomp_copy_host2dev (struct gomp_device_descr *devicep,
374 struct goacc_asyncqueue *aq,
375 void *d, const void *h, size_t sz,
376 bool ephemeral, struct gomp_coalesce_buf *cbuf)
378 if (cbuf)
380 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
381 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
383 long first = 0;
384 long last = cbuf->chunk_cnt - 1;
385 while (first <= last)
387 long middle = (first + last) >> 1;
388 if (cbuf->chunks[middle].end <= doff)
389 first = middle + 1;
390 else if (cbuf->chunks[middle].start <= doff)
392 if (doff + sz > cbuf->chunks[middle].end)
394 gomp_mutex_unlock (&devicep->lock);
395 gomp_fatal ("internal libgomp cbuf error");
398 /* In an asynchronous context, verify that CBUF isn't used
399 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
400 if (__builtin_expect (aq != NULL, 0))
401 assert (ephemeral);
403 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
404 h, sz);
405 return;
407 else
408 last = middle - 1;
413 if (__builtin_expect (aq != NULL, 0))
415 void *h_buf = (void *) h;
416 if (ephemeral)
418 /* We're queueing up an asynchronous copy from data that may
419 disappear before the transfer takes place (i.e. because it is a
420 stack local in a function that is no longer executing). As we've
421 not been able to use CBUF, make a copy of the data into a
422 temporary buffer. */
423 h_buf = gomp_malloc (sz);
424 memcpy (h_buf, h, sz);
426 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
427 "dev", d, "host", h_buf, h, sz, aq);
428 if (ephemeral)
429 /* Free once the transfer has completed. */
430 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
432 else
433 gomp_device_copy (devicep, devicep->host2dev_func,
434 "dev", d, "host", h, sz);
437 attribute_hidden void
438 gomp_copy_dev2host (struct gomp_device_descr *devicep,
439 struct goacc_asyncqueue *aq,
440 void *h, const void *d, size_t sz)
442 if (__builtin_expect (aq != NULL, 0))
443 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
444 "host", h, "dev", d, NULL, sz, aq);
445 else
446 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
449 static void
450 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
452 if (!devicep->free_func (devicep->target_id, devptr))
454 gomp_mutex_unlock (&devicep->lock);
455 gomp_fatal ("error in freeing device memory block at %p", devptr);
459 /* Increment reference count of a splay_tree_key region K by 1.
460 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
461 increment the value if refcount is not yet contained in the set (used for
462 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
463 once for each construct). */
465 static inline void
466 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
468 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
469 return;
471 uintptr_t *refcount_ptr = &k->refcount;
473 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
474 refcount_ptr = &k->structelem_refcount;
475 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
476 refcount_ptr = k->structelem_refcount_ptr;
478 if (refcount_set)
480 if (htab_find (*refcount_set, refcount_ptr))
481 return;
482 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
483 *slot = refcount_ptr;
486 *refcount_ptr += 1;
487 return;
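/* Illustrative sketch, not part of libgomp: with a refcount_set, a key that
   appears in several map clauses of one construct has its reference count
   adjusted only once.  */
#if 0
static void
example_refcount_once (splay_tree_key k)
{
  htab_t set = htab_create (8);
  gomp_increment_refcount (k, &set);  /* Refcount goes up by one.  */
  gomp_increment_refcount (k, &set);  /* Already in SET: no further change.  */
  htab_free (set);
}
#endif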
490 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
491 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
492 track already seen refcounts, and only adjust the value if refcount is not
493 yet contained in the set (like gomp_increment_refcount).
495 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
496 if it is already zero and we know we decremented it earlier. This signals
497 that associated maps should be copied back to the host.
499 *DO_REMOVE is set to true when this is the first handling of this refcount
500 and we are setting it to zero. This signals a removal of this key from the
501 splay-tree map.
503 Copy and removal are separated due to cases like handling of structure
504 elements, e.g. each map of a structure element representing a possible copy
505 out of a structure field has to be handled individually, but we only signal
506 removal for one (the first encountered) sibling map. */
508 static inline void
509 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
510 bool *do_copy, bool *do_remove)
512 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
514 *do_copy = *do_remove = false;
515 return;
518 uintptr_t *refcount_ptr = &k->refcount;
520 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
521 refcount_ptr = &k->structelem_refcount;
522 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
523 refcount_ptr = k->structelem_refcount_ptr;
525 bool new_encountered_refcount;
526 bool set_to_zero = false;
527 bool is_zero = false;
529 uintptr_t orig_refcount = *refcount_ptr;
531 if (refcount_set)
533 if (htab_find (*refcount_set, refcount_ptr))
535 new_encountered_refcount = false;
536 goto end;
539 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
540 *slot = refcount_ptr;
541 new_encountered_refcount = true;
543 else
544 /* If no refcount_set being used, assume all keys are being decremented
545 for the first time. */
546 new_encountered_refcount = true;
548 if (delete_p)
549 *refcount_ptr = 0;
550 else if (*refcount_ptr > 0)
551 *refcount_ptr -= 1;
553 end:
554 if (*refcount_ptr == 0)
556 if (orig_refcount > 0)
557 set_to_zero = true;
559 is_zero = true;
562 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
563 *do_remove = (new_encountered_refcount && set_to_zero);
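/* Illustrative sketch, not part of libgomp: typical use of the DO_COPY and
   DO_REMOVE out-values, mirroring the pattern in gomp_unmap_vars_internal
   further below.  The device lock must be held.  */
#if 0
static void
example_decrement (struct gomp_device_descr *devicep, splay_tree_key k,
                   htab_t *refcount_set)
{
  bool do_copy, do_remove;
  gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
  if (do_copy)
    {
      /* Copy the mapped data back to the host here (copy-from maps).  */
    }
  if (do_remove)
    gomp_remove_var (devicep, k);
}
#endif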
566 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
567 gomp_map_0len_lookup found oldn for newn.
568 Helper function of gomp_map_vars. */
570 static inline void
571 gomp_map_vars_existing (struct gomp_device_descr *devicep,
572 struct goacc_asyncqueue *aq, splay_tree_key oldn,
573 splay_tree_key newn, struct target_var_desc *tgt_var,
574 unsigned char kind, bool always_to_flag, bool implicit,
575 struct gomp_coalesce_buf *cbuf,
576 htab_t *refcount_set)
578 assert (kind != GOMP_MAP_ATTACH
579 || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
581 tgt_var->key = oldn;
582 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
583 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
584 tgt_var->is_attach = false;
585 tgt_var->offset = newn->host_start - oldn->host_start;
587 /* For implicit maps, old contained in new is valid. */
588 bool implicit_subset = (implicit
589 && newn->host_start <= oldn->host_start
590 && oldn->host_end <= newn->host_end);
591 if (implicit_subset)
592 tgt_var->length = oldn->host_end - oldn->host_start;
593 else
594 tgt_var->length = newn->host_end - newn->host_start;
596 if ((kind & GOMP_MAP_FLAG_FORCE)
597 /* For implicit maps, old contained in new is valid. */
598 || !(implicit_subset
599 /* Otherwise, new contained inside old is considered valid. */
600 || (oldn->host_start <= newn->host_start
601 && newn->host_end <= oldn->host_end)))
603 gomp_mutex_unlock (&devicep->lock);
604 gomp_fatal ("Trying to map into device [%p..%p) object when "
605 "[%p..%p) is already mapped",
606 (void *) newn->host_start, (void *) newn->host_end,
607 (void *) oldn->host_start, (void *) oldn->host_end);
610 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
612 /* Implicit + always should not happen. If it does occur, the
613 address/length adjustment below is a TODO. */
614 assert (!implicit_subset);
616 if (oldn->aux && oldn->aux->attach_count)
618 /* We have to be careful not to overwrite still attached pointers
619 during the copyback to host. */
620 uintptr_t addr = newn->host_start;
621 while (addr < newn->host_end)
623 size_t i = (addr - oldn->host_start) / sizeof (void *);
624 if (oldn->aux->attach_count[i] == 0)
625 gomp_copy_host2dev (devicep, aq,
626 (void *) (oldn->tgt->tgt_start
627 + oldn->tgt_offset
628 + addr - oldn->host_start),
629 (void *) addr,
630 sizeof (void *), false, cbuf);
631 addr += sizeof (void *);
634 else
635 gomp_copy_host2dev (devicep, aq,
636 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
637 + newn->host_start - oldn->host_start),
638 (void *) newn->host_start,
639 newn->host_end - newn->host_start, false, cbuf);
642 gomp_increment_refcount (oldn, refcount_set);
645 static int
646 get_kind (bool short_mapkind, void *kinds, int idx)
648 if (!short_mapkind)
649 return ((unsigned char *) kinds)[idx];
651 int val = ((unsigned short *) kinds)[idx];
652 if (GOMP_MAP_IMPLICIT_P (val))
653 val &= ~GOMP_MAP_IMPLICIT;
654 return val;
658 static bool
659 get_implicit (bool short_mapkind, void *kinds, int idx)
661 if (!short_mapkind)
662 return false;
664 int val = ((unsigned short *) kinds)[idx];
665 return GOMP_MAP_IMPLICIT_P (val);
668 static void
669 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
670 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
671 struct gomp_coalesce_buf *cbuf,
672 bool allow_zero_length_array_sections)
674 struct gomp_device_descr *devicep = tgt->device_descr;
675 struct splay_tree_s *mem_map = &devicep->mem_map;
676 struct splay_tree_key_s cur_node;
678 cur_node.host_start = host_ptr;
679 if (cur_node.host_start == (uintptr_t) NULL)
681 cur_node.tgt_offset = (uintptr_t) NULL;
682 gomp_copy_host2dev (devicep, aq,
683 (void *) (tgt->tgt_start + target_offset),
684 (void *) &cur_node.tgt_offset, sizeof (void *),
685 true, cbuf);
686 return;
688 /* Add bias to the pointer value. */
689 cur_node.host_start += bias;
690 cur_node.host_end = cur_node.host_start;
691 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
692 if (n == NULL)
694 if (allow_zero_length_array_sections)
695 cur_node.tgt_offset = 0;
696 else
698 gomp_mutex_unlock (&devicep->lock);
699 gomp_fatal ("Pointer target of array section wasn't mapped");
702 else
704 cur_node.host_start -= n->host_start;
705 cur_node.tgt_offset
706 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
707 /* At this point tgt_offset is target address of the
708 array section. Now subtract bias to get what we want
709 to initialize the pointer with. */
710 cur_node.tgt_offset -= bias;
712 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
713 (void *) &cur_node.tgt_offset, sizeof (void *),
714 true, cbuf);
717 static void
718 gomp_map_fields_existing (struct target_mem_desc *tgt,
719 struct goacc_asyncqueue *aq, splay_tree_key n,
720 size_t first, size_t i, void **hostaddrs,
721 size_t *sizes, void *kinds,
722 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
724 struct gomp_device_descr *devicep = tgt->device_descr;
725 struct splay_tree_s *mem_map = &devicep->mem_map;
726 struct splay_tree_key_s cur_node;
727 int kind;
728 bool implicit;
729 const bool short_mapkind = true;
730 const int typemask = short_mapkind ? 0xff : 0x7;
732 cur_node.host_start = (uintptr_t) hostaddrs[i];
733 cur_node.host_end = cur_node.host_start + sizes[i];
734 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
735 kind = get_kind (short_mapkind, kinds, i);
736 implicit = get_implicit (short_mapkind, kinds, i);
737 if (n2
738 && n2->tgt == n->tgt
739 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
741 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
742 kind & typemask, false, implicit, cbuf,
743 refcount_set);
744 return;
746 if (sizes[i] == 0)
748 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
750 cur_node.host_start--;
751 n2 = splay_tree_lookup (mem_map, &cur_node);
752 cur_node.host_start++;
753 if (n2
754 && n2->tgt == n->tgt
755 && n2->host_start - n->host_start
756 == n2->tgt_offset - n->tgt_offset)
758 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
759 kind & typemask, false, implicit, cbuf,
760 refcount_set);
761 return;
764 cur_node.host_end++;
765 n2 = splay_tree_lookup (mem_map, &cur_node);
766 cur_node.host_end--;
767 if (n2
768 && n2->tgt == n->tgt
769 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
771 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
772 kind & typemask, false, implicit, cbuf,
773 refcount_set);
774 return;
777 gomp_mutex_unlock (&devicep->lock);
778 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
779 "other mapped elements from the same structure weren't mapped "
780 "together with it", (void *) cur_node.host_start,
781 (void *) cur_node.host_end);
784 attribute_hidden void
785 gomp_attach_pointer (struct gomp_device_descr *devicep,
786 struct goacc_asyncqueue *aq, splay_tree mem_map,
787 splay_tree_key n, uintptr_t attach_to, size_t bias,
788 struct gomp_coalesce_buf *cbufp,
789 bool allow_zero_length_array_sections)
791 struct splay_tree_key_s s;
792 size_t size, idx;
794 if (n == NULL)
796 gomp_mutex_unlock (&devicep->lock);
797 gomp_fatal ("enclosing struct not mapped for attach");
800 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
801 /* We might have a pointer in a packed struct: however we cannot have more
802 than one such pointer in each pointer-sized portion of the struct, so
803 this is safe. */
804 idx = (attach_to - n->host_start) / sizeof (void *);
806 if (!n->aux)
807 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
809 if (!n->aux->attach_count)
810 n->aux->attach_count
811 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
813 if (n->aux->attach_count[idx] < UINTPTR_MAX)
814 n->aux->attach_count[idx]++;
815 else
817 gomp_mutex_unlock (&devicep->lock);
818 gomp_fatal ("attach count overflow");
821 if (n->aux->attach_count[idx] == 1)
823 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
824 - n->host_start;
825 uintptr_t target = (uintptr_t) *(void **) attach_to;
826 splay_tree_key tn;
827 uintptr_t data;
829 if ((void *) target == NULL)
831 gomp_mutex_unlock (&devicep->lock);
832 gomp_fatal ("attempt to attach null pointer");
835 s.host_start = target + bias;
836 s.host_end = s.host_start + 1;
837 tn = splay_tree_lookup (mem_map, &s);
839 if (!tn)
841 if (allow_zero_length_array_sections)
842 /* When allowing attachment to zero-length array sections, we
843 allow attaching to NULL pointers when the target region is not
844 mapped. */
845 data = 0;
846 else
848 gomp_mutex_unlock (&devicep->lock);
849 gomp_fatal ("pointer target not mapped for attach");
852 else
853 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
855 gomp_debug (1,
856 "%s: attaching host %p, target %p (struct base %p) to %p\n",
857 __FUNCTION__, (void *) attach_to, (void *) devptr,
858 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
860 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
861 sizeof (void *), true, cbufp);
863 else
864 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
865 (void *) attach_to, (int) n->aux->attach_count[idx]);
868 attribute_hidden void
869 gomp_detach_pointer (struct gomp_device_descr *devicep,
870 struct goacc_asyncqueue *aq, splay_tree_key n,
871 uintptr_t detach_from, bool finalize,
872 struct gomp_coalesce_buf *cbufp)
874 size_t idx;
876 if (n == NULL)
878 gomp_mutex_unlock (&devicep->lock);
879 gomp_fatal ("enclosing struct not mapped for detach");
882 idx = (detach_from - n->host_start) / sizeof (void *);
884 if (!n->aux || !n->aux->attach_count)
886 gomp_mutex_unlock (&devicep->lock);
887 gomp_fatal ("no attachment counters for struct");
890 if (finalize)
891 n->aux->attach_count[idx] = 1;
893 if (n->aux->attach_count[idx] == 0)
895 gomp_mutex_unlock (&devicep->lock);
896 gomp_fatal ("attach count underflow");
898 else
899 n->aux->attach_count[idx]--;
901 if (n->aux->attach_count[idx] == 0)
903 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
904 - n->host_start;
905 uintptr_t target = (uintptr_t) *(void **) detach_from;
907 gomp_debug (1,
908 "%s: detaching host %p, target %p (struct base %p) to %p\n",
909 __FUNCTION__, (void *) detach_from, (void *) devptr,
910 (void *) (n->tgt->tgt_start + n->tgt_offset),
911 (void *) target);
913 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
914 sizeof (void *), true, cbufp);
916 else
917 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
918 (void *) detach_from, (int) n->aux->attach_count[idx]);
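/* Illustrative sketch, not part of libgomp: attach/detach pairing on a
   pointer at host address PTR_ADDR inside the already-mapped block N.  Each
   attach increments the per-pointer counter; only the first attach writes
   the translated device pointer, and only the final matching detach writes
   the original host pointer value back to the device location.  The device
   lock must be held.  */
#if 0
static void
example_attach_detach (struct gomp_device_descr *devicep, splay_tree mem_map,
                       splay_tree_key n, uintptr_t ptr_addr)
{
  gomp_attach_pointer (devicep, NULL, mem_map, n, ptr_addr, 0, NULL, false);
  gomp_detach_pointer (devicep, NULL, n, ptr_addr, false, NULL);
}
#endif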
921 attribute_hidden uintptr_t
922 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
924 if (tgt->list[i].key != NULL)
925 return tgt->list[i].key->tgt->tgt_start
926 + tgt->list[i].key->tgt_offset
927 + tgt->list[i].offset;
929 switch (tgt->list[i].offset)
931 case OFFSET_INLINED:
932 return (uintptr_t) hostaddrs[i];
934 case OFFSET_POINTER:
935 return 0;
937 case OFFSET_STRUCT:
938 return tgt->list[i + 1].key->tgt->tgt_start
939 + tgt->list[i + 1].key->tgt_offset
940 + tgt->list[i + 1].offset
941 + (uintptr_t) hostaddrs[i]
942 - (uintptr_t) hostaddrs[i + 1];
944 default:
945 return tgt->tgt_start + tgt->list[i].offset;
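/* Illustrative sketch, not part of libgomp: for a 'target' construct the
   first MAPNUM pointer-sized slots of the device block are filled with the
   values gomp_map_val computes, one per map clause; this mirrors the loop
   near the end of gomp_map_vars_internal below.  */
#if 0
static void
example_fill_devaddrs (struct target_mem_desc *tgt, void **hostaddrs,
                       size_t mapnum, uintptr_t *devaddrs)
{
  for (size_t i = 0; i < mapnum; i++)
    devaddrs[i] = gomp_map_val (tgt, hostaddrs, i);
}
#endif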
949 static inline __attribute__((always_inline)) struct target_mem_desc *
950 gomp_map_vars_internal (struct gomp_device_descr *devicep,
951 struct goacc_asyncqueue *aq, size_t mapnum,
952 void **hostaddrs, void **devaddrs, size_t *sizes,
953 void *kinds, bool short_mapkind,
954 htab_t *refcount_set,
955 enum gomp_map_vars_kind pragma_kind)
957 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
958 bool has_firstprivate = false;
959 bool has_always_ptrset = false;
960 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
961 const int rshift = short_mapkind ? 8 : 3;
962 const int typemask = short_mapkind ? 0xff : 0x7;
963 struct splay_tree_s *mem_map = &devicep->mem_map;
964 struct splay_tree_key_s cur_node;
965 struct target_mem_desc *tgt
966 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
967 tgt->list_count = mapnum;
968 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
969 tgt->device_descr = devicep;
970 tgt->prev = NULL;
971 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
973 if (mapnum == 0)
975 tgt->tgt_start = 0;
976 tgt->tgt_end = 0;
977 return tgt;
980 tgt_align = sizeof (void *);
981 tgt_size = 0;
982 cbuf.chunks = NULL;
983 cbuf.chunk_cnt = -1;
984 cbuf.use_cnt = 0;
985 cbuf.buf = NULL;
986 if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
988 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
989 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
990 cbuf.chunk_cnt = 0;
992 if (pragma_kind & GOMP_MAP_VARS_TARGET)
994 size_t align = 4 * sizeof (void *);
995 tgt_align = align;
996 tgt_size = mapnum * sizeof (void *);
997 cbuf.chunk_cnt = 1;
998 cbuf.use_cnt = 1 + (mapnum > 1);
999 cbuf.chunks[0].start = 0;
1000 cbuf.chunks[0].end = tgt_size;
1003 gomp_mutex_lock (&devicep->lock);
1004 if (devicep->state == GOMP_DEVICE_FINALIZED)
1006 gomp_mutex_unlock (&devicep->lock);
1007 free (tgt);
1008 return NULL;
1011 for (i = 0; i < mapnum; i++)
1013 int kind = get_kind (short_mapkind, kinds, i);
1014 bool implicit = get_implicit (short_mapkind, kinds, i);
1015 if (hostaddrs[i] == NULL
1016 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1018 tgt->list[i].key = NULL;
1019 tgt->list[i].offset = OFFSET_INLINED;
1020 continue;
1022 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1023 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1025 tgt->list[i].key = NULL;
1026 if (!not_found_cnt)
1028 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1029 on a separate construct prior to using use_device_{addr,ptr}.
1030 In OpenMP 5.0, map directives need to be ordered by the
1031 middle-end before the use_device_* clauses. If
1032 !not_found_cnt, all mappings requested (if any) are already
1033 mapped, so use_device_{addr,ptr} can be resolved right away.
1034 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1035 now but would succeed after performing the mappings in the
1036 following loop. We can't defer this always to the second
1037 loop, because it is not even invoked when !not_found_cnt
1038 after the first loop. */
1039 cur_node.host_start = (uintptr_t) hostaddrs[i];
1040 cur_node.host_end = cur_node.host_start;
1041 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1042 if (n != NULL)
1044 cur_node.host_start -= n->host_start;
1045 hostaddrs[i]
1046 = (void *) (n->tgt->tgt_start + n->tgt_offset
1047 + cur_node.host_start);
1049 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1051 gomp_mutex_unlock (&devicep->lock);
1052 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1054 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1055 /* If not present, continue using the host address. */
1057 else
1058 __builtin_unreachable ();
1059 tgt->list[i].offset = OFFSET_INLINED;
1061 else
1062 tgt->list[i].offset = 0;
1063 continue;
1065 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1067 size_t first = i + 1;
1068 size_t last = i + sizes[i];
1069 cur_node.host_start = (uintptr_t) hostaddrs[i];
1070 cur_node.host_end = (uintptr_t) hostaddrs[last]
1071 + sizes[last];
1072 tgt->list[i].key = NULL;
1073 tgt->list[i].offset = OFFSET_STRUCT;
1074 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1075 if (n == NULL)
1077 size_t align = (size_t) 1 << (kind >> rshift);
1078 if (tgt_align < align)
1079 tgt_align = align;
1080 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1081 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1082 tgt_size += cur_node.host_end - cur_node.host_start;
1083 not_found_cnt += last - i;
1084 for (i = first; i <= last; i++)
1086 tgt->list[i].key = NULL;
1087 if (!aq
1088 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1089 & typemask))
1090 gomp_coalesce_buf_add (&cbuf,
1091 tgt_size - cur_node.host_end
1092 + (uintptr_t) hostaddrs[i],
1093 sizes[i]);
1095 i--;
1096 continue;
1098 for (i = first; i <= last; i++)
1099 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1100 sizes, kinds, NULL, refcount_set);
1101 i--;
1102 continue;
1104 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1106 tgt->list[i].key = NULL;
1107 tgt->list[i].offset = OFFSET_POINTER;
1108 has_firstprivate = true;
1109 continue;
1111 else if ((kind & typemask) == GOMP_MAP_ATTACH
1112 || ((kind & typemask)
1113 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1115 tgt->list[i].key = NULL;
1116 has_firstprivate = true;
1117 continue;
1119 cur_node.host_start = (uintptr_t) hostaddrs[i];
1120 if (!GOMP_MAP_POINTER_P (kind & typemask))
1121 cur_node.host_end = cur_node.host_start + sizes[i];
1122 else
1123 cur_node.host_end = cur_node.host_start + sizeof (void *);
1124 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1126 tgt->list[i].key = NULL;
1128 size_t align = (size_t) 1 << (kind >> rshift);
1129 if (tgt_align < align)
1130 tgt_align = align;
1131 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1132 if (!aq)
1133 gomp_coalesce_buf_add (&cbuf, tgt_size,
1134 cur_node.host_end - cur_node.host_start);
1135 tgt_size += cur_node.host_end - cur_node.host_start;
1136 has_firstprivate = true;
1137 continue;
1139 splay_tree_key n;
1140 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1142 n = gomp_map_0len_lookup (mem_map, &cur_node);
1143 if (!n)
1145 tgt->list[i].key = NULL;
1146 tgt->list[i].offset = OFFSET_POINTER;
1147 continue;
1150 else
1151 n = splay_tree_lookup (mem_map, &cur_node);
1152 if (n && n->refcount != REFCOUNT_LINK)
1154 int always_to_cnt = 0;
1155 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1157 bool has_nullptr = false;
1158 size_t j;
1159 for (j = 0; j < n->tgt->list_count; j++)
1160 if (n->tgt->list[j].key == n)
1162 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1163 break;
1165 if (n->tgt->list_count == 0)
1167 /* 'declare target'; assume has_nullptr; it could also be a
1168 statically assigned pointer, but then it should point to
1169 the equivalent variable on the host. */
1170 assert (n->refcount == REFCOUNT_INFINITY);
1171 has_nullptr = true;
1173 else
1174 assert (j < n->tgt->list_count);
1175 /* Re-map the data if there is an 'always' modifier, or if a null
1176 pointer was there before and a non-null one has now been found; that
1177 permits transparent re-mapping for Fortran array descriptors
1178 which were previously mapped unallocated. */
1179 for (j = i + 1; j < mapnum; j++)
1181 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1182 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1183 && (!has_nullptr
1184 || !GOMP_MAP_POINTER_P (ptr_kind)
1185 || *(void **) hostaddrs[j] == NULL))
1186 break;
1187 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1188 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1189 > cur_node.host_end))
1190 break;
1191 else
1193 has_always_ptrset = true;
1194 ++always_to_cnt;
1198 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1199 kind & typemask, always_to_cnt > 0, implicit,
1200 NULL, refcount_set);
1201 i += always_to_cnt;
1203 else
1205 tgt->list[i].key = NULL;
1207 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1209 /* Not present, hence skip the entry, including its MAP_POINTER
1210 if one exists. */
1211 tgt->list[i].offset = OFFSET_INLINED;
1212 if (i + 1 < mapnum
1213 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1214 == GOMP_MAP_POINTER))
1216 ++i;
1217 tgt->list[i].key = NULL;
1218 tgt->list[i].offset = 0;
1220 continue;
1222 size_t align = (size_t) 1 << (kind >> rshift);
1223 not_found_cnt++;
1224 if (tgt_align < align)
1225 tgt_align = align;
1226 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1227 if (!aq
1228 && gomp_to_device_kind_p (kind & typemask))
1229 gomp_coalesce_buf_add (&cbuf, tgt_size,
1230 cur_node.host_end - cur_node.host_start);
1231 tgt_size += cur_node.host_end - cur_node.host_start;
1232 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1234 size_t j;
1235 int kind;
1236 for (j = i + 1; j < mapnum; j++)
1237 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1238 kinds, j)) & typemask))
1239 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1240 break;
1241 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1242 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1243 > cur_node.host_end))
1244 break;
1245 else
1247 tgt->list[j].key = NULL;
1248 i++;
1254 if (devaddrs)
1256 if (mapnum != 1)
1258 gomp_mutex_unlock (&devicep->lock);
1259 gomp_fatal ("unexpected aggregation");
1261 tgt->to_free = devaddrs[0];
1262 tgt->tgt_start = (uintptr_t) tgt->to_free;
1263 tgt->tgt_end = tgt->tgt_start + sizes[0];
1265 else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
1267 /* Allocate tgt_align aligned tgt_size block of memory. */
1268 /* FIXME: Perhaps change interface to allocate properly aligned
1269 memory. */
1270 tgt->to_free = devicep->alloc_func (devicep->target_id,
1271 tgt_size + tgt_align - 1);
1272 if (!tgt->to_free)
1274 gomp_mutex_unlock (&devicep->lock);
1275 gomp_fatal ("device memory allocation fail");
1278 tgt->tgt_start = (uintptr_t) tgt->to_free;
1279 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1280 tgt->tgt_end = tgt->tgt_start + tgt_size;
1282 if (cbuf.use_cnt == 1)
1283 cbuf.chunk_cnt--;
1284 if (cbuf.chunk_cnt > 0)
1286 cbuf.buf
1287 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1288 if (cbuf.buf)
1290 cbuf.tgt = tgt;
1291 cbufp = &cbuf;
1295 else
1297 tgt->to_free = NULL;
1298 tgt->tgt_start = 0;
1299 tgt->tgt_end = 0;
1302 tgt_size = 0;
1303 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1304 tgt_size = mapnum * sizeof (void *);
1306 tgt->array = NULL;
1307 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1309 if (not_found_cnt)
1310 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1311 splay_tree_node array = tgt->array;
1312 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1313 uintptr_t field_tgt_base = 0;
1314 splay_tree_key field_tgt_structelem_first = NULL;
1316 for (i = 0; i < mapnum; i++)
1317 if (has_always_ptrset
1318 && tgt->list[i].key
1319 && (get_kind (short_mapkind, kinds, i) & typemask)
1320 == GOMP_MAP_TO_PSET)
1322 splay_tree_key k = tgt->list[i].key;
1323 bool has_nullptr = false;
1324 size_t j;
1325 for (j = 0; j < k->tgt->list_count; j++)
1326 if (k->tgt->list[j].key == k)
1328 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1329 break;
1331 if (k->tgt->list_count == 0)
1332 has_nullptr = true;
1333 else
1334 assert (j < k->tgt->list_count);
1336 tgt->list[i].has_null_ptr_assoc = false;
1337 for (j = i + 1; j < mapnum; j++)
1339 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1340 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1341 && (!has_nullptr
1342 || !GOMP_MAP_POINTER_P (ptr_kind)
1343 || *(void **) hostaddrs[j] == NULL))
1344 break;
1345 else if ((uintptr_t) hostaddrs[j] < k->host_start
1346 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1347 > k->host_end))
1348 break;
1349 else
1351 if (*(void **) hostaddrs[j] == NULL)
1352 tgt->list[i].has_null_ptr_assoc = true;
1353 tgt->list[j].key = k;
1354 tgt->list[j].copy_from = false;
1355 tgt->list[j].always_copy_from = false;
1356 tgt->list[j].is_attach = false;
1357 gomp_increment_refcount (k, refcount_set);
1358 gomp_map_pointer (k->tgt, aq,
1359 (uintptr_t) *(void **) hostaddrs[j],
1360 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1361 - k->host_start),
1362 sizes[j], cbufp, false);
1365 i = j - 1;
1367 else if (tgt->list[i].key == NULL)
1369 int kind = get_kind (short_mapkind, kinds, i);
1370 bool implicit = get_implicit (short_mapkind, kinds, i);
1371 if (hostaddrs[i] == NULL)
1372 continue;
1373 switch (kind & typemask)
1375 size_t align, len, first, last;
1376 splay_tree_key n;
1377 case GOMP_MAP_FIRSTPRIVATE:
1378 align = (size_t) 1 << (kind >> rshift);
1379 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1380 tgt->list[i].offset = tgt_size;
1381 len = sizes[i];
1382 gomp_copy_host2dev (devicep, aq,
1383 (void *) (tgt->tgt_start + tgt_size),
1384 (void *) hostaddrs[i], len, false, cbufp);
1385 /* Save the device address in hostaddrs[i] so that it is available later
1386 when doing a deep firstprivate with pointer attach. */
1387 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1388 tgt_size += len;
1390 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1391 firstprivate to hostaddrs[i+1], which is assumed to contain a
1392 device address. */
1393 if (i + 1 < mapnum
1394 && (GOMP_MAP_ATTACH
1395 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1397 uintptr_t target = (uintptr_t) hostaddrs[i];
1398 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1399 /* Per
1400 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1401 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1402 this probably needs revision for 'aq' usage. */
1403 assert (!aq);
1404 gomp_copy_host2dev (devicep, aq, devptr, &target,
1405 sizeof (void *), false, cbufp);
1406 ++i;
1408 continue;
1409 case GOMP_MAP_FIRSTPRIVATE_INT:
1410 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1411 continue;
1412 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1413 /* The OpenACC 'host_data' construct only allows 'use_device'
1414 "mapping" clauses, so in the first loop, 'not_found_cnt'
1415 must always have been zero, so all OpenACC 'use_device'
1416 clauses have already been handled. (We can only easily test
1417 'use_device' with 'if_present' clause here.) */
1418 assert (tgt->list[i].offset == OFFSET_INLINED);
1419 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1420 code conceptually simple, similar to the first loop. */
1421 case GOMP_MAP_USE_DEVICE_PTR:
1422 if (tgt->list[i].offset == 0)
1424 cur_node.host_start = (uintptr_t) hostaddrs[i];
1425 cur_node.host_end = cur_node.host_start;
1426 n = gomp_map_lookup (mem_map, &cur_node);
1427 if (n != NULL)
1429 cur_node.host_start -= n->host_start;
1430 hostaddrs[i]
1431 = (void *) (n->tgt->tgt_start + n->tgt_offset
1432 + cur_node.host_start);
1434 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1436 gomp_mutex_unlock (&devicep->lock);
1437 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1439 else if ((kind & typemask)
1440 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1441 /* If not present, continue using the host address. */
1443 else
1444 __builtin_unreachable ();
1445 tgt->list[i].offset = OFFSET_INLINED;
1447 continue;
1448 case GOMP_MAP_STRUCT:
1449 first = i + 1;
1450 last = i + sizes[i];
1451 cur_node.host_start = (uintptr_t) hostaddrs[i];
1452 cur_node.host_end = (uintptr_t) hostaddrs[last]
1453 + sizes[last];
1454 if (tgt->list[first].key != NULL)
1455 continue;
1456 n = splay_tree_lookup (mem_map, &cur_node);
1457 if (n == NULL)
1459 size_t align = (size_t) 1 << (kind >> rshift);
1460 tgt_size -= (uintptr_t) hostaddrs[first]
1461 - (uintptr_t) hostaddrs[i];
1462 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1463 tgt_size += (uintptr_t) hostaddrs[first]
1464 - (uintptr_t) hostaddrs[i];
1465 field_tgt_base = (uintptr_t) hostaddrs[first];
1466 field_tgt_offset = tgt_size;
1467 field_tgt_clear = last;
1468 field_tgt_structelem_first = NULL;
1469 tgt_size += cur_node.host_end
1470 - (uintptr_t) hostaddrs[first];
1471 continue;
1473 for (i = first; i <= last; i++)
1474 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1475 sizes, kinds, cbufp, refcount_set);
1476 i--;
1477 continue;
1478 case GOMP_MAP_ALWAYS_POINTER:
1479 cur_node.host_start = (uintptr_t) hostaddrs[i];
1480 cur_node.host_end = cur_node.host_start + sizeof (void *);
1481 n = splay_tree_lookup (mem_map, &cur_node);
1482 if (n == NULL
1483 || n->host_start > cur_node.host_start
1484 || n->host_end < cur_node.host_end)
1486 gomp_mutex_unlock (&devicep->lock);
1487 gomp_fatal ("always pointer not mapped");
1489 if (i > 0
1490 && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1491 != GOMP_MAP_ALWAYS_POINTER))
1492 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1493 if (cur_node.tgt_offset)
1494 cur_node.tgt_offset -= sizes[i];
1495 gomp_copy_host2dev (devicep, aq,
1496 (void *) (n->tgt->tgt_start
1497 + n->tgt_offset
1498 + cur_node.host_start
1499 - n->host_start),
1500 (void *) &cur_node.tgt_offset,
1501 sizeof (void *), true, cbufp);
1502 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1503 + cur_node.host_start - n->host_start;
1504 continue;
1505 case GOMP_MAP_IF_PRESENT:
1506 /* Not present - otherwise handled above. Skip over its
1507 MAP_POINTER as well. */
1508 if (i + 1 < mapnum
1509 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1510 == GOMP_MAP_POINTER))
1511 ++i;
1512 continue;
1513 case GOMP_MAP_ATTACH:
1514 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1516 cur_node.host_start = (uintptr_t) hostaddrs[i];
1517 cur_node.host_end = cur_node.host_start + sizeof (void *);
1518 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1519 if (n != NULL)
1521 tgt->list[i].key = n;
1522 tgt->list[i].offset = cur_node.host_start - n->host_start;
1523 tgt->list[i].length = n->host_end - n->host_start;
1524 tgt->list[i].copy_from = false;
1525 tgt->list[i].always_copy_from = false;
1526 tgt->list[i].is_attach = true;
1527 /* OpenACC 'attach'/'detach' doesn't affect
1528 structured/dynamic reference counts ('n->refcount',
1529 'n->dynamic_refcount'). */
1531 bool zlas
1532 = ((kind & typemask)
1533 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1534 gomp_attach_pointer (devicep, aq, mem_map, n,
1535 (uintptr_t) hostaddrs[i], sizes[i],
1536 cbufp, zlas);
1538 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1540 gomp_mutex_unlock (&devicep->lock);
1541 gomp_fatal ("outer struct not mapped for attach");
1543 continue;
1545 default:
1546 break;
1548 splay_tree_key k = &array->key;
1549 k->host_start = (uintptr_t) hostaddrs[i];
1550 if (!GOMP_MAP_POINTER_P (kind & typemask))
1551 k->host_end = k->host_start + sizes[i];
1552 else
1553 k->host_end = k->host_start + sizeof (void *);
1554 splay_tree_key n = splay_tree_lookup (mem_map, k);
1555 if (n && n->refcount != REFCOUNT_LINK)
1556 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1557 kind & typemask, false, implicit, cbufp,
1558 refcount_set);
1559 else
1561 k->aux = NULL;
1562 if (n && n->refcount == REFCOUNT_LINK)
1564 /* Replace target address of the pointer with target address
1565 of mapped object in the splay tree. */
1566 splay_tree_remove (mem_map, n);
1567 k->aux
1568 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1569 k->aux->link_key = n;
1571 size_t align = (size_t) 1 << (kind >> rshift);
1572 tgt->list[i].key = k;
1573 k->tgt = tgt;
1574 k->refcount = 0;
1575 k->dynamic_refcount = 0;
1576 if (field_tgt_clear != FIELD_TGT_EMPTY)
1578 k->tgt_offset = k->host_start - field_tgt_base
1579 + field_tgt_offset;
1580 if (openmp_p)
1582 k->refcount = REFCOUNT_STRUCTELEM;
1583 if (field_tgt_structelem_first == NULL)
1585 /* Set to first structure element of sequence. */
1586 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1587 field_tgt_structelem_first = k;
1589 else
1590 /* Point to refcount of leading element, but do not
1591 increment again. */
1592 k->structelem_refcount_ptr
1593 = &field_tgt_structelem_first->structelem_refcount;
1595 if (i == field_tgt_clear)
1597 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1598 field_tgt_structelem_first = NULL;
1601 if (i == field_tgt_clear)
1602 field_tgt_clear = FIELD_TGT_EMPTY;
1604 else
1606 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1607 k->tgt_offset = tgt_size;
1608 tgt_size += k->host_end - k->host_start;
1610 /* First increment, from 0 to 1. gomp_increment_refcount
1611 encapsulates the different increment cases, so use this
1612 instead of directly setting 1 during initialization. */
1613 gomp_increment_refcount (k, refcount_set);
1615 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1616 tgt->list[i].always_copy_from
1617 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1618 tgt->list[i].is_attach = false;
1619 tgt->list[i].offset = 0;
1620 tgt->list[i].length = k->host_end - k->host_start;
1621 tgt->refcount++;
1622 array->left = NULL;
1623 array->right = NULL;
1624 splay_tree_insert (mem_map, array);
1625 switch (kind & typemask)
1627 case GOMP_MAP_ALLOC:
1628 case GOMP_MAP_FROM:
1629 case GOMP_MAP_FORCE_ALLOC:
1630 case GOMP_MAP_FORCE_FROM:
1631 case GOMP_MAP_ALWAYS_FROM:
1632 break;
1633 case GOMP_MAP_TO:
1634 case GOMP_MAP_TOFROM:
1635 case GOMP_MAP_FORCE_TO:
1636 case GOMP_MAP_FORCE_TOFROM:
1637 case GOMP_MAP_ALWAYS_TO:
1638 case GOMP_MAP_ALWAYS_TOFROM:
1639 gomp_copy_host2dev (devicep, aq,
1640 (void *) (tgt->tgt_start
1641 + k->tgt_offset),
1642 (void *) k->host_start,
1643 k->host_end - k->host_start,
1644 false, cbufp);
1645 break;
1646 case GOMP_MAP_POINTER:
1647 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1648 gomp_map_pointer
1649 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1650 k->tgt_offset, sizes[i], cbufp,
1651 ((kind & typemask)
1652 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1653 break;
1654 case GOMP_MAP_TO_PSET:
1655 gomp_copy_host2dev (devicep, aq,
1656 (void *) (tgt->tgt_start
1657 + k->tgt_offset),
1658 (void *) k->host_start,
1659 k->host_end - k->host_start,
1660 false, cbufp);
1661 tgt->list[i].has_null_ptr_assoc = false;
1663 for (j = i + 1; j < mapnum; j++)
1665 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1666 & typemask);
1667 if (!GOMP_MAP_POINTER_P (ptr_kind)
1668 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1669 break;
1670 else if ((uintptr_t) hostaddrs[j] < k->host_start
1671 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1672 > k->host_end))
1673 break;
1674 else
1676 tgt->list[j].key = k;
1677 tgt->list[j].copy_from = false;
1678 tgt->list[j].always_copy_from = false;
1679 tgt->list[j].is_attach = false;
1680 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1681 /* For OpenMP, the use of refcount_sets causes
1682 errors if we set k->refcount = 1 above but also
1683 increment it again here, because the decrements
1684 will not match up, since we decrement only once
1685 for each key's refcount. Therefore avoid this
1686 increment for OpenMP constructs. */
1687 if (!openmp_p)
1688 gomp_increment_refcount (k, refcount_set);
1689 gomp_map_pointer (tgt, aq,
1690 (uintptr_t) *(void **) hostaddrs[j],
1691 k->tgt_offset
1692 + ((uintptr_t) hostaddrs[j]
1693 - k->host_start),
1694 sizes[j], cbufp, false);
1697 i = j - 1;
1698 break;
1699 case GOMP_MAP_FORCE_PRESENT:
1701 /* We already looked up the memory region above and it
1702 was missing. */
1703 size_t size = k->host_end - k->host_start;
1704 gomp_mutex_unlock (&devicep->lock);
1705 #ifdef HAVE_INTTYPES_H
1706 gomp_fatal ("present clause: !acc_is_present (%p, "
1707 "%"PRIu64" (0x%"PRIx64"))",
1708 (void *) k->host_start,
1709 (uint64_t) size, (uint64_t) size);
1710 #else
1711 gomp_fatal ("present clause: !acc_is_present (%p, "
1712 "%lu (0x%lx))", (void *) k->host_start,
1713 (unsigned long) size, (unsigned long) size);
1714 #endif
1716 break;
1717 case GOMP_MAP_FORCE_DEVICEPTR:
1718 assert (k->host_end - k->host_start == sizeof (void *));
1719 gomp_copy_host2dev (devicep, aq,
1720 (void *) (tgt->tgt_start
1721 + k->tgt_offset),
1722 (void *) k->host_start,
1723 sizeof (void *), false, cbufp);
1724 break;
1725 default:
1726 gomp_mutex_unlock (&devicep->lock);
1727 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1728 kind);
1731 if (k->aux && k->aux->link_key)
1733 /* Set link pointer on target to the device address of the
1734 mapped object. */
1735 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1736 /* We intentionally do not use coalescing here, as it's not
1737 data allocated by the current call to this function. */
1738 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1739 &tgt_addr, sizeof (void *), true, NULL);
1741 array++;
1746 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1748 for (i = 0; i < mapnum; i++)
1750 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1751 gomp_copy_host2dev (devicep, aq,
1752 (void *) (tgt->tgt_start + i * sizeof (void *)),
1753 (void *) &cur_node.tgt_offset, sizeof (void *),
1754 true, cbufp);
1758 if (cbufp)
1760 long c = 0;
1761 for (c = 0; c < cbuf.chunk_cnt; ++c)
1762 gomp_copy_host2dev (devicep, aq,
1763 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1764 (char *) cbuf.buf + (cbuf.chunks[c].start
1765 - cbuf.chunks[0].start),
1766 cbuf.chunks[c].end - cbuf.chunks[c].start,
1767 false, NULL);
1768 if (aq)
1769 /* Free once the transfer has completed. */
1770 devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
1771 else
1772 free (cbuf.buf);
1773 cbuf.buf = NULL;
1774 cbufp = NULL;
1777 /* If the variable from "omp target enter data" map-list was already mapped,
1778 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1779 gomp_exit_data. */
1780 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1782 free (tgt);
1783 tgt = NULL;
1786 gomp_mutex_unlock (&devicep->lock);
1787 return tgt;
1790 static struct target_mem_desc *
1791 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1792 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1793 bool short_mapkind, htab_t *refcount_set,
1794 enum gomp_map_vars_kind pragma_kind)
1796 /* This management of a local refcount_set is for convenience of callers
1797 who do not share a refcount_set over multiple map/unmap uses. */
1798 htab_t local_refcount_set = NULL;
1799 if (refcount_set == NULL)
1801 local_refcount_set = htab_create (mapnum);
1802 refcount_set = &local_refcount_set;
1805 struct target_mem_desc *tgt;
1806 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1807 sizes, kinds, short_mapkind, refcount_set,
1808 pragma_kind);
1809 if (local_refcount_set)
1810 htab_free (local_refcount_set);
1812 return tgt;
1815 attribute_hidden struct target_mem_desc *
1816 goacc_map_vars (struct gomp_device_descr *devicep,
1817 struct goacc_asyncqueue *aq, size_t mapnum,
1818 void **hostaddrs, void **devaddrs, size_t *sizes,
1819 void *kinds, bool short_mapkind,
1820 enum gomp_map_vars_kind pragma_kind)
1822 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1823 sizes, kinds, short_mapkind, NULL,
1824 GOMP_MAP_VARS_OPENACC | pragma_kind);
1827 static void
1828 gomp_unmap_tgt (struct target_mem_desc *tgt)
1830 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1831 if (tgt->tgt_end)
1832 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1834 free (tgt->array);
1835 free (tgt);
1838 static bool
1839 gomp_unref_tgt (void *ptr)
1841 bool is_tgt_unmapped = false;
1843 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1845 if (tgt->refcount > 1)
1846 tgt->refcount--;
1847 else
1849 gomp_unmap_tgt (tgt);
1850 is_tgt_unmapped = true;
1853 return is_tgt_unmapped;
1856 static void
1857 gomp_unref_tgt_void (void *ptr)
1859 (void) gomp_unref_tgt (ptr);
1862 static void
1863 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1865 splay_tree_remove (sp, k);
1866 if (k->aux)
1868 if (k->aux->link_key)
1869 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1870 if (k->aux->attach_count)
1871 free (k->aux->attach_count);
1872 free (k->aux);
1873 k->aux = NULL;
1877 static inline __attribute__((always_inline)) bool
1878 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1879 struct goacc_asyncqueue *aq)
1881 bool is_tgt_unmapped = false;
1883 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1885 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1886 /* Infer the splay_tree_key of the first structelem key using the
1887 pointer to the first structelem_refcount. */
1888 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1889 - offsetof (struct splay_tree_key_s,
1890 structelem_refcount));
1891 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1893 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1894 with the splay_tree_keys embedded inside. */
1895 splay_tree_node node =
1896 (splay_tree_node) ((char *) k
1897 - offsetof (struct splay_tree_node_s, key));
1898 while (true)
1900 /* Start from the _FIRST key, and continue for all following
1901 sibling keys. */
1902 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1903 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1904 break;
1905 else
1906 k = &(++node)->key;
1909 else
1910 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1912 if (aq)
1913 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1914 (void *) k->tgt);
1915 else
1916 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1917 return is_tgt_unmapped;
1920 attribute_hidden bool
1921 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1923 return gomp_remove_var_internal (devicep, k, NULL);
1926 /* Remove a variable asynchronously. This actually removes the variable
1927 mapping immediately, but retains the linked target_mem_desc until the
1928 asynchronous operation has completed (as it may still refer to target
1929 memory). The device lock must be held before entry, and remains locked on
1930 exit. */
1932 attribute_hidden void
1933 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1934 struct goacc_asyncqueue *aq)
1936 (void) gomp_remove_var_internal (devicep, k, aq);
1939 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1940 variables back from device to host; if it is false, it is assumed that this
1941 has been done already. */
1943 static inline __attribute__((always_inline)) void
1944 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1945 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1947 struct gomp_device_descr *devicep = tgt->device_descr;
1949 if (tgt->list_count == 0)
1951 free (tgt);
1952 return;
1955 gomp_mutex_lock (&devicep->lock);
1956 if (devicep->state == GOMP_DEVICE_FINALIZED)
1958 gomp_mutex_unlock (&devicep->lock);
1959 free (tgt->array);
1960 free (tgt);
1961 return;
1964 size_t i;
1966 /* We must perform detachments before any copies back to the host. */
1967 for (i = 0; i < tgt->list_count; i++)
1969 splay_tree_key k = tgt->list[i].key;
1971 if (k != NULL && tgt->list[i].is_attach)
1972 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1973 + tgt->list[i].offset,
1974 false, NULL);
1977 for (i = 0; i < tgt->list_count; i++)
1979 splay_tree_key k = tgt->list[i].key;
1980 if (k == NULL)
1981 continue;
1983 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1984 counts ('n->refcount', 'n->dynamic_refcount'). */
1985 if (tgt->list[i].is_attach)
1986 continue;
1988 bool do_copy, do_remove;
1989 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1991 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1992 || tgt->list[i].always_copy_from)
1993 gomp_copy_dev2host (devicep, aq,
1994 (void *) (k->host_start + tgt->list[i].offset),
1995 (void *) (k->tgt->tgt_start + k->tgt_offset
1996 + tgt->list[i].offset),
1997 tgt->list[i].length);
1998 if (do_remove)
2000 struct target_mem_desc *k_tgt = k->tgt;
2001 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2002 /* It would be bad if TGT got unmapped while we're still iterating
2003 over its LIST_COUNT, and we also expect to use it in the following
2004 code. */
2005 assert (!is_tgt_unmapped
2006 || k_tgt != tgt);
2010 if (aq)
2011 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2012 (void *) tgt);
2013 else
2014 gomp_unref_tgt ((void *) tgt);
2016 gomp_mutex_unlock (&devicep->lock);
2019 static void
2020 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2021 htab_t *refcount_set)
2023 /* This management of a local refcount_set is for convenience of callers
2024 who do not share a refcount_set over multiple map/unmap uses. */
2025 htab_t local_refcount_set = NULL;
2026 if (refcount_set == NULL)
2028 local_refcount_set = htab_create (tgt->list_count);
2029 refcount_set = &local_refcount_set;
2032 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2034 if (local_refcount_set)
2035 htab_free (local_refcount_set);
2038 attribute_hidden void
2039 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2040 struct goacc_asyncqueue *aq)
2042 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2045 static void
2046 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2047 size_t *sizes, void *kinds, bool short_mapkind)
2049 size_t i;
2050 struct splay_tree_key_s cur_node;
2051 const int typemask = short_mapkind ? 0xff : 0x7;
2053 if (!devicep)
2054 return;
2056 if (mapnum == 0)
2057 return;
2059 gomp_mutex_lock (&devicep->lock);
2060 if (devicep->state == GOMP_DEVICE_FINALIZED)
2062 gomp_mutex_unlock (&devicep->lock);
2063 return;
2066 for (i = 0; i < mapnum; i++)
2067 if (sizes[i])
2069 cur_node.host_start = (uintptr_t) hostaddrs[i];
2070 cur_node.host_end = cur_node.host_start + sizes[i];
2071 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2072 if (n)
2074 int kind = get_kind (short_mapkind, kinds, i);
2075 if (n->host_start > cur_node.host_start
2076 || n->host_end < cur_node.host_end)
2078 gomp_mutex_unlock (&devicep->lock);
2079 gomp_fatal ("Trying to update [%p..%p) object when "
2080 "only [%p..%p) is mapped",
2081 (void *) cur_node.host_start,
2082 (void *) cur_node.host_end,
2083 (void *) n->host_start,
2084 (void *) n->host_end);
2087 if (n->aux && n->aux->attach_count)
2089 uintptr_t addr = cur_node.host_start;
2090 while (addr < cur_node.host_end)
2092 /* We have to be careful not to overwrite still attached
2093 pointers during host<->device updates. */
2094 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2095 if (n->aux->attach_count[i] == 0)
2097 void *devaddr = (void *) (n->tgt->tgt_start
2098 + n->tgt_offset
2099 + addr - n->host_start);
2100 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2101 gomp_copy_host2dev (devicep, NULL,
2102 devaddr, (void *) addr,
2103 sizeof (void *), false, NULL);
2104 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2105 gomp_copy_dev2host (devicep, NULL,
2106 (void *) addr, devaddr,
2107 sizeof (void *));
2109 addr += sizeof (void *);
2112 else
2114 void *hostaddr = (void *) cur_node.host_start;
2115 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2116 + cur_node.host_start
2117 - n->host_start);
2118 size_t size = cur_node.host_end - cur_node.host_start;
2120 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2121 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2122 false, NULL);
2123 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2124 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2128 gomp_mutex_unlock (&devicep->lock);
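/* Illustrative use (a sketch, not part of this file's code paths): the loop
   above implements what a user-level

     #pragma omp target update from(a[0:n]) to(b[0:n])

   needs, copying each listed object between its existing host and device
   mappings while skipping pointers that are still attached.  */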
2131 static struct gomp_offload_icv_list *
2132 gomp_get_offload_icv_item (int dev_num)
2134 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2135 while (l != NULL && l->device_num != dev_num)
2136 l = l->next;
2138 return l;
2141 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2142 depending on the device num and the variable hierarchy
2143 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2144 device and thus no item with that device number is contained in
2145 gomp_offload_icv_list, then a new item is created and added to the list. */
2147 static struct gomp_offload_icvs *
2148 get_gomp_offload_icvs (int dev_num)
2150 struct gomp_icv_list *dev
2151 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2152 struct gomp_icv_list *all
2153 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2154 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2155 struct gomp_offload_icv_list *offload_icvs
2156 = gomp_get_offload_icv_item (dev_num);
2158 if (offload_icvs != NULL)
2159 return &offload_icvs->icvs;
2161 struct gomp_offload_icv_list *new
2162 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2164 new->device_num = dev_num;
2165 new->icvs.device_num = dev_num;
2166 new->next = gomp_offload_icv_list;
2168 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2169 new->icvs.nteams = dev_x->icvs.nteams_var;
2170 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2171 new->icvs.nteams = dev->icvs.nteams_var;
2172 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2173 new->icvs.nteams = all->icvs.nteams_var;
2174 else
2175 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2177 if (dev_x != NULL
2178 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2179 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2180 else if (dev != NULL
2181 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2182 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2183 else if (all != NULL
2184 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2185 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2186 else
2187 new->icvs.teams_thread_limit
2188 = gomp_default_icv_values.teams_thread_limit_var;
2190 if (dev_x != NULL
2191 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2192 new->icvs.default_device = dev_x->icvs.default_device_var;
2193 else if (dev != NULL
2194 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2195 new->icvs.default_device = dev->icvs.default_device_var;
2196 else if (all != NULL
2197 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2198 new->icvs.default_device = all->icvs.default_device_var;
2199 else
2200 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2202 gomp_offload_icv_list = new;
2203 return &new->icvs;
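/* A minimal sketch of the precedence implemented above (the helper below is
   hypothetical and kept out of the build): a device-specific setting such as
   OMP_NUM_TEAMS_DEV_42 wins over the generic OMP_NUM_TEAMS_DEV, which wins
   over OMP_NUM_TEAMS_ALL, with the built-in default used last.  */
#if 0
static int
pick_nteams_for_device (const int *dev_x, const int *dev, const int *all,
			int fallback)
{
  /* The first non-NULL source, in decreasing precedence, wins.  */
  if (dev_x)
    return *dev_x;	/* e.g. OMP_NUM_TEAMS_DEV_42  */
  if (dev)
    return *dev;	/* e.g. OMP_NUM_TEAMS_DEV  */
  if (all)
    return *all;	/* e.g. OMP_NUM_TEAMS_ALL  */
  return fallback;	/* gomp_default_icv_values.nteams_var  */
}
#endif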
2206 /* Load the image pointed to by TARGET_DATA to the device specified by
2207 DEVICEP, and insert into the splay tree the mapping between addresses from
2208 HOST_TABLE and from the loaded target image. We rely on the host and device
2209 compilers emitting variables and functions in the same order. */
2211 static void
2212 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2213 const void *host_table, const void *target_data,
2214 bool is_register_lock)
2216 void **host_func_table = ((void ***) host_table)[0];
2217 void **host_funcs_end = ((void ***) host_table)[1];
2218 void **host_var_table = ((void ***) host_table)[2];
2219 void **host_vars_end = ((void ***) host_table)[3];
2221 /* The func table contains only addresses, the var table contains addresses
2222 and corresponding sizes. */
2223 int num_funcs = host_funcs_end - host_func_table;
2224 int num_vars = (host_vars_end - host_var_table) / 2;
2226 /* Load image to device and get target addresses for the image. */
2227 struct addr_pair *target_table = NULL;
2228 uint64_t *rev_target_fn_table = NULL;
2229 int i, num_target_entries;
2231 /* With reverse offload, also insert the target-to-host address mappings. */
2232 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2234 num_target_entries
2235 = devicep->load_image_func (devicep->target_id, version,
2236 target_data, &target_table,
2237 rev_lookup ? &rev_target_fn_table : NULL);
2239 if (num_target_entries != num_funcs + num_vars
2240 /* "+1" due to the additional ICV struct. */
2241 && num_target_entries != num_funcs + num_vars + 1)
2243 gomp_mutex_unlock (&devicep->lock);
2244 if (is_register_lock)
2245 gomp_mutex_unlock (&register_lock);
2246 gomp_fatal ("Cannot map target functions or variables"
2247 " (expected %u, have %u)", num_funcs + num_vars,
2248 num_target_entries);
2251 /* Insert host-target address mapping into splay tree. */
2252 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2253 /* "+1" due to the additional ICV struct. */
2254 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2255 * sizeof (*tgt->array));
2256 if (rev_target_fn_table)
2257 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2258 else
2259 tgt->rev_array = NULL;
2260 tgt->refcount = REFCOUNT_INFINITY;
2261 tgt->tgt_start = 0;
2262 tgt->tgt_end = 0;
2263 tgt->to_free = NULL;
2264 tgt->prev = NULL;
2265 tgt->list_count = 0;
2266 tgt->device_descr = devicep;
2267 splay_tree_node array = tgt->array;
2268 reverse_splay_tree_node rev_array = tgt->rev_array;
2270 for (i = 0; i < num_funcs; i++)
2272 splay_tree_key k = &array->key;
2273 k->host_start = (uintptr_t) host_func_table[i];
2274 k->host_end = k->host_start + 1;
2275 k->tgt = tgt;
2276 k->tgt_offset = target_table[i].start;
2277 k->refcount = REFCOUNT_INFINITY;
2278 k->dynamic_refcount = 0;
2279 k->aux = NULL;
2280 array->left = NULL;
2281 array->right = NULL;
2282 splay_tree_insert (&devicep->mem_map, array);
2283 if (rev_target_fn_table)
2285 reverse_splay_tree_key k2 = &rev_array->key;
2286 k2->dev = rev_target_fn_table[i];
2287 k2->k = k;
2288 rev_array->left = NULL;
2289 rev_array->right = NULL;
2290 if (k2->dev != 0)
2291 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2292 rev_array++;
2294 array++;
2297 /* Most significant bit of the size in host and target tables marks
2298 "omp declare target link" variables. */
2299 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2300 const uintptr_t size_mask = ~link_bit;
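/* For example (illustrative): on a 64-bit host a recorded size of
   0x8000000000000010 has the link bit set and thus denotes a 16-byte
   "omp declare target link" variable; masking with size_mask recovers the
   real size.  */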
2302 for (i = 0; i < num_vars; i++)
2304 struct addr_pair *target_var = &target_table[num_funcs + i];
2305 uintptr_t target_size = target_var->end - target_var->start;
2306 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2308 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2310 gomp_mutex_unlock (&devicep->lock);
2311 if (is_register_lock)
2312 gomp_mutex_unlock (&register_lock);
2313 gomp_fatal ("Cannot map target variables (size mismatch)");
2316 splay_tree_key k = &array->key;
2317 k->host_start = (uintptr_t) host_var_table[i * 2];
2318 k->host_end
2319 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2320 k->tgt = tgt;
2321 k->tgt_offset = target_var->start;
2322 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2323 k->dynamic_refcount = 0;
2324 k->aux = NULL;
2325 array->left = NULL;
2326 array->right = NULL;
2327 splay_tree_insert (&devicep->mem_map, array);
2328 array++;
2331 /* The last entry is for the ICVs variable.
2332 Tolerate the case where the plugin does not return that entry. */
2333 if (num_funcs + num_vars < num_target_entries)
2335 struct addr_pair *var = &target_table[num_funcs + num_vars];
2337 /* Start address will be non-zero for the ICVs variable if
2338 the variable was found in this image. */
2339 if (var->start != 0)
2341 /* The index of the devicep within devices[] is regarded as its
2342 'device number', which is different from the per-device type
2343 devicep->target_id. */
2344 int dev_num = (int) (devicep - &devices[0]);
2345 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2346 size_t var_size = var->end - var->start;
2347 if (var_size != sizeof (struct gomp_offload_icvs))
2349 gomp_mutex_unlock (&devicep->lock);
2350 if (is_register_lock)
2351 gomp_mutex_unlock (&register_lock);
2352 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2353 "format");
2355 /* Copy the ICVs variable to its place in device memory, thereby
2356 actually putting its device number into effect. */
2357 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2358 var_size, false, NULL);
2359 splay_tree_key k = &array->key;
2360 k->host_start = (uintptr_t) icvs;
2361 k->host_end =
2362 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2363 k->tgt = tgt;
2364 k->tgt_offset = var->start;
2365 k->refcount = REFCOUNT_INFINITY;
2366 k->dynamic_refcount = 0;
2367 k->aux = NULL;
2368 array->left = NULL;
2369 array->right = NULL;
2370 splay_tree_insert (&devicep->mem_map, array);
2371 array++;
2375 free (target_table);
2378 /* Unload the mappings described by target_data from device DEVICE_P.
2379 The device must be locked. */
2381 static void
2382 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2383 unsigned version,
2384 const void *host_table, const void *target_data)
2386 void **host_func_table = ((void ***) host_table)[0];
2387 void **host_funcs_end = ((void ***) host_table)[1];
2388 void **host_var_table = ((void ***) host_table)[2];
2389 void **host_vars_end = ((void ***) host_table)[3];
2391 /* The func table contains only addresses, the var table contains addresses
2392 and corresponding sizes. */
2393 int num_funcs = host_funcs_end - host_func_table;
2394 int num_vars = (host_vars_end - host_var_table) / 2;
2396 struct splay_tree_key_s k;
2397 splay_tree_key node = NULL;
2399 /* Find the mapping at the start of the node array. */
2400 if (num_funcs || num_vars)
2402 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2403 : (uintptr_t) host_var_table[0]);
2404 k.host_end = k.host_start + 1;
2405 node = splay_tree_lookup (&devicep->mem_map, &k);
2408 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2410 gomp_mutex_unlock (&devicep->lock);
2411 gomp_fatal ("image unload fail");
2413 if (devicep->mem_map_rev.root)
2415 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2416 real allocation. */
2417 assert (node && node->tgt && node->tgt->rev_array);
2418 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2419 free (node->tgt->rev_array);
2420 devicep->mem_map_rev.root = NULL;
2423 /* Remove mappings from splay tree. */
2424 int i;
2425 for (i = 0; i < num_funcs; i++)
2427 k.host_start = (uintptr_t) host_func_table[i];
2428 k.host_end = k.host_start + 1;
2429 splay_tree_remove (&devicep->mem_map, &k);
2432 /* Most significant bit of the size in host and target tables marks
2433 "omp declare target link" variables. */
2434 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2435 const uintptr_t size_mask = ~link_bit;
2436 bool is_tgt_unmapped = false;
2438 for (i = 0; i < num_vars; i++)
2440 k.host_start = (uintptr_t) host_var_table[i * 2];
2441 k.host_end
2442 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2444 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2445 splay_tree_remove (&devicep->mem_map, &k);
2446 else
2448 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2449 is_tgt_unmapped = gomp_remove_var (devicep, n);
2453 if (node && !is_tgt_unmapped)
2455 free (node->tgt);
2456 free (node);
2460 static void
2461 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2463 char *end = buf + size, *p = buf;
2464 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2465 p += snprintf (p, end - p, "unified_address");
2466 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2467 p += snprintf (p, end - p, "%sunified_shared_memory",
2468 (p == buf ? "" : ", "));
2469 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2470 p += snprintf (p, end - p, "%sreverse_offload",
2471 (p == buf ? "" : ", "));
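/* Usage sketch (illustrative only; requires_to_name_example is a hypothetical
   helper mirroring the caller below, excluded from the build):  */
#if 0
static void
requires_to_name_example (void)
{
  char buf[sizeof ("unified_address, unified_shared_memory, "
		   "reverse_offload")];
  gomp_requires_to_name (buf, sizeof (buf),
			 GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
			 | GOMP_REQUIRES_REVERSE_OFFLOAD);
  /* buf now holds "unified_shared_memory, reverse_offload".  */
}
#endif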
2474 /* This function should be called from every offload image while loading.
2475 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2476 the target, and DATA. */
2478 void
2479 GOMP_offload_register_ver (unsigned version, const void *host_table,
2480 int target_type, const void *data)
2482 int i;
2484 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2485 gomp_fatal ("Library too old for offload (version %u < %u)",
2486 GOMP_VERSION, GOMP_VERSION_LIB (version));
2488 int omp_req;
2489 const void *target_data;
2490 if (GOMP_VERSION_LIB (version) > 1)
2492 omp_req = (int) (size_t) ((void **) data)[0];
2493 target_data = &((void **) data)[1];
2495 else
2497 omp_req = 0;
2498 target_data = data;
2501 gomp_mutex_lock (&register_lock);
2503 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2505 char buf1[sizeof ("unified_address, unified_shared_memory, "
2506 "reverse_offload")];
2507 char buf2[sizeof ("unified_address, unified_shared_memory, "
2508 "reverse_offload")];
2509 gomp_requires_to_name (buf2, sizeof (buf2),
2510 omp_req != GOMP_REQUIRES_TARGET_USED
2511 ? omp_req : omp_requires_mask);
2512 if (omp_req != GOMP_REQUIRES_TARGET_USED
2513 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2515 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2516 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2517 "in multiple compilation units: '%s' vs. '%s'",
2518 buf1, buf2);
2520 else
2521 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2522 "some compilation units", buf2);
2524 omp_requires_mask = omp_req;
2526 /* Load image to all initialized devices. */
2527 for (i = 0; i < num_devices; i++)
2529 struct gomp_device_descr *devicep = &devices[i];
2530 gomp_mutex_lock (&devicep->lock);
2531 if (devicep->type == target_type
2532 && devicep->state == GOMP_DEVICE_INITIALIZED)
2533 gomp_load_image_to_device (devicep, version,
2534 host_table, target_data, true);
2535 gomp_mutex_unlock (&devicep->lock);
2538 /* Insert image to array of pending images. */
2539 offload_images
2540 = gomp_realloc_unlock (offload_images,
2541 (num_offload_images + 1)
2542 * sizeof (struct offload_image_descr));
2543 offload_images[num_offload_images].version = version;
2544 offload_images[num_offload_images].type = target_type;
2545 offload_images[num_offload_images].host_table = host_table;
2546 offload_images[num_offload_images].target_data = target_data;
2548 num_offload_images++;
2549 gomp_mutex_unlock (&register_lock);
2552 /* Legacy entry point. */
2554 void
2555 GOMP_offload_register (const void *host_table, int target_type,
2556 const void *target_data)
2558 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2561 /* This function should be called from every offload image while unloading.
2562 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2563 the target, and DATA. */
2565 void
2566 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2567 int target_type, const void *data)
2569 int i;
2571 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2572 gomp_fatal ("Library too old for offload (version %u < %u)",
2573 GOMP_VERSION, GOMP_VERSION_LIB (version));
2575 const void *target_data;
2576 if (GOMP_VERSION_LIB (version) > 1)
2577 target_data = &((void **) data)[1];
2578 else
2579 target_data = data;
2581 gomp_mutex_lock (&register_lock);
2583 /* Unload image from all initialized devices. */
2584 for (i = 0; i < num_devices; i++)
2586 struct gomp_device_descr *devicep = &devices[i];
2587 gomp_mutex_lock (&devicep->lock);
2588 if (devicep->type == target_type
2589 && devicep->state == GOMP_DEVICE_INITIALIZED)
2590 gomp_unload_image_from_device (devicep, version,
2591 host_table, target_data);
2592 gomp_mutex_unlock (&devicep->lock);
2595 /* Remove image from array of pending images. */
2596 for (i = 0; i < num_offload_images; i++)
2597 if (offload_images[i].target_data == target_data)
2599 offload_images[i] = offload_images[--num_offload_images];
2600 break;
2603 gomp_mutex_unlock (&register_lock);
2606 /* Legacy entry point. */
2608 void
2609 GOMP_offload_unregister (const void *host_table, int target_type,
2610 const void *target_data)
2612 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2615 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2616 must be locked on entry, and remains locked on return. */
2618 attribute_hidden void
2619 gomp_init_device (struct gomp_device_descr *devicep)
2621 int i;
2622 if (!devicep->init_device_func (devicep->target_id))
2624 gomp_mutex_unlock (&devicep->lock);
2625 gomp_fatal ("device initialization failed");
2628 /* Load to the device all images registered so far. */
2629 for (i = 0; i < num_offload_images; i++)
2631 struct offload_image_descr *image = &offload_images[i];
2632 if (image->type == devicep->type)
2633 gomp_load_image_to_device (devicep, image->version,
2634 image->host_table, image->target_data,
2635 false);
2638 /* Initialize OpenACC asynchronous queues. */
2639 goacc_init_asyncqueues (devicep);
2641 devicep->state = GOMP_DEVICE_INITIALIZED;
2644 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2645 must be locked on entry, and remains locked on return. */
2647 attribute_hidden bool
2648 gomp_fini_device (struct gomp_device_descr *devicep)
2650 bool ret = goacc_fini_asyncqueues (devicep);
2651 ret &= devicep->fini_device_func (devicep->target_id);
2652 devicep->state = GOMP_DEVICE_FINALIZED;
2653 return ret;
2656 attribute_hidden void
2657 gomp_unload_device (struct gomp_device_descr *devicep)
2659 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2661 unsigned i;
2663 /* Unload from the device all images currently registered. */
2664 for (i = 0; i < num_offload_images; i++)
2666 struct offload_image_descr *image = &offload_images[i];
2667 if (image->type == devicep->type)
2668 gomp_unload_image_from_device (devicep, image->version,
2669 image->host_table,
2670 image->target_data);
2675 /* Host fallback for GOMP_target{,_ext} routines. */
2677 static void
2678 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2679 struct gomp_device_descr *devicep, void **args)
2681 struct gomp_thread old_thr, *thr = gomp_thread ();
2683 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2684 && devicep != NULL)
2685 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2686 "be used for offloading");
2688 old_thr = *thr;
2689 memset (thr, '\0', sizeof (*thr));
2690 if (gomp_places_list)
2692 thr->place = old_thr.place;
2693 thr->ts.place_partition_len = gomp_places_list_len;
2695 if (args)
2696 while (*args)
2698 intptr_t id = (intptr_t) *args++, val;
2699 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2700 val = (intptr_t) *args++;
2701 else
2702 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2703 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2704 continue;
2705 id &= GOMP_TARGET_ARG_ID_MASK;
2706 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2707 continue;
2708 val = val > INT_MAX ? INT_MAX : val;
2709 if (val)
2710 gomp_icv (true)->thread_limit_var = val;
2711 break;
2714 fn (hostaddrs);
2715 gomp_free_thread (thr);
2716 *thr = old_thr;
2719 /* Calculate the alignment and size requirements of a private copy of data
2720 shared as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
2722 static inline void
2723 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2724 unsigned short *kinds, size_t *tgt_align,
2725 size_t *tgt_size)
2727 size_t i;
2728 for (i = 0; i < mapnum; i++)
2729 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2731 size_t align = (size_t) 1 << (kinds[i] >> 8);
2732 if (*tgt_align < align)
2733 *tgt_align = align;
2734 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2735 *tgt_size += sizes[i];
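/* Worked example (illustrative, with made-up sizes): two GOMP_MAP_FIRSTPRIVATE
   entries of size 3 (alignment 1) and size 8 (alignment 8) yield
   tgt_align = 8 and tgt_size = ((3 + 8 - 1) & ~(8 - 1)) + 8 = 16, i.e. enough
   room for both private copies with the second one suitably aligned.  */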
2739 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2741 static inline void
2742 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2743 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2744 size_t tgt_size)
2746 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2747 if (al)
2748 tgt += tgt_align - al;
2749 tgt_size = 0;
2750 size_t i;
2751 for (i = 0; i < mapnum; i++)
2752 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2754 size_t align = (size_t) 1 << (kinds[i] >> 8);
2755 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2756 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2757 hostaddrs[i] = tgt + tgt_size;
2758 tgt_size = tgt_size + sizes[i];
2759 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2761 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2762 ++i;
2767 /* Helper function of GOMP_target{,_ext} routines. */
2769 static void *
2770 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2771 void (*host_fn) (void *))
2773 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2774 return (void *) host_fn;
2775 else
2777 gomp_mutex_lock (&devicep->lock);
2778 if (devicep->state == GOMP_DEVICE_FINALIZED)
2780 gomp_mutex_unlock (&devicep->lock);
2781 return NULL;
2784 struct splay_tree_key_s k;
2785 k.host_start = (uintptr_t) host_fn;
2786 k.host_end = k.host_start + 1;
2787 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2788 gomp_mutex_unlock (&devicep->lock);
2789 if (tgt_fn == NULL)
2790 return NULL;
2792 return (void *) tgt_fn->tgt_offset;
2796 /* Called when encountering a target directive. If DEVICE
2797 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2798 GOMP_DEVICE_HOST_FALLBACK (or any value
2799 larger than last available hw device), use host fallback.
2800 FN is address of host code, UNUSED is part of the current ABI, but
2801 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2802 with MAPNUM entries, with addresses of the host objects,
2803 sizes of the host objects (resp. for pointer kind pointer bias
2804 and assumed sizeof (void *) size) and kinds. */
2806 void
2807 GOMP_target (int device, void (*fn) (void *), const void *unused,
2808 size_t mapnum, void **hostaddrs, size_t *sizes,
2809 unsigned char *kinds)
2811 struct gomp_device_descr *devicep = resolve_device (device, true);
2813 void *fn_addr;
2814 if (devicep == NULL
2815 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2816 /* All shared memory devices should use the GOMP_target_ext function. */
2817 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2818 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2819 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2821 htab_t refcount_set = htab_create (mapnum);
2822 struct target_mem_desc *tgt_vars
2823 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2824 &refcount_set, GOMP_MAP_VARS_TARGET);
2825 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2826 NULL);
2827 htab_clear (refcount_set);
2828 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2829 htab_free (refcount_set);
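/* A user-level sketch of what this entry point implements (illustrative
   only):

     #pragma omp target map(tofrom: x)
       x += 1;

   i.e. map the listed variables, run the outlined FN on the device, and copy
   mapped data back to the host when the variables are unmapped.  */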
2832 static inline unsigned int
2833 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2835 /* If we cannot run asynchronously, simply ignore nowait. */
2836 if (devicep != NULL && devicep->async_run_func == NULL)
2837 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2839 return flags;
2842 static void
2843 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2845 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2846 if (item == NULL)
2847 return;
2849 void *host_ptr = &item->icvs;
2850 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2851 if (dev_ptr != NULL)
2852 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2853 sizeof (struct gomp_offload_icvs));
2856 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2857 and several arguments have been added:
2858 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2859 DEPEND is array of dependencies, see GOMP_task for details.
2861 ARGS is a pointer to an array consisting of a variable number of both
2862 device-independent and device-specific arguments, each of which can take
2863 one or two elements where the first specifies for which device it is
2864 intended, the type and optionally also the value. If the value is not
2865 present in the first one, the whole second element is the actual value.
2866 The last element of the array is a single NULL. Among the device-independent
2867 arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
2869 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2870 that value, or 1 if teams construct is not present, or 0, if
2871 teams construct does not have num_teams clause and so the choice is
2872 implementation defined, and -1 if it can't be determined on the host
2873 what value GOMP_teams will have on the device.
2874 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2875 body with that value, or 0, if teams construct does not have thread_limit
2876 clause or the teams construct is not present, or -1 if it can't be
2877 determined on the host what value GOMP_teams will have on the device. */
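/* For example (an illustrative encoding, mirroring the construction further
   below): a thread_limit of 4 that fits into the value field is passed as the
   single element
     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT) | GOMP_TARGET_ARG_THREAD_LIMIT)
   whereas a value that does not fit is passed as two elements, the first being
     (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM | GOMP_TARGET_ARG_THREAD_LIMIT)
   and the second holding the raw value.  */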
2879 void
2880 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2881 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2882 unsigned int flags, void **depend, void **args)
2884 struct gomp_device_descr *devicep = resolve_device (device, true);
2885 size_t tgt_align = 0, tgt_size = 0;
2886 bool fpc_done = false;
2888 /* Obtain the original TEAMS and THREADS values from ARGS. */
2889 intptr_t orig_teams = 1, orig_threads = 0;
2890 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2891 void **tmpargs = args;
2892 while (*tmpargs)
2894 intptr_t id = (intptr_t) *tmpargs++, val;
2895 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2897 val = (intptr_t) *tmpargs++;
2898 len = 2;
2900 else
2902 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2903 len = 1;
2905 num_args += len;
2906 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2907 continue;
2908 val = val > INT_MAX ? INT_MAX : val;
2909 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2911 orig_teams = val;
2912 teams_len = len;
2914 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2916 orig_threads = val;
2917 threads_len = len;
2921 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2922 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2923 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2924 value could not be determined. No change.
2925 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2926 Set device-specific value.
2927 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2928 No change. */
2929 if (orig_teams == -2)
2930 new_teams = 1;
2931 else if (orig_teams == 0)
2933 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2934 if (item != NULL)
2935 new_teams = item->icvs.nteams;
2937 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2938 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2939 e.g. a THREAD_LIMIT clause. */
2940 if (orig_teams > -2 && orig_threads == 0)
2942 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2943 if (item != NULL)
2944 new_threads = item->icvs.teams_thread_limit;
2947 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2948 updated. */
2949 void **new_args = args;
2950 if (orig_teams != new_teams || orig_threads != new_threads)
2952 size_t tms_len = (orig_teams == new_teams
2953 ? teams_len
2954 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
2955 ? 1 : 2));
2956 size_t ths_len = (orig_threads == new_threads
2957 ? threads_len
2958 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
2959 ? 1 : 2));
2960 /* One additional item after the last arg must be NULL. */
2961 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
2962 + ths_len + 1;
2963 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
2965 tmpargs = args;
2966 void **tmp_new_args = new_args;
2967 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
2968 too if they have not been changed and skipped otherwise. */
2969 while (*tmpargs)
2971 intptr_t id = (intptr_t) *tmpargs;
2972 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
2973 && orig_teams != new_teams)
2974 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
2975 && orig_threads != new_threads))
2977 tmpargs++;
2978 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2979 tmpargs++;
2981 else
2983 *tmp_new_args++ = *tmpargs++;
2984 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2985 *tmp_new_args++ = *tmpargs++;
2989 /* Add the new TEAMS arg to the new args list if it has been changed. */
2990 if (orig_teams != new_teams)
2992 intptr_t new_val = new_teams;
2993 if (tms_len == 1)
2995 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
2996 | GOMP_TARGET_ARG_NUM_TEAMS;
2997 *tmp_new_args++ = (void *) new_val;
2999 else
3001 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3002 | GOMP_TARGET_ARG_NUM_TEAMS);
3003 *tmp_new_args++ = (void *) new_val;
3007 /* Add the new THREADS arg to the new args list if it has been changed. */
3008 if (orig_threads != new_threads)
3010 intptr_t new_val = new_threads;
3011 if (ths_len == 1)
3013 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3014 | GOMP_TARGET_ARG_THREAD_LIMIT;
3015 *tmp_new_args++ = (void *) new_val;
3017 else
3019 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3020 | GOMP_TARGET_ARG_THREAD_LIMIT);
3021 *tmp_new_args++ = (void *) new_val;
3025 *tmp_new_args = NULL;
3028 flags = clear_unsupported_flags (devicep, flags);
3030 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3032 struct gomp_thread *thr = gomp_thread ();
3033 /* Create a team if we don't have any around, as nowait
3034 target tasks make sense to run asynchronously even when
3035 outside of any parallel. */
3036 if (__builtin_expect (thr->ts.team == NULL, 0))
3038 struct gomp_team *team = gomp_new_team (1);
3039 struct gomp_task *task = thr->task;
3040 struct gomp_task **implicit_task = &task;
3041 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3042 team->prev_ts = thr->ts;
3043 thr->ts.team = team;
3044 thr->ts.team_id = 0;
3045 thr->ts.work_share = &team->work_shares[0];
3046 thr->ts.last_work_share = NULL;
3047 #ifdef HAVE_SYNC_BUILTINS
3048 thr->ts.single_count = 0;
3049 #endif
3050 thr->ts.static_trip = 0;
3051 thr->task = &team->implicit_task[0];
3052 gomp_init_task (thr->task, NULL, icv);
3053 while (*implicit_task
3054 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3055 implicit_task = &(*implicit_task)->parent;
3056 if (*implicit_task)
3058 thr->task = *implicit_task;
3059 gomp_end_task ();
3060 free (*implicit_task);
3061 thr->task = &team->implicit_task[0];
3063 else
3064 pthread_setspecific (gomp_thread_destructor, thr);
3065 if (implicit_task != &task)
3067 *implicit_task = thr->task;
3068 thr->task = task;
3071 if (thr->ts.team
3072 && !thr->task->final_task)
3074 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3075 sizes, kinds, flags, depend, new_args,
3076 GOMP_TARGET_TASK_BEFORE_MAP);
3077 return;
3081 /* If there are depend clauses, but nowait is not present
3082 (or we are in a final task), block the parent task until the
3083 dependencies are resolved and then just continue with the rest
3084 of the function as if it is a merged task. */
3085 if (depend != NULL)
3087 struct gomp_thread *thr = gomp_thread ();
3088 if (thr->task && thr->task->depend_hash)
3090 /* If we might need to wait, copy firstprivate now. */
3091 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3092 &tgt_align, &tgt_size);
3093 if (tgt_align)
3095 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3096 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3097 tgt_align, tgt_size);
3099 fpc_done = true;
3100 gomp_task_maybe_wait_for_dependencies (depend);
3104 void *fn_addr;
3105 if (devicep == NULL
3106 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3107 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3108 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3110 if (!fpc_done)
3112 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3113 &tgt_align, &tgt_size);
3114 if (tgt_align)
3116 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3117 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3118 tgt_align, tgt_size);
3121 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3122 return;
3125 struct target_mem_desc *tgt_vars;
3126 htab_t refcount_set = NULL;
3128 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3130 if (!fpc_done)
3132 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3133 &tgt_align, &tgt_size);
3134 if (tgt_align)
3136 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3137 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3138 tgt_align, tgt_size);
3141 tgt_vars = NULL;
3143 else
3145 refcount_set = htab_create (mapnum);
3146 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3147 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3149 devicep->run_func (devicep->target_id, fn_addr,
3150 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3151 new_args);
3152 if (tgt_vars)
3154 htab_clear (refcount_set);
3155 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3157 if (refcount_set)
3158 htab_free (refcount_set);
3160 /* Copy back ICVs from device to host.
3161 HOST_PTR is expected to exist since it was added in
3162 gomp_load_image_to_device if not already available. */
3163 gomp_copy_back_icvs (devicep, device);
3168 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3169 keeping extra track of all variable handling, assuming that reverse offload
3170 occurs only very rarely. The downside is that the reverse search is slow. */
3172 struct gomp_splay_tree_rev_lookup_data {
3173 uintptr_t tgt_start;
3174 uintptr_t tgt_end;
3175 splay_tree_key key;
3178 static int
3179 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3181 struct gomp_splay_tree_rev_lookup_data *data;
3182 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3183 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3185 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3186 return 0;
3188 size_t j;
3189 for (j = 0; j < key->tgt->list_count; j++)
3190 if (key->tgt->list[j].key == key)
3191 break;
3192 assert (j < key->tgt->list_count);
3193 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3195 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3196 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3198 data->key = key;
3199 return 1;
3201 return 0;
3204 static inline splay_tree_key
3205 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3206 bool zero_len)
3208 struct gomp_splay_tree_rev_lookup_data data;
3209 data.key = NULL;
3210 data.tgt_start = tgt_start;
3211 data.tgt_end = tgt_end;
3213 if (tgt_start != tgt_end)
3215 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3216 return data.key;
3219 data.tgt_end++;
3220 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3221 if (data.key != NULL || zero_len)
3222 return data.key;
3223 data.tgt_end--;
3225 data.tgt_start--;
3226 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3227 return data.key;
3230 struct cpy_data
3232 uint64_t devaddr;
3233 bool present, aligned;
3237 /* Search the just-mapped reverse-offload data; return the index if found,
3238 otherwise a value >= n. */
3240 static inline int
3241 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3242 unsigned short *kinds, uint64_t *sizes, size_t n,
3243 uint64_t tgt_start, uint64_t tgt_end)
3245 const bool short_mapkind = true;
3246 const int typemask = short_mapkind ? 0xff : 0x7;
3247 size_t i;
3248 for (i = 0; i < n; i++)
3250 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3251 == GOMP_MAP_STRUCT);
3252 uint64_t dev_end;
3253 if (!is_struct)
3254 dev_end = d[i].devaddr + sizes[i];
3255 else
3257 if (i + sizes[i] < n)
3258 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3259 else
3260 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3262 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3263 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3264 break;
3265 if (is_struct)
3266 i += sizes[i];
3268 return i;
3271 static inline int
3272 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3273 unsigned short *kinds, uint64_t *sizes,
3274 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3275 bool zero_len)
3277 size_t i;
3278 if (tgt_start != tgt_end)
3279 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3280 tgt_start, tgt_end);
3281 tgt_end++;
3282 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3283 tgt_start, tgt_end);
3284 if (i < n || zero_len)
3285 return i;
3286 tgt_end--;
3288 tgt_start--;
3289 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3290 tgt_start, tgt_end);
3293 /* Handle reverse offload. This is called by the device plugins for a
3294 reverse offload; it is not called if the outer target runs on the host.
3295 The mapping handling is simplified, as device-affecting constructs (except
3296 for target with device(ancestor:1)) must not be encountered; in particular
3297 not target (enter/exit) data. */
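/* A user-level sketch of a reverse offload (illustrative only; host_work is a
   hypothetical host routine):

     #pragma omp requires reverse_offload
     ...
     #pragma omp target map(tofrom: x)
     {
       #pragma omp target device(ancestor: 1) map(always, tofrom: x)
	 x = host_work (x);
     }

   The inner construct is what reaches this function via the device plugin.  */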
3299 void
3300 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3301 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3302 struct goacc_asyncqueue *aq)
3304 /* Return early if there is no offload code. */
3305 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3306 return;
3307 /* Currently, this fails because of calculate_firstprivate_requirements
3308 below; it could be fixed but additional code needs to be updated to
3309 handle 32bit hosts - thus, it is not worthwhile. */
3310 if (sizeof (void *) != sizeof (uint64_t))
3311 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3313 struct cpy_data *cdata = NULL;
3314 uint64_t *devaddrs;
3315 uint64_t *sizes;
3316 unsigned short *kinds;
3317 const bool short_mapkind = true;
3318 const int typemask = short_mapkind ? 0xff : 0x7;
3319 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3321 reverse_splay_tree_key n;
3322 struct reverse_splay_tree_key_s k;
3323 k.dev = fn_ptr;
3325 gomp_mutex_lock (&devicep->lock);
3326 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3327 gomp_mutex_unlock (&devicep->lock);
3329 if (n == NULL)
3330 gomp_fatal ("Cannot find reverse-offload function");
3331 void (*host_fn)() = (void (*)()) n->k->host_start;
3333 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3335 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3336 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3337 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3339 else
3341 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3342 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3343 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3344 gomp_copy_dev2host (devicep, aq, devaddrs,
3345 (const void *) (uintptr_t) devaddrs_ptr,
3346 mapnum * sizeof (uint64_t));
3347 gomp_copy_dev2host (devicep, aq, sizes,
3348 (const void *) (uintptr_t) sizes_ptr,
3349 mapnum * sizeof (uint64_t));
3350 gomp_copy_dev2host (devicep, aq, kinds,
3351 (const void *) (uintptr_t) kinds_ptr,
3352 mapnum * sizeof (unsigned short));
3353 if (aq && !devicep->openacc.async.synchronize_func (aq))
3354 exit (EXIT_FAILURE);
3357 size_t tgt_align = 0, tgt_size = 0;
3359 /* If actually executed on 32bit systems, the casts lead to wrong code;
3360 but 32bit with offloading is not supported; see top of this function. */
3361 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3362 (void *) (uintptr_t) kinds,
3363 &tgt_align, &tgt_size);
3365 if (tgt_align)
3367 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3368 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3369 if (al)
3370 tgt += tgt_align - al;
3371 tgt_size = 0;
3372 for (uint64_t i = 0; i < mapnum; i++)
3373 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3374 && devaddrs[i] != 0)
3376 size_t align = (size_t) 1 << (kinds[i] >> 8);
3377 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3378 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3379 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3380 (size_t) sizes[i]);
3381 else
3383 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3384 (void *) (uintptr_t) devaddrs[i],
3385 (size_t) sizes[i]);
3386 if (aq && !devicep->openacc.async.synchronize_func (aq))
3387 exit (EXIT_FAILURE);
3389 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3390 tgt_size = tgt_size + sizes[i];
3391 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3392 && i + 1 < mapnum
3393 && ((get_kind (short_mapkind, kinds, i) & typemask)
3394 == GOMP_MAP_ATTACH))
3396 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3397 = (uint64_t) devaddrs[i];
3398 ++i;
3403 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3405 size_t j, struct_cpy = 0;
3406 splay_tree_key n2;
3407 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3408 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3409 gomp_mutex_lock (&devicep->lock);
3410 for (uint64_t i = 0; i < mapnum; i++)
3412 if (devaddrs[i] == 0)
3413 continue;
3414 n = NULL;
3415 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3416 switch (kind)
3418 case GOMP_MAP_FIRSTPRIVATE:
3419 case GOMP_MAP_FIRSTPRIVATE_INT:
3420 continue;
3422 case GOMP_MAP_DELETE:
3423 case GOMP_MAP_RELEASE:
3424 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3425 /* Assume it is present; look it up - but ignore otherwise. */
3426 case GOMP_MAP_ALLOC:
3427 case GOMP_MAP_FROM:
3428 case GOMP_MAP_FORCE_ALLOC:
3429 case GOMP_MAP_FORCE_FROM:
3430 case GOMP_MAP_ALWAYS_FROM:
3431 case GOMP_MAP_TO:
3432 case GOMP_MAP_TOFROM:
3433 case GOMP_MAP_FORCE_TO:
3434 case GOMP_MAP_FORCE_TOFROM:
3435 case GOMP_MAP_ALWAYS_TO:
3436 case GOMP_MAP_ALWAYS_TOFROM:
3437 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3438 cdata[i].devaddr = devaddrs[i];
3439 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3440 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3441 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3442 devaddrs[i],
3443 devaddrs[i] + sizes[i], zero_len);
3444 if (j < i)
3446 n2 = NULL;
3447 cdata[i].present = true;
3448 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3450 else
3452 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3453 devaddrs[i],
3454 devaddrs[i] + sizes[i], zero_len);
3455 cdata[i].present = n2 != NULL;
3457 if (!cdata[i].present
3458 && kind != GOMP_MAP_DELETE
3459 && kind != GOMP_MAP_RELEASE
3460 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3462 cdata[i].aligned = true;
3463 size_t align = (size_t) 1 << (kinds[i] >> 8);
3464 devaddrs[i]
3465 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3466 sizes[i]);
3468 else if (n2 != NULL)
3469 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3470 - (n2->tgt->tgt_start + n2->tgt_offset));
3471 if (((!cdata[i].present || struct_cpy)
3472 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3473 || kind == GOMP_MAP_FORCE_TO
3474 || kind == GOMP_MAP_FORCE_TOFROM
3475 || kind == GOMP_MAP_ALWAYS_TO
3476 || kind == GOMP_MAP_ALWAYS_TOFROM)
3478 gomp_copy_dev2host (devicep, aq,
3479 (void *) (uintptr_t) devaddrs[i],
3480 (void *) (uintptr_t) cdata[i].devaddr,
3481 sizes[i]);
3482 if (aq && !devicep->openacc.async.synchronize_func (aq))
3484 gomp_mutex_unlock (&devicep->lock);
3485 exit (EXIT_FAILURE);
3488 if (struct_cpy)
3489 struct_cpy--;
3490 break;
3491 case GOMP_MAP_ATTACH:
3492 case GOMP_MAP_POINTER:
3493 case GOMP_MAP_ALWAYS_POINTER:
3494 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3495 devaddrs[i] + sizes[i],
3496 devaddrs[i] + sizes[i]
3497 + sizeof (void*), false);
3498 cdata[i].present = n2 != NULL;
3499 cdata[i].devaddr = devaddrs[i];
3500 if (n2)
3501 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3502 - (n2->tgt->tgt_start + n2->tgt_offset));
3503 else
3505 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3506 devaddrs[i] + sizes[i],
3507 devaddrs[i] + sizes[i]
3508 + sizeof (void*), false);
3509 if (j < i)
3511 cdata[i].present = true;
3512 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3513 - cdata[j].devaddr);
3516 if (!cdata[i].present)
3517 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3518 /* Assume that when present, the pointer is already correct. */
3519 if (!n2)
3520 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3521 = devaddrs[i-1];
3522 break;
3523 case GOMP_MAP_TO_PSET:
3524 /* Assume that when present, the pointers are fine and no 'to:'
3525 is required. */
3526 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3527 devaddrs[i], devaddrs[i] + sizes[i],
3528 false);
3529 cdata[i].present = n2 != NULL;
3530 cdata[i].devaddr = devaddrs[i];
3531 if (n2)
3532 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3533 - (n2->tgt->tgt_start + n2->tgt_offset));
3534 else
3536 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3537 devaddrs[i],
3538 devaddrs[i] + sizes[i], false);
3539 if (j < i)
3541 cdata[i].present = true;
3542 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3543 - cdata[j].devaddr);
3546 if (!cdata[i].present)
3548 cdata[i].aligned = true;
3549 size_t align = (size_t) 1 << (kinds[i] >> 8);
3550 devaddrs[i]
3551 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3552 sizes[i]);
3553 gomp_copy_dev2host (devicep, aq,
3554 (void *) (uintptr_t) devaddrs[i],
3555 (void *) (uintptr_t) cdata[i].devaddr,
3556 sizes[i]);
3557 if (aq && !devicep->openacc.async.synchronize_func (aq))
3559 gomp_mutex_unlock (&devicep->lock);
3560 exit (EXIT_FAILURE);
3563 for (j = i + 1; j < mapnum; j++)
3565 kind = get_kind (short_mapkind, kinds, j) & typemask;
3566 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3567 && !GOMP_MAP_POINTER_P (kind))
3568 break;
3569 if (devaddrs[j] < devaddrs[i])
3570 break;
3571 if (cdata[i].present)
3572 continue;
3573 if (devaddrs[j] == 0)
3575 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3576 continue;
3578 int k;
3579 n2 = NULL;
3580 /* Dereference devaddrs[j] to get the device addr. */
3581 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3582 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3583 + sizes[j]);
3584 cdata[j].present = true;
3585 cdata[j].devaddr = devaddrs[j];
3586 if (devaddrs[j] == 0)
3587 continue;
3588 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3589 devaddrs[j],
3590 devaddrs[j] + sizeof (void*),
3591 false);
3592 if (k < j)
3593 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3594 - cdata[k].devaddr);
3595 else
3597 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3598 devaddrs[j],
3599 devaddrs[j] + sizeof (void*),
3600 false);
3601 if (n2 == NULL)
3603 gomp_mutex_unlock (&devicep->lock);
3604 gomp_fatal ("Pointer target wasn't mapped");
3606 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3607 - (n2->tgt->tgt_start + n2->tgt_offset));
3609 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3610 = (void *) (uintptr_t) devaddrs[j];
3612 i = j - 1;
3613 break;
3614 case GOMP_MAP_STRUCT:
3615 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3616 devaddrs[i + sizes[i]]
3617 + sizes[i + sizes[i]], false);
3618 cdata[i].present = n2 != NULL;
3619 cdata[i].devaddr = devaddrs[i];
3620 struct_cpy = cdata[i].present ? 0 : sizes[i];
3621 if (!n2)
3623 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3624 - devaddrs[i+1]
3625 + sizes[i + sizes[i]]);
3626 size_t align = (size_t) 1 << (kinds[i] >> 8);
3627 cdata[i].aligned = true;
3628 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3629 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3631 else
3632 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3633 - (n2->tgt->tgt_start + n2->tgt_offset));
3634 break;
3635 default:
3636 gomp_mutex_unlock (&devicep->lock);
3637 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3640 gomp_mutex_unlock (&devicep->lock);
3643 host_fn (devaddrs);
3645 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3647 uint64_t struct_cpy = 0;
3648 bool clean_struct = false;
3649 for (uint64_t i = 0; i < mapnum; i++)
3651 if (cdata[i].devaddr == 0)
3652 continue;
3653 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3654 bool copy = !cdata[i].present || struct_cpy;
3655 switch (kind)
3657 case GOMP_MAP_FORCE_FROM:
3658 case GOMP_MAP_FORCE_TOFROM:
3659 case GOMP_MAP_ALWAYS_FROM:
3660 case GOMP_MAP_ALWAYS_TOFROM:
3661 copy = true;
3662 /* FALLTHRU */
3663 case GOMP_MAP_FROM:
3664 case GOMP_MAP_TOFROM:
3665 if (copy)
3667 gomp_copy_host2dev (devicep, aq,
3668 (void *) (uintptr_t) cdata[i].devaddr,
3669 (void *) (uintptr_t) devaddrs[i],
3670 sizes[i], false, NULL);
3671 if (aq && !devicep->openacc.async.synchronize_func (aq))
3672 exit (EXIT_FAILURE);
3674 default:
3675 break;
3677 if (struct_cpy)
3679 struct_cpy--;
3680 continue;
3682 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3684 clean_struct = true;
3685 struct_cpy = sizes[i];
3687 else if (!cdata[i].present && cdata[i].aligned)
3688 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3689 else if (!cdata[i].present)
3690 free ((void *) (uintptr_t) devaddrs[i]);
3692 if (clean_struct)
3693 for (uint64_t i = 0; i < mapnum; i++)
3694 if (!cdata[i].present
3695 && ((get_kind (short_mapkind, kinds, i) & typemask)
3696 == GOMP_MAP_STRUCT))
3698 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3699 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3702 free (devaddrs);
3703 free (sizes);
3704 free (kinds);
3708 /* Host fallback for GOMP_target_data{,_ext} routines. */
3710 static void
3711 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3713 struct gomp_task_icv *icv = gomp_icv (false);
3715 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3716 && devicep != NULL)
3717 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3718 "be used for offloading");
3720 if (icv->target_data)
3722 /* Even when doing a host fallback, if there are any active
3723 #pragma omp target data constructs, we need to remember the
3724 new #pragma omp target data, otherwise GOMP_target_end_data
3725 would get out of sync. */
3726 struct target_mem_desc *tgt
3727 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3728 NULL, GOMP_MAP_VARS_DATA);
3729 tgt->prev = icv->target_data;
3730 icv->target_data = tgt;
3734 void
3735 GOMP_target_data (int device, const void *unused, size_t mapnum,
3736 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3738 struct gomp_device_descr *devicep = resolve_device (device, true);
3740 if (devicep == NULL
3741 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3742 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3743 return gomp_target_data_fallback (devicep);
3745 struct target_mem_desc *tgt
3746 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3747 NULL, GOMP_MAP_VARS_DATA);
3748 struct gomp_task_icv *icv = gomp_icv (true);
3749 tgt->prev = icv->target_data;
3750 icv->target_data = tgt;
3753 void
3754 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3755 size_t *sizes, unsigned short *kinds)
3757 struct gomp_device_descr *devicep = resolve_device (device, true);
3759 if (devicep == NULL
3760 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3761 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3762 return gomp_target_data_fallback (devicep);
3764 struct target_mem_desc *tgt
3765 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3766 NULL, GOMP_MAP_VARS_DATA);
3767 struct gomp_task_icv *icv = gomp_icv (true);
3768 tgt->prev = icv->target_data;
3769 icv->target_data = tgt;
3772 void
3773 GOMP_target_end_data (void)
3775 struct gomp_task_icv *icv = gomp_icv (false);
3776 if (icv->target_data)
3778 struct target_mem_desc *tgt = icv->target_data;
3779 icv->target_data = tgt->prev;
3780 gomp_unmap_vars (tgt, true, NULL);
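/* Illustrative sketch (an assumption about the compiler side, not something
   stated in this file): a structured construct such as

     #pragma omp target data map(tofrom: a[0:n])
     { ... }

   would typically be bracketed by a GOMP_target_data_ext call whose map list
   describes 'a[0:n]' on entry, and a matching GOMP_target_end_data call on
   exit, which pops and unmaps the most recently pushed target_mem_desc.  */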
3784 void
3785 GOMP_target_update (int device, const void *unused, size_t mapnum,
3786 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3788 struct gomp_device_descr *devicep = resolve_device (device, true);
3790 if (devicep == NULL
3791 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3792 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3793 return;
3795 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3798 void
3799 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3800 size_t *sizes, unsigned short *kinds,
3801 unsigned int flags, void **depend)
3803 struct gomp_device_descr *devicep = resolve_device (device, true);
3805 /* If there are depend clauses, but nowait is not present,
3806 block the parent task until the dependencies are resolved
3807 and then just continue with the rest of the function as if it
3808 were a merged task. Until we are able to schedule tasks during
3809 variable mapping or unmapping, ignore nowait if depend clauses
3810 are not present. */
3811 if (depend != NULL)
3813 struct gomp_thread *thr = gomp_thread ();
3814 if (thr->task && thr->task->depend_hash)
3816 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3817 && thr->ts.team
3818 && !thr->task->final_task)
3820 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3821 mapnum, hostaddrs, sizes, kinds,
3822 flags | GOMP_TARGET_FLAG_UPDATE,
3823 depend, NULL, GOMP_TARGET_TASK_DATA))
3824 return;
3826 else
3828 struct gomp_team *team = thr->ts.team;
3829 /* If parallel or taskgroup has been cancelled, don't start new
3830 tasks. */
3831 if (__builtin_expect (gomp_cancel_var, 0) && team)
3833 if (gomp_team_barrier_cancelled (&team->barrier))
3834 return;
3835 if (thr->task->taskgroup)
3837 if (thr->task->taskgroup->cancelled)
3838 return;
3839 if (thr->task->taskgroup->workshare
3840 && thr->task->taskgroup->prev
3841 && thr->task->taskgroup->prev->cancelled)
3842 return;
3846 gomp_task_maybe_wait_for_dependencies (depend);
3851 if (devicep == NULL
3852 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3853 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3854 return;
3856 struct gomp_thread *thr = gomp_thread ();
3857 struct gomp_team *team = thr->ts.team;
3858 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3859 if (__builtin_expect (gomp_cancel_var, 0) && team)
3861 if (gomp_team_barrier_cancelled (&team->barrier))
3862 return;
3863 if (thr->task->taskgroup)
3865 if (thr->task->taskgroup->cancelled)
3866 return;
3867 if (thr->task->taskgroup->workshare
3868 && thr->task->taskgroup->prev
3869 && thr->task->taskgroup->prev->cancelled)
3870 return;
3874 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
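/* Illustrative sketch (hedged; the exact KINDS encoding is produced by the
   compiler and is not shown here): a directive such as

     #pragma omp target update from(x) depend(in: y) nowait

   would reach GOMP_target_update_ext with mapnum == 1, hostaddrs[0] == &x,
   sizes[0] == sizeof (x), a GOMP_MAP_FROM-style kind, GOMP_TARGET_FLAG_NOWAIT
   set in FLAGS and a non-NULL DEPEND array, so the update above is queued via
   gomp_create_target_task instead of being performed synchronously.  */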
3877 static void
3878 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3879 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3880 htab_t *refcount_set)
3882 const int typemask = 0xff;
3883 size_t i;
3884 gomp_mutex_lock (&devicep->lock);
3885 if (devicep->state == GOMP_DEVICE_FINALIZED)
3887 gomp_mutex_unlock (&devicep->lock);
3888 return;
3891 for (i = 0; i < mapnum; i++)
3892 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3894 struct splay_tree_key_s cur_node;
3895 cur_node.host_start = (uintptr_t) hostaddrs[i];
3896 cur_node.host_end = cur_node.host_start + sizeof (void *);
3897 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3899 if (n)
3900 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3901 false, NULL);
3904 int nrmvars = 0;
3905 splay_tree_key remove_vars[mapnum];
3907 for (i = 0; i < mapnum; i++)
3909 struct splay_tree_key_s cur_node;
3910 unsigned char kind = kinds[i] & typemask;
3911 switch (kind)
3913 case GOMP_MAP_FROM:
3914 case GOMP_MAP_ALWAYS_FROM:
3915 case GOMP_MAP_DELETE:
3916 case GOMP_MAP_RELEASE:
3917 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3918 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3919 cur_node.host_start = (uintptr_t) hostaddrs[i];
3920 cur_node.host_end = cur_node.host_start + sizes[i];
3921 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3922 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
3923 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
3924 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3925 if (!k)
3926 continue;
3928 bool delete_p = (kind == GOMP_MAP_DELETE
3929 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
3930 bool do_copy, do_remove;
3931 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
3932 &do_remove);
3934 if ((kind == GOMP_MAP_FROM && do_copy)
3935 || kind == GOMP_MAP_ALWAYS_FROM)
3937 if (k->aux && k->aux->attach_count)
3939 /* We have to be careful not to overwrite still attached
3940 pointers during the copyback to host. */
3941 uintptr_t addr = k->host_start;
3942 while (addr < k->host_end)
3944 size_t i = (addr - k->host_start) / sizeof (void *);
3945 if (k->aux->attach_count[i] == 0)
3946 gomp_copy_dev2host (devicep, NULL, (void *) addr,
3947 (void *) (k->tgt->tgt_start
3948 + k->tgt_offset
3949 + addr - k->host_start),
3950 sizeof (void *));
3951 addr += sizeof (void *);
3954 else
3955 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3956 (void *) (k->tgt->tgt_start + k->tgt_offset
3957 + cur_node.host_start
3958 - k->host_start),
3959 cur_node.host_end - cur_node.host_start);
3962 /* Lists of structure elements are removed all at once, which
3963 may cause immediate deallocation of the target_mem_desc and thus
3964 errors if we still have later element siblings to copy back.
3965 While we're at it, it also seems more disciplined to simply
3966 queue all removals together for processing below.
3968 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3969 not have this problem, since it maintains an additional
3970 tgt->refcount = 1 reference to the target_mem_desc to start with.
3972 if (do_remove)
3973 remove_vars[nrmvars++] = k;
3974 break;
3976 case GOMP_MAP_DETACH:
3977 break;
3978 default:
3979 gomp_mutex_unlock (&devicep->lock);
3980 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3981 kind);
3985 for (int i = 0; i < nrmvars; i++)
3986 gomp_remove_var (devicep, remove_vars[i]);
3988 gomp_mutex_unlock (&devicep->lock);
3991 void
3992 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3993 size_t *sizes, unsigned short *kinds,
3994 unsigned int flags, void **depend)
3996 struct gomp_device_descr *devicep = resolve_device (device, true);
3998 /* If there are depend clauses, but nowait is not present,
3999 block the parent task until the dependencies are resolved
4000 and then just continue with the rest of the function as if it
4001 were a merged task. Until we are able to schedule tasks during
4002 variable mapping or unmapping, ignore nowait if depend clauses
4003 are not present. */
4004 if (depend != NULL)
4006 struct gomp_thread *thr = gomp_thread ();
4007 if (thr->task && thr->task->depend_hash)
4009 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4010 && thr->ts.team
4011 && !thr->task->final_task)
4013 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4014 mapnum, hostaddrs, sizes, kinds,
4015 flags, depend, NULL,
4016 GOMP_TARGET_TASK_DATA))
4017 return;
4019 else
4021 struct gomp_team *team = thr->ts.team;
4022 /* If parallel or taskgroup has been cancelled, don't start new
4023 tasks. */
4024 if (__builtin_expect (gomp_cancel_var, 0) && team)
4026 if (gomp_team_barrier_cancelled (&team->barrier))
4027 return;
4028 if (thr->task->taskgroup)
4030 if (thr->task->taskgroup->cancelled)
4031 return;
4032 if (thr->task->taskgroup->workshare
4033 && thr->task->taskgroup->prev
4034 && thr->task->taskgroup->prev->cancelled)
4035 return;
4039 gomp_task_maybe_wait_for_dependencies (depend);
4044 if (devicep == NULL
4045 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4046 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4047 return;
4049 struct gomp_thread *thr = gomp_thread ();
4050 struct gomp_team *team = thr->ts.team;
4051 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4052 if (__builtin_expect (gomp_cancel_var, 0) && team)
4054 if (gomp_team_barrier_cancelled (&team->barrier))
4055 return;
4056 if (thr->task->taskgroup)
4058 if (thr->task->taskgroup->cancelled)
4059 return;
4060 if (thr->task->taskgroup->workshare
4061 && thr->task->taskgroup->prev
4062 && thr->task->taskgroup->prev->cancelled)
4063 return;
4067 htab_t refcount_set = htab_create (mapnum);
4069 /* The variables are mapped separately such that they can be released
4070 independently. */
4071 size_t i, j;
4072 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4073 for (i = 0; i < mapnum; i++)
4074 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4076 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4077 &kinds[i], true, &refcount_set,
4078 GOMP_MAP_VARS_ENTER_DATA);
4079 i += sizes[i];
4081 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4083 for (j = i + 1; j < mapnum; j++)
4084 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4085 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4086 break;
4087 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4088 &kinds[i], true, &refcount_set,
4089 GOMP_MAP_VARS_ENTER_DATA);
4090 i += j - i - 1;
4092 else if (i + 1 < mapnum
4093 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4094 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4095 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4097 /* An attach operation must be processed together with the mapped
4098 base-pointer list item. */
4099 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4100 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4101 i += 1;
4103 else
4104 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4105 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4106 else
4107 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4108 htab_free (refcount_set);
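/* Illustrative sketch (an assumption about the compiler side): both

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target exit data map(from: a[0:n])

   would funnel into GOMP_target_enter_exit_data; the exit-data form is
   distinguished by GOMP_TARGET_FLAG_EXIT_DATA in FLAGS, which routes the
   call to gomp_exit_data above instead of the mapping loop.  */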
4111 bool
4112 gomp_target_task_fn (void *data)
4114 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4115 struct gomp_device_descr *devicep = ttask->devicep;
4117 if (ttask->fn != NULL)
4119 void *fn_addr;
4120 if (devicep == NULL
4121 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4122 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4123 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4125 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4126 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4127 ttask->args);
4128 return false;
4131 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4133 if (ttask->tgt)
4134 gomp_unmap_vars (ttask->tgt, true, NULL);
4135 return false;
4138 void *actual_arguments;
4139 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4141 ttask->tgt = NULL;
4142 actual_arguments = ttask->hostaddrs;
4144 else
4146 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4147 NULL, ttask->sizes, ttask->kinds, true,
4148 NULL, GOMP_MAP_VARS_TARGET);
4149 actual_arguments = (void *) ttask->tgt->tgt_start;
4151 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4153 assert (devicep->async_run_func);
4154 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4155 ttask->args, (void *) ttask);
4156 return true;
4158 else if (devicep == NULL
4159 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4160 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4161 return false;
4163 size_t i;
4164 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4165 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4166 ttask->kinds, true);
4167 else
4169 htab_t refcount_set = htab_create (ttask->mapnum);
4170 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4171 for (i = 0; i < ttask->mapnum; i++)
4172 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4174 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4175 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4176 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4177 i += ttask->sizes[i];
4179 else
4180 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4181 &ttask->kinds[i], true, &refcount_set,
4182 GOMP_MAP_VARS_ENTER_DATA);
4183 else
4184 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4185 ttask->kinds, &refcount_set);
4186 htab_free (refcount_set);
4188 return false;
4191 void
4192 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4194 if (thread_limit)
4196 struct gomp_task_icv *icv = gomp_icv (true);
4197 icv->thread_limit_var
4198 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4200 (void) num_teams;
4203 bool
4204 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4205 unsigned int thread_limit, bool first)
4207 struct gomp_thread *thr = gomp_thread ();
4208 if (first)
4210 if (thread_limit)
4212 struct gomp_task_icv *icv = gomp_icv (true);
4213 icv->thread_limit_var
4214 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4216 (void) num_teams_high;
4217 if (num_teams_low == 0)
4218 num_teams_low = 1;
4219 thr->num_teams = num_teams_low - 1;
4220 thr->team_num = 0;
4222 else if (thr->team_num == thr->num_teams)
4223 return false;
4224 else
4225 ++thr->team_num;
4226 return true;
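/* Sketch of the calling convention implied by the code above (an
   illustration, not a verbatim copy of compiler output): a host-fallback
   teams region can be driven by a loop such as

     for (bool first = true; GOMP_teams4 (lo, hi, thread_limit, first);
          first = false)
       run_one_team ();

   where lo, hi and run_one_team are placeholders.  The first call records
   the team count and resets the team number; each later call advances
   thr->team_num until it reaches thr->num_teams and then returns false.  */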
4229 void *
4230 omp_target_alloc (size_t size, int device_num)
4232 if (device_num == omp_initial_device
4233 || device_num == gomp_get_num_devices ())
4234 return malloc (size);
4236 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4237 if (devicep == NULL)
4238 return NULL;
4240 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4241 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4242 return malloc (size);
4244 gomp_mutex_lock (&devicep->lock);
4245 void *ret = devicep->alloc_func (devicep->target_id, size);
4246 gomp_mutex_unlock (&devicep->lock);
4247 return ret;
4250 void
4251 omp_target_free (void *device_ptr, int device_num)
4253 if (device_num == omp_initial_device
4254 || device_num == gomp_get_num_devices ())
4256 free (device_ptr);
4257 return;
4260 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4261 if (devicep == NULL || device_ptr == NULL)
4262 return;
4264 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4265 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4267 free (device_ptr);
4268 return;
4271 gomp_mutex_lock (&devicep->lock);
4272 gomp_free_device_memory (devicep, device_ptr);
4273 gomp_mutex_unlock (&devicep->lock);
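/* Usage sketch for the two routines above (illustration only; N, dev and
   host_buf are placeholders):

     int dev = omp_get_default_device ();
     void *d = omp_target_alloc (N * sizeof (int), dev);
     if (d != NULL)
       {
         omp_target_memcpy (d, host_buf, N * sizeof (int), 0, 0,
                            dev, omp_get_initial_device ());
         omp_target_free (d, dev);
       }
   */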
4277 omp_target_is_present (const void *ptr, int device_num)
4279 if (device_num == omp_initial_device
4280 || device_num == gomp_get_num_devices ())
4281 return 1;
4283 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4284 if (devicep == NULL)
4285 return 0;
4287 if (ptr == NULL)
4288 return 1;
4290 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4291 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4292 return 1;
4294 gomp_mutex_lock (&devicep->lock);
4295 struct splay_tree_s *mem_map = &devicep->mem_map;
4296 struct splay_tree_key_s cur_node;
4298 cur_node.host_start = (uintptr_t) ptr;
4299 cur_node.host_end = cur_node.host_start;
4300 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4301 int ret = n != NULL;
4302 gomp_mutex_unlock (&devicep->lock);
4303 return ret;
4306 static int
4307 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4308 struct gomp_device_descr **dst_devicep,
4309 struct gomp_device_descr **src_devicep)
4311 if (dst_device_num != gomp_get_num_devices ()
4312 /* Above gomp_get_num_devices has to be called unconditionally. */
4313 && dst_device_num != omp_initial_device)
4315 *dst_devicep = resolve_device (dst_device_num, false);
4316 if (*dst_devicep == NULL)
4317 return EINVAL;
4319 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4320 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4321 *dst_devicep = NULL;
4324 if (src_device_num != num_devices_openmp
4325 && src_device_num != omp_initial_device)
4327 *src_devicep = resolve_device (src_device_num, false);
4328 if (*src_devicep == NULL)
4329 return EINVAL;
4331 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4332 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4333 *src_devicep = NULL;
4336 return 0;
4339 static int
4340 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4341 size_t dst_offset, size_t src_offset,
4342 struct gomp_device_descr *dst_devicep,
4343 struct gomp_device_descr *src_devicep)
4345 bool ret;
4346 if (src_devicep == NULL && dst_devicep == NULL)
4348 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4349 return 0;
4351 if (src_devicep == NULL)
4353 gomp_mutex_lock (&dst_devicep->lock);
4354 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4355 (char *) dst + dst_offset,
4356 (char *) src + src_offset, length);
4357 gomp_mutex_unlock (&dst_devicep->lock);
4358 return (ret ? 0 : EINVAL);
4360 if (dst_devicep == NULL)
4362 gomp_mutex_lock (&src_devicep->lock);
4363 ret = src_devicep->dev2host_func (src_devicep->target_id,
4364 (char *) dst + dst_offset,
4365 (char *) src + src_offset, length);
4366 gomp_mutex_unlock (&src_devicep->lock);
4367 return (ret ? 0 : EINVAL);
4369 if (src_devicep == dst_devicep)
4371 gomp_mutex_lock (&src_devicep->lock);
4372 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4373 (char *) dst + dst_offset,
4374 (char *) src + src_offset, length);
4375 gomp_mutex_unlock (&src_devicep->lock);
4376 return (ret ? 0 : EINVAL);
4378 return EINVAL;
4382 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4383 size_t src_offset, int dst_device_num, int src_device_num)
4385 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4386 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4387 &dst_devicep, &src_devicep);
4389 if (ret)
4390 return ret;
4392 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4393 dst_devicep, src_devicep);
4395 return ret;
4398 typedef struct
4400 void *dst;
4401 const void *src;
4402 size_t length;
4403 size_t dst_offset;
4404 size_t src_offset;
4405 struct gomp_device_descr *dst_devicep;
4406 struct gomp_device_descr *src_devicep;
4407 } omp_target_memcpy_data;
4409 static void
4410 omp_target_memcpy_async_helper (void *args)
4412 omp_target_memcpy_data *a = args;
4413 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4414 a->src_offset, a->dst_devicep, a->src_devicep))
4415 gomp_fatal ("omp_target_memcpy failed");
4419 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4420 size_t dst_offset, size_t src_offset,
4421 int dst_device_num, int src_device_num,
4422 int depobj_count, omp_depend_t *depobj_list)
4424 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4425 unsigned int flags = 0;
4426 void *depend[depobj_count + 5];
4427 int i;
4428 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4429 &dst_devicep, &src_devicep);
4431 omp_target_memcpy_data s = {
4432 .dst = dst,
4433 .src = src,
4434 .length = length,
4435 .dst_offset = dst_offset,
4436 .src_offset = src_offset,
4437 .dst_devicep = dst_devicep,
4438 .src_devicep = src_devicep
4441 if (check)
4442 return check;
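/* As set up below, DEPEND uses the depobj layout expected by GOMP_task:
   depend[0] == 0 selects this format, depend[1] carries the number of
   entries, slots 2 to 4 stay zero, and the addresses of the omp_depend_t
   objects are stored from depend[5] onwards.  */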
4444 if (depobj_count > 0 && depobj_list != NULL)
4446 flags |= GOMP_TASK_FLAG_DEPEND;
4447 depend[0] = 0;
4448 depend[1] = (void *) (uintptr_t) depobj_count;
4449 depend[2] = depend[3] = depend[4] = 0;
4450 for (i = 0; i < depobj_count; ++i)
4451 depend[i + 5] = &depobj_list[i];
4454 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4455 __alignof__ (s), true, flags, depend, 0, NULL);
4457 return 0;
4460 static int
4461 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4462 int num_dims, const size_t *volume,
4463 const size_t *dst_offsets,
4464 const size_t *src_offsets,
4465 const size_t *dst_dimensions,
4466 const size_t *src_dimensions,
4467 struct gomp_device_descr *dst_devicep,
4468 struct gomp_device_descr *src_devicep)
4470 size_t dst_slice = element_size;
4471 size_t src_slice = element_size;
4472 size_t j, dst_off, src_off, length;
4473 int i, ret;
4475 if (num_dims == 1)
4477 if (__builtin_mul_overflow (element_size, volume[0], &length)
4478 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4479 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4480 return EINVAL;
4481 if (dst_devicep == NULL && src_devicep == NULL)
4483 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4484 length);
4485 ret = 1;
4487 else if (src_devicep == NULL)
4488 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4489 (char *) dst + dst_off,
4490 (const char *) src + src_off,
4491 length);
4492 else if (dst_devicep == NULL)
4493 ret = src_devicep->dev2host_func (src_devicep->target_id,
4494 (char *) dst + dst_off,
4495 (const char *) src + src_off,
4496 length);
4497 else if (src_devicep == dst_devicep)
4498 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4499 (char *) dst + dst_off,
4500 (const char *) src + src_off,
4501 length);
4502 else
4503 ret = 0;
4504 return ret ? 0 : EINVAL;
4507 /* FIXME: it would be nice to have some plugin function to handle
4508 num_dims == 2 and num_dims == 3 more efficiently. Larger dimension
4509 counts can be handled by the generic recursion below, and for
4510 host-to-host copies it should be used even for num_dims >= 2. */
4512 for (i = 1; i < num_dims; i++)
4513 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4514 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4515 return EINVAL;
4516 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4517 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4518 return EINVAL;
4519 for (j = 0; j < volume[0]; j++)
4521 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4522 (const char *) src + src_off,
4523 element_size, num_dims - 1,
4524 volume + 1, dst_offsets + 1,
4525 src_offsets + 1, dst_dimensions + 1,
4526 src_dimensions + 1, dst_devicep,
4527 src_devicep);
4528 if (ret)
4529 return ret;
4530 dst_off += dst_slice;
4531 src_off += src_slice;
4533 return 0;
4536 static int
4537 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4538 int src_device_num,
4539 struct gomp_device_descr **dst_devicep,
4540 struct gomp_device_descr **src_devicep)
4542 if (!dst && !src)
4543 return INT_MAX;
4545 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4546 dst_devicep, src_devicep);
4547 if (ret)
4548 return ret;
4550 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
4551 return EINVAL;
4553 return 0;
4556 static int
4557 omp_target_memcpy_rect_copy (void *dst, const void *src,
4558 size_t element_size, int num_dims,
4559 const size_t *volume, const size_t *dst_offsets,
4560 const size_t *src_offsets,
4561 const size_t *dst_dimensions,
4562 const size_t *src_dimensions,
4563 struct gomp_device_descr *dst_devicep,
4564 struct gomp_device_descr *src_devicep)
4566 if (src_devicep)
4567 gomp_mutex_lock (&src_devicep->lock);
4568 else if (dst_devicep)
4569 gomp_mutex_lock (&dst_devicep->lock);
4570 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4571 volume, dst_offsets, src_offsets,
4572 dst_dimensions, src_dimensions,
4573 dst_devicep, src_devicep);
4574 if (src_devicep)
4575 gomp_mutex_unlock (&src_devicep->lock);
4576 else if (dst_devicep)
4577 gomp_mutex_unlock (&dst_devicep->lock);
4579 return ret;
4583 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4584 int num_dims, const size_t *volume,
4585 const size_t *dst_offsets,
4586 const size_t *src_offsets,
4587 const size_t *dst_dimensions,
4588 const size_t *src_dimensions,
4589 int dst_device_num, int src_device_num)
4591 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4593 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4594 src_device_num, &dst_devicep,
4595 &src_devicep);
4597 if (check)
4598 return check;
4600 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4601 volume, dst_offsets, src_offsets,
4602 dst_dimensions, src_dimensions,
4603 dst_devicep, src_devicep);
4605 return ret;
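/* Usage sketch for omp_target_memcpy_rect (illustration only; dev_matrix,
   host_matrix and dev are placeholders): copy a 2x3 block starting at row 1,
   column 2 of a 4x5 host matrix of doubles into the top-left corner of a
   4x5 device-side matrix:

     size_t volume[2] = { 2, 3 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dims[2] = { 4, 5 };
     omp_target_memcpy_rect (dev_matrix, host_matrix, sizeof (double), 2,
                             volume, dst_offsets, src_offsets, dims, dims,
                             dev, omp_get_initial_device ());
   */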
4608 typedef struct
4610 void *dst;
4611 const void *src;
4612 size_t element_size;
4613 const size_t *volume;
4614 const size_t *dst_offsets;
4615 const size_t *src_offsets;
4616 const size_t *dst_dimensions;
4617 const size_t *src_dimensions;
4618 struct gomp_device_descr *dst_devicep;
4619 struct gomp_device_descr *src_devicep;
4620 int num_dims;
4621 } omp_target_memcpy_rect_data;
4623 static void
4624 omp_target_memcpy_rect_async_helper (void *args)
4626 omp_target_memcpy_rect_data *a = args;
4627 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4628 a->num_dims, a->volume, a->dst_offsets,
4629 a->src_offsets, a->dst_dimensions,
4630 a->src_dimensions, a->dst_devicep,
4631 a->src_devicep);
4632 if (ret)
4633 gomp_fatal ("omp_target_memcpy_rect failed");
4637 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4638 int num_dims, const size_t *volume,
4639 const size_t *dst_offsets,
4640 const size_t *src_offsets,
4641 const size_t *dst_dimensions,
4642 const size_t *src_dimensions,
4643 int dst_device_num, int src_device_num,
4644 int depobj_count, omp_depend_t *depobj_list)
4646 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4647 unsigned flags = 0;
4648 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4649 src_device_num, &dst_devicep,
4650 &src_devicep);
4651 void *depend[depobj_count + 5];
4652 int i;
4654 omp_target_memcpy_rect_data s = {
4655 .dst = dst,
4656 .src = src,
4657 .element_size = element_size,
4658 .num_dims = num_dims,
4659 .volume = volume,
4660 .dst_offsets = dst_offsets,
4661 .src_offsets = src_offsets,
4662 .dst_dimensions = dst_dimensions,
4663 .src_dimensions = src_dimensions,
4664 .dst_devicep = dst_devicep,
4665 .src_devicep = src_devicep
4668 if (check)
4669 return check;
4671 if (depobj_count > 0 && depobj_list != NULL)
4673 flags |= GOMP_TASK_FLAG_DEPEND;
4674 depend[0] = 0;
4675 depend[1] = (void *) (uintptr_t) depobj_count;
4676 depend[2] = depend[3] = depend[4] = 0;
4677 for (i = 0; i < depobj_count; ++i)
4678 depend[i + 5] = &depobj_list[i];
4681 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4682 __alignof__ (s), true, flags, depend, 0, NULL);
4684 return 0;
4688 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4689 size_t size, size_t device_offset, int device_num)
4691 if (device_num == omp_initial_device
4692 || device_num == gomp_get_num_devices ())
4693 return EINVAL;
4695 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4696 if (devicep == NULL)
4697 return EINVAL;
4699 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4700 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4701 return EINVAL;
4703 gomp_mutex_lock (&devicep->lock);
4705 struct splay_tree_s *mem_map = &devicep->mem_map;
4706 struct splay_tree_key_s cur_node;
4707 int ret = EINVAL;
4709 cur_node.host_start = (uintptr_t) host_ptr;
4710 cur_node.host_end = cur_node.host_start + size;
4711 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4712 if (n)
4714 if (n->tgt->tgt_start + n->tgt_offset
4715 == (uintptr_t) device_ptr + device_offset
4716 && n->host_start <= cur_node.host_start
4717 && n->host_end >= cur_node.host_end)
4718 ret = 0;
4720 else
4722 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4723 tgt->array = gomp_malloc (sizeof (*tgt->array));
4724 tgt->refcount = 1;
4725 tgt->tgt_start = 0;
4726 tgt->tgt_end = 0;
4727 tgt->to_free = NULL;
4728 tgt->prev = NULL;
4729 tgt->list_count = 0;
4730 tgt->device_descr = devicep;
4731 splay_tree_node array = tgt->array;
4732 splay_tree_key k = &array->key;
4733 k->host_start = cur_node.host_start;
4734 k->host_end = cur_node.host_end;
4735 k->tgt = tgt;
4736 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4737 k->refcount = REFCOUNT_INFINITY;
4738 k->dynamic_refcount = 0;
4739 k->aux = NULL;
4740 array->left = NULL;
4741 array->right = NULL;
4742 splay_tree_insert (&devicep->mem_map, array);
4743 ret = 0;
4745 gomp_mutex_unlock (&devicep->lock);
4746 return ret;
4750 omp_target_disassociate_ptr (const void *ptr, int device_num)
4752 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4753 if (devicep == NULL)
4754 return EINVAL;
4756 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4757 return EINVAL;
4759 gomp_mutex_lock (&devicep->lock);
4761 struct splay_tree_s *mem_map = &devicep->mem_map;
4762 struct splay_tree_key_s cur_node;
4763 int ret = EINVAL;
4765 cur_node.host_start = (uintptr_t) ptr;
4766 cur_node.host_end = cur_node.host_start;
4767 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4768 if (n
4769 && n->host_start == cur_node.host_start
4770 && n->refcount == REFCOUNT_INFINITY
4771 && n->tgt->tgt_start == 0
4772 && n->tgt->to_free == NULL
4773 && n->tgt->refcount == 1
4774 && n->tgt->list_count == 0)
4776 splay_tree_remove (&devicep->mem_map, n);
4777 gomp_unmap_tgt (n->tgt);
4778 ret = 0;
4781 gomp_mutex_unlock (&devicep->lock);
4782 return ret;
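/* Usage sketch for the associate/disassociate pair above (illustration only;
   SIZE, dev and host_p are placeholders): pre-allocate device storage and
   register it so that later map clauses reuse it instead of allocating:

     void *dev_p = omp_target_alloc (SIZE, dev);
     if (dev_p != NULL
         && omp_target_associate_ptr (host_p, dev_p, SIZE, 0, dev) == 0)
       {
         do_work_with_mapped_host_p ();
         omp_target_disassociate_ptr (host_p, dev);
       }
     omp_target_free (dev_p, dev);

   do_work_with_mapped_host_p stands for code that maps host_p in target
   constructs on device 'dev'; those mappings then resolve to dev_p.  */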
4785 void *
4786 omp_get_mapped_ptr (const void *ptr, int device_num)
4788 if (device_num == omp_initial_device
4789 || device_num == omp_get_initial_device ())
4790 return (void *) ptr;
4792 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4793 if (devicep == NULL)
4794 return NULL;
4796 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4797 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4798 return (void *) ptr;
4800 gomp_mutex_lock (&devicep->lock);
4802 struct splay_tree_s *mem_map = &devicep->mem_map;
4803 struct splay_tree_key_s cur_node;
4804 void *ret = NULL;
4806 cur_node.host_start = (uintptr_t) ptr;
4807 cur_node.host_end = cur_node.host_start;
4808 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4810 if (n)
4812 uintptr_t offset = cur_node.host_start - n->host_start;
4813 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
4816 gomp_mutex_unlock (&devicep->lock);
4818 return ret;
4822 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
4824 if (device_num == omp_initial_device
4825 || device_num == gomp_get_num_devices ())
4826 return true;
4828 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4829 if (devicep == NULL)
4830 return false;
4832 /* TODO: Unified shared memory must be handled when available. */
4834 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
4838 omp_pause_resource (omp_pause_resource_t kind, int device_num)
4840 (void) kind;
4841 if (device_num == omp_initial_device
4842 || device_num == gomp_get_num_devices ())
4843 return gomp_pause_host ();
4845 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4846 if (devicep == NULL)
4847 return -1;
4849 /* Do nothing for target devices for now. */
4850 return 0;
4854 omp_pause_resource_all (omp_pause_resource_t kind)
4856 (void) kind;
4857 if (gomp_pause_host ())
4858 return -1;
4859 /* Do nothing for target devices for now. */
4860 return 0;
4863 ialias (omp_pause_resource)
4864 ialias (omp_pause_resource_all)
4866 #ifdef PLUGIN_SUPPORT
4868 /* This function tries to load a plugin for DEVICE. The name of the plugin
4869 is passed in PLUGIN_NAME.
4870 The handles of the found functions are stored in the corresponding fields
4871 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4873 static bool
4874 gomp_load_plugin_for_device (struct gomp_device_descr *device,
4875 const char *plugin_name)
4877 const char *err = NULL, *last_missing = NULL;
4879 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
4880 if (!plugin_handle)
4881 #if OFFLOAD_DEFAULTED
4882 return 0;
4883 #else
4884 goto dl_fail;
4885 #endif
4887 /* Check if all required functions are available in the plugin and store
4888 their handles. None of the symbols can legitimately be NULL,
4889 so we don't need to check dlerror all the time. */
4890 #define DLSYM(f) \
4891 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4892 goto dl_fail
4893 /* Similar, but missing functions are not an error. Return false if
4894 failed, true otherwise. */
4895 #define DLSYM_OPT(f, n) \
4896 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4897 || (last_missing = #n, 0))
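/* For instance, DLSYM (alloc) resolves "GOMP_OFFLOAD_alloc" into
   device->alloc_func and jumps to dl_fail if the symbol is missing, while
   DLSYM_OPT (can_run, can_run) merely records "can_run" in last_missing and
   evaluates to false when the symbol is absent.  */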
4899 DLSYM (version);
4900 if (device->version_func () != GOMP_VERSION)
4902 err = "plugin version mismatch";
4903 goto fail;
4906 DLSYM (get_name);
4907 DLSYM (get_caps);
4908 DLSYM (get_type);
4909 DLSYM (get_num_devices);
4910 DLSYM (init_device);
4911 DLSYM (fini_device);
4912 DLSYM (load_image);
4913 DLSYM (unload_image);
4914 DLSYM (alloc);
4915 DLSYM (free);
4916 DLSYM (dev2host);
4917 DLSYM (host2dev);
4918 device->capabilities = device->get_caps_func ();
4919 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4921 DLSYM (run);
4922 DLSYM_OPT (async_run, async_run);
4923 DLSYM_OPT (can_run, can_run);
4924 DLSYM (dev2dev);
4926 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4928 if (!DLSYM_OPT (openacc.exec, openacc_exec)
4929 || !DLSYM_OPT (openacc.create_thread_data,
4930 openacc_create_thread_data)
4931 || !DLSYM_OPT (openacc.destroy_thread_data,
4932 openacc_destroy_thread_data)
4933 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
4934 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
4935 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
4936 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
4937 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
4938 || !DLSYM_OPT (openacc.async.queue_callback,
4939 openacc_async_queue_callback)
4940 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
4941 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
4942 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
4943 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
4945 /* Require all the OpenACC handlers if we have
4946 GOMP_OFFLOAD_CAP_OPENACC_200. */
4947 err = "plugin missing OpenACC handler function";
4948 goto fail;
4951 unsigned cuda = 0;
4952 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
4953 openacc_cuda_get_current_device);
4954 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
4955 openacc_cuda_get_current_context);
4956 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
4957 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
4958 if (cuda && cuda != 4)
4960 /* Make sure all the CUDA functions are there if any of them are. */
4961 err = "plugin missing OpenACC CUDA handler function";
4962 goto fail;
4965 #undef DLSYM
4966 #undef DLSYM_OPT
4968 return 1;
4970 dl_fail:
4971 err = dlerror ();
4972 fail:
4973 gomp_error ("while loading %s: %s", plugin_name, err);
4974 if (last_missing)
4975 gomp_error ("missing function was %s", last_missing);
4976 if (plugin_handle)
4977 dlclose (plugin_handle);
4979 return 0;
4982 /* This function finalizes all initialized devices. */
4984 static void
4985 gomp_target_fini (void)
4987 int i;
4988 for (i = 0; i < num_devices; i++)
4990 bool ret = true;
4991 struct gomp_device_descr *devicep = &devices[i];
4992 gomp_mutex_lock (&devicep->lock);
4993 if (devicep->state == GOMP_DEVICE_INITIALIZED)
4994 ret = gomp_fini_device (devicep);
4995 gomp_mutex_unlock (&devicep->lock);
4996 if (!ret)
4997 gomp_fatal ("device finalization failed");
5001 /* This function initializes the runtime for offloading.
5002 It parses the list of offload plugins, and tries to load these.
5003 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5004 will be set, and the array DEVICES initialized, containing descriptors for
5005 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
5006 by the others. */
5008 static void
5009 gomp_target_init (void)
5011 const char *prefix = "libgomp-plugin-";
5012 const char *suffix = SONAME_SUFFIX (1);
5013 const char *cur, *next;
5014 char *plugin_name;
5015 int i, new_num_devs;
5016 int num_devs = 0, num_devs_openmp;
5017 struct gomp_device_descr *devs = NULL;
5019 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5020 return;
5022 cur = OFFLOAD_PLUGINS;
5023 if (*cur)
5026 struct gomp_device_descr current_device;
5027 size_t prefix_len, suffix_len, cur_len;
5029 next = strchr (cur, ',');
5031 prefix_len = strlen (prefix);
5032 cur_len = next ? next - cur : strlen (cur);
5033 suffix_len = strlen (suffix);
5035 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5036 if (!plugin_name)
5038 num_devs = 0;
5039 break;
5042 memcpy (plugin_name, prefix, prefix_len);
5043 memcpy (plugin_name + prefix_len, cur, cur_len);
5044 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
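/* For example, with the entry "nvptx" in OFFLOAD_PLUGINS and the usual ELF
   SONAME_SUFFIX this builds "libgomp-plugin-nvptx.so.1" (an illustrative
   value; the actual suffix comes from plugin-suffix.h).  */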
5046 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5048 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5049 new_num_devs = current_device.get_num_devices_func (omp_req);
5050 if (gomp_debug_var > 0 && new_num_devs < 0)
5052 bool found = false;
5053 int type = current_device.get_type_func ();
5054 for (int img = 0; img < num_offload_images; img++)
5055 if (type == offload_images[img].type)
5056 found = true;
5057 if (found)
5059 char buf[sizeof ("unified_address, unified_shared_memory, "
5060 "reverse_offload")];
5061 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5062 char *name = (char *) malloc (cur_len + 1);
5063 memcpy (name, cur, cur_len);
5064 name[cur_len] = '\0';
5065 gomp_debug (1,
5066 "%s devices present but 'omp requires %s' "
5067 "cannot be fulfilled\n", name, buf);
5068 free (name);
5071 else if (new_num_devs >= 1)
5073 /* Augment DEVICES and NUM_DEVICES. */
5075 devs = realloc (devs, (num_devs + new_num_devs)
5076 * sizeof (struct gomp_device_descr));
5077 if (!devs)
5079 num_devs = 0;
5080 free (plugin_name);
5081 break;
5084 current_device.name = current_device.get_name_func ();
5085 /* current_device.capabilities has already been set. */
5086 current_device.type = current_device.get_type_func ();
5087 current_device.mem_map.root = NULL;
5088 current_device.mem_map_rev.root = NULL;
5089 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5090 for (i = 0; i < new_num_devs; i++)
5092 current_device.target_id = i;
5093 devs[num_devs] = current_device;
5094 gomp_mutex_init (&devs[num_devs].lock);
5095 num_devs++;
5100 free (plugin_name);
5101 cur = next + 1;
5103 while (next);
5105 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5106 NUM_DEVICES_OPENMP. */
5107 struct gomp_device_descr *devs_s
5108 = malloc (num_devs * sizeof (struct gomp_device_descr));
5109 if (!devs_s)
5111 num_devs = 0;
5112 free (devs);
5113 devs = NULL;
5115 num_devs_openmp = 0;
5116 for (i = 0; i < num_devs; i++)
5117 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5118 devs_s[num_devs_openmp++] = devs[i];
5119 int num_devs_after_openmp = num_devs_openmp;
5120 for (i = 0; i < num_devs; i++)
5121 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5122 devs_s[num_devs_after_openmp++] = devs[i];
5123 free (devs);
5124 devs = devs_s;
5126 for (i = 0; i < num_devs; i++)
5128 /* The 'devices' array can be moved (by the realloc call) until we have
5129 found all the plugins, so registering with the OpenACC runtime (which
5130 takes a copy of the pointer argument) must be delayed until now. */
5131 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5132 goacc_register (&devs[i]);
5135 num_devices = num_devs;
5136 num_devices_openmp = num_devs_openmp;
5137 devices = devs;
5138 if (atexit (gomp_target_fini) != 0)
5139 gomp_fatal ("atexit failed");
5142 #else /* PLUGIN_SUPPORT */
5143 /* If dlfcn.h is unavailable we always fall back to host execution.
5144 GOMP_target* routines are just stubs for this case. */
5145 static void
5146 gomp_target_init (void)
5149 #endif /* PLUGIN_SUPPORT */