/* Copyright (C) 2013-2024 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_static
#define splay_tree_c
#include "splay-tree.h"
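
/* A sketch of what the instantiation above produces, assuming the usual
   splay-tree.h prefixing scheme: with splay_tree_prefix defined to
   "reverse", the include is expected to emit static definitions such as
   reverse_splay_tree_lookup (used below) together with the
   reverse_splay_tree and reverse_splay_tree_key types, which serve the
   device-to-host address translation needed for reverse offload.  */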
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* OpenMP 'requires' directives mask.  */
static int omp_requires_mask;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                                 : omp_initial_device))
        return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && num_devices == 0)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but only the host device is available");
      else if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
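
/* Illustrative outcomes of the checks above: resolve_device
   (omp_initial_device, false) returns NULL, which callers treat as host
   fallback; under OMP_TARGET_OFFLOAD=mandatory a missing or finalized
   device is fatal, except that device_id == num_devices (the initial
   device) still returns NULL; otherwise an out-of-range device_id just
   yields NULL.  */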
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
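
/* Worked example for the zero-length case above: for a key with
   host_start == host_end == p, the three lookups try [p, p+1), then
   [p-1, p), and finally the empty [p, p) itself, so a zero-length key
   matches a mapping containing p, a mapping ending exactly at p, or an
   identical zero-length mapping.  */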
static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE (32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP (4 * 1024)
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, which we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
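
/* A short trace of the logic above, with illustrative offsets: adding
   region [0, 100) opens chunk 0 with use_cnt 1; adding [612, 712) lies
   within MAX_COALESCE_BUF_GAP of that chunk's end, so chunk 0 grows to
   [0, 712) with use_cnt 2; adding [8000, 8100) is too far away, so a new
   chunk is started.  Had chunk 0 still had use_cnt 1 at that point, it
   would have been dropped, as a single region gains nothing from being
   staged through the buffer.  */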
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is on the stack or may otherwise be
   deallocated before the asynchronous copy takes place, EPHEMERAL must be
   passed as TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }

                  /* In an asynchronous context, verify that CBUF isn't used
                     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
                  if (__builtin_expect (aq != NULL, 0))
                    assert (ephemeral);

                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  As we've
             not been able to use CBUF, make a copy of the data into a
             temporary buffer.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
                      "dev", d, "host", h, sz);
}
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
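
/* Consequence of the indirection above: for a structure mapped as a
   sequence of element keys, every element key forwards to the
   structelem_refcount of the _FIRST key, so mapping any subset of the
   structure's fields bumps one shared counter instead of one counter per
   field.  */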
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
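
/* Example outcomes of the two flags above: an exit construct that drops
   the last reference sets both *DO_COPY and *DO_REMOVE; a sibling
   structure-element map processed later in the same construct finds the
   shared refcount already in REFCOUNT_SET and at zero, so it gets
   *DO_COPY (its field still needs copying back) but not *DO_REMOVE (the
   removal was already signalled once).  */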
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
          && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, the
         address/length adjustment below is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             during the copyback to host.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
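
/* Encoding assumed by the two accessors above and by the "kind >> rshift"
   computations below: in the short (16-bit) form the low byte carries the
   GOMP_MAP_* kind, optionally with GOMP_MAP_IMPLICIT set, and the high
   byte carries the log2 of the mapping's alignment; the plain (8-bit)
   form packs the kind into the low 3 bits and the alignment into the
   rest, hence rshift of 8 versus 3 and typemask of 0xff versus 0x7 in
   gomp_map_vars_internal.  */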
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf,
                  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
        cur_node.tgt_offset = cur_node.host_start;
      else
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Pointer target of array section wasn't mapped");
        }
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
        = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
         array section.  Now subtract bias to get what we want
         to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}
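
/* Worked example of the bias handling above: for an array section whose
   host pointer P points BIAS bytes before the mapped data, the lookup is
   done at P + BIAS; if that address lies N bytes into a mapping with
   device base D, the pointer cell on the device receives D + N - BIAS,
   so adding BIAS back on the device lands on the mapped data again.  */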
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, implicit, cbuf,
                              refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, implicit, cbuf,
                                      refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, implicit, cbuf,
                                  refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp,
                     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          /* As a special case, allow attaching NULL host pointers.  This
             allows e.g. unassociated Fortran pointers to be mapped
             properly.  */
          data = 0;

          gomp_debug (1,
                      "%s: attaching NULL host pointer, target %p "
                      "(struct base %p)\n", __FUNCTION__, (void *) devptr,
                      (void *) (n->tgt->tgt_start + n->tgt_offset));

          gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                              sizeof (void *), true, cbufp);

          return;
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          if (allow_zero_length_array_sections)
            /* When allowing attachment to zero-length array sections, we
               copy the host pointer when the target region is not mapped.  */
            data = target;
          else
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("pointer target not mapped for attach");
            }
        }
      else
        data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}
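
/* Example of the counting scheme above: two successive attaches of the
   same struct member leave attach_count[idx] == 2 but write the
   translated device pointer only for the first one; the matching
   detaches (see gomp_detach_pointer below) decrement the counter, and
   only the one reaching zero rewrites the original host pointer value on
   the device.  */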
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
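
/* Illustration of the OFFSET_STRUCT case above: for a GOMP_MAP_STRUCT
   entry at index i, list[i + 1] describes the first mapped field, so the
   struct's device address is that field's device address adjusted by the
   field's offset from the struct base, i.e. by hostaddrs[i]
   - hostaddrs[i + 1].  */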
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT
               || (kind & typemask) == GOMP_MAP_STRUCT_UNORD)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask)
                      && sizes[i] != 0)
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH
               || ((kind & typemask)
                   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_INLINED;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier, or if a
                 null pointer was there and a non-null one has since been
                 found; that permits transparent re-mapping for Fortran
                 array descriptors which were previously mapped
                 unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, implicit,
                                  NULL, refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_INLINED;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }
  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp, false);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, false, cbufp);
                /* Save device address in hostaddr to permit later
                   availability when doing a deep-firstprivate with pointer
                   attach.  */
                hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
                tgt_size += len;

                /* If followed by GOMP_MAP_ATTACH, pointer assign this
                   firstprivate to hostaddrs[i+1], which is assumed to contain
                   a device address.  */
                if (i + 1 < mapnum
                    && (GOMP_MAP_ATTACH
                        == (typemask & get_kind (short_mapkind, kinds, i+1))))
                  {
                    uintptr_t target = (uintptr_t) hostaddrs[i];
                    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
                    /* Per
                       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
                       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
                       this probably needs revision for 'aq' usage.  */
                    assert (!aq);
                    gomp_copy_host2dev (devicep, aq, devptr, &target,
                                        sizeof (void *), false, cbufp);
                    ++i;
                  }
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily
                   test 'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT_UNORD:
                if (sizes[i] > 1)
                  {
                    void *first = hostaddrs[i + 1];
                    for (size_t j = i + 1; j < i + sizes[i]; j++)
                      if (hostaddrs[j + 1] != first)
                        {
                          gomp_mutex_unlock (&devicep->lock);
                          gomp_fatal ("Mapped array elements must be the "
                                      "same (%p vs %p)", first,
                                      hostaddrs[j + 1]);
                        }
                  }
                /* Fallthrough.  */
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                if (sizes[last] == 0)
                  cur_node.host_end++;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (sizes[last] == 0)
                  cur_node.host_end--;
                if (n == NULL && cur_node.host_start == cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("Struct pointer member not mapped (%p)",
                                (void*) hostaddrs[first]);
                  }
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    field_tgt_structelem_first = NULL;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp, refcount_set);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if (i > 0
                    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                        != GOMP_MAP_ALWAYS_POINTER))
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
              case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start
                                            - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      bool zlas
                        = ((kind & typemask)
                           == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp, zlas);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              {
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    /* For this condition to be true, there must be a
                       duplicate struct element mapping.  This can happen
                       with GOMP_MAP_STRUCT_UNORD mappings, for example.  */
                    tgt->list[i].key = n;
                    if (openmp_p)
                      {
                        assert ((n->refcount & REFCOUNT_STRUCTELEM) != 0);
                        assert (field_tgt_structelem_first != NULL);

                        if (i == field_tgt_clear)
                          {
                            n->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                    gomp_increment_refcount (n, refcount_set);
                    tgt->list[i].copy_from
                      = GOMP_MAP_COPY_FROM_P (kind & typemask);
                    tgt->list[i].always_copy_from
                      = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                    tgt->list[i].is_attach = false;
                    tgt->list[i].offset = 0;
                    tgt->list[i].length = k->host_end - k->host_start;
                  }
                else
                  gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                          kind & typemask, false, implicit,
                                          cbufp, refcount_set);
              }
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                k->refcount = 0;
                k->dynamic_refcount = 0;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (openmp_p)
                      {
                        k->refcount = REFCOUNT_STRUCTELEM;
                        if (field_tgt_structelem_first == NULL)
                          {
                            /* Set to first structure element of sequence.  */
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
                            field_tgt_structelem_first = k;
                          }
                        else
                          /* Point to refcount of leading element, but do not
                             increment again.  */
                          k->structelem_refcount_ptr
                            = &field_tgt_structelem_first->structelem_refcount;

                        if (i == field_tgt_clear)
                          {
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                /* First increment, from 0 to 1.  gomp_increment_refcount
                   encapsulates the different increment cases, so use this
                   instead of directly setting 1 during initialization.  */
                gomp_increment_refcount (k, refcount_set);

                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
                    gomp_map_pointer
                      (tgt, aq, (uintptr_t) *(void **) k->host_start,
                       k->tgt_offset, sizes[i], cbufp,
                       ((kind & typemask)
                        == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j]
                                     + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            /* For OpenMP, the use of refcount_sets causes
                               errors if we set k->refcount = 1 above but also
                               increment it again here, for decrementing will
                               not properly match, since we decrement only
                               once for each key's refcount.  Therefore avoid
                               this increment for OpenMP constructs.  */
                            if (!openmp_p)
                              gomp_increment_refcount (k, refcount_set);
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t) *(void **)
                                                hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp, false);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                  case GOMP_MAP_ALWAYS_PRESENT_TO:
                  case GOMP_MAP_ALWAYS_PRESENT_FROM:
                  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
                                  "dev: %d)", (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size,
                                  devicep->target_id);
#else
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %lu (0x%lx), dev: %d)",
                                  (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size,
                                  devicep->target_id);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), false, cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), true,
                                        NULL);
                  }
                array++;
              }
          }
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              true, cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start,
                            false, NULL);
      if (aq)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
      else
        free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
1879 static struct target_mem_desc *
1880 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1881 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1882 bool short_mapkind, htab_t *refcount_set,
1883 enum gomp_map_vars_kind pragma_kind)
1885 /* This management of a local refcount_set is for convenience of callers
1886 who do not share a refcount_set over multiple map/unmap uses. */
1887 htab_t local_refcount_set = NULL;
1888 if (refcount_set == NULL)
1890 local_refcount_set = htab_create (mapnum);
1891 refcount_set = &local_refcount_set;
1894 struct target_mem_desc *tgt;
1895 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1896 sizes, kinds, short_mapkind, refcount_set,
1897 pragma_kind);
1898 if (local_refcount_set)
1899 htab_free (local_refcount_set);
1901 return tgt;
1904 attribute_hidden struct target_mem_desc *
1905 goacc_map_vars (struct gomp_device_descr *devicep,
1906 struct goacc_asyncqueue *aq, size_t mapnum,
1907 void **hostaddrs, void **devaddrs, size_t *sizes,
1908 void *kinds, bool short_mapkind,
1909 enum gomp_map_vars_kind pragma_kind)
1911 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1912 sizes, kinds, short_mapkind, NULL,
1913 GOMP_MAP_VARS_OPENACC | pragma_kind);
1916 static void
1917 gomp_unmap_tgt (struct target_mem_desc *tgt)
1919 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1920 if (tgt->tgt_end)
1921 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1923 free (tgt->array);
1924 free (tgt);
1927 static bool
1928 gomp_unref_tgt (void *ptr)
1930 bool is_tgt_unmapped = false;
1932 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1934 if (tgt->refcount > 1)
1935 tgt->refcount--;
1936 else
1938 gomp_unmap_tgt (tgt);
1939 is_tgt_unmapped = true;
1942 return is_tgt_unmapped;
1945 static void
1946 gomp_unref_tgt_void (void *ptr)
1948 (void) gomp_unref_tgt (ptr);
1951 static void
1952 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1954 splay_tree_remove (sp, k);
1955 if (k->aux)
1957 if (k->aux->link_key)
1958 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1959 if (k->aux->attach_count)
1960 free (k->aux->attach_count);
1961 free (k->aux);
1962 k->aux = NULL;
1966 static inline __attribute__((always_inline)) bool
1967 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1968 struct goacc_asyncqueue *aq)
1970 bool is_tgt_unmapped = false;
1972 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1974 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1975 /* Infer the splay_tree_key of the first structelem key using the
1976 pointer to the first structelem_refcount. */
1977 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1978 - offsetof (struct splay_tree_key_s,
1979 structelem_refcount));
1980 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1982 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1983 with the splay_tree_keys embedded inside. */
1984 splay_tree_node node =
1985 (splay_tree_node) ((char *) k
1986 - offsetof (struct splay_tree_node_s, key));
1987 while (true)
1989 /* Starting from the _FIRST key, continue for all following
1990 sibling keys. */
1991 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1992 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1993 break;
1994 else
1995 k = &(++node)->key;
1998 else
1999 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2001 if (aq)
2002 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2003 (void *) k->tgt);
2004 else
2005 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2006 return is_tgt_unmapped;
2009 attribute_hidden bool
2010 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2012 return gomp_remove_var_internal (devicep, k, NULL);
2015 /* Remove a variable asynchronously. This actually removes the variable
2016 mapping immediately, but retains the linked target_mem_desc until the
2017 asynchronous operation has completed (as it may still refer to target
2018 memory). The device lock must be held before entry, and remains locked on
2019 exit. */
2021 attribute_hidden void
2022 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2023 struct goacc_asyncqueue *aq)
2025 (void) gomp_remove_var_internal (devicep, k, aq);
2028 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2029 variables back from device to host; if it is false, it is assumed that this
2030 has been done already. */
2032 static inline __attribute__((always_inline)) void
2033 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2034 htab_t *refcount_set, struct goacc_asyncqueue *aq)
2036 struct gomp_device_descr *devicep = tgt->device_descr;
2038 if (tgt->list_count == 0)
2040 free (tgt);
2041 return;
2044 gomp_mutex_lock (&devicep->lock);
2045 if (devicep->state == GOMP_DEVICE_FINALIZED)
2047 gomp_mutex_unlock (&devicep->lock);
2048 free (tgt->array);
2049 free (tgt);
2050 return;
2053 size_t i;
2055 /* We must perform detachments before any copies back to the host. */
2056 for (i = 0; i < tgt->list_count; i++)
2058 splay_tree_key k = tgt->list[i].key;
2060 if (k != NULL && tgt->list[i].is_attach)
2061 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2062 + tgt->list[i].offset,
2063 false, NULL);
2066 for (i = 0; i < tgt->list_count; i++)
2068 splay_tree_key k = tgt->list[i].key;
2069 if (k == NULL)
2070 continue;
2072 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2073 counts ('n->refcount', 'n->dynamic_refcount'). */
2074 if (tgt->list[i].is_attach)
2075 continue;
2077 bool do_copy, do_remove;
2078 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2080 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2081 || tgt->list[i].always_copy_from)
2082 gomp_copy_dev2host (devicep, aq,
2083 (void *) (k->host_start + tgt->list[i].offset),
2084 (void *) (k->tgt->tgt_start + k->tgt_offset
2085 + tgt->list[i].offset),
2086 tgt->list[i].length);
2087 if (do_remove)
2089 struct target_mem_desc *k_tgt = k->tgt;
2090 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2091 /* It would be bad if TGT got unmapped while we're still iterating
2092 over its LIST_COUNT and still expect to use it in the following
2093 code. */
2094 assert (!is_tgt_unmapped
2095 || k_tgt != tgt);
2099 if (aq)
2100 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2101 (void *) tgt);
2102 else
2103 gomp_unref_tgt ((void *) tgt);
2105 gomp_mutex_unlock (&devicep->lock);
2108 static void
2109 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2110 htab_t *refcount_set)
2112 /* This management of a local refcount_set is for convenience of callers
2113 who do not share a refcount_set over multiple map/unmap uses. */
2114 htab_t local_refcount_set = NULL;
2115 if (refcount_set == NULL)
2117 local_refcount_set = htab_create (tgt->list_count);
2118 refcount_set = &local_refcount_set;
2121 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2123 if (local_refcount_set)
2124 htab_free (local_refcount_set);
2127 attribute_hidden void
2128 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2129 struct goacc_asyncqueue *aq)
2131 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2134 static void
2135 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2136 size_t *sizes, void *kinds, bool short_mapkind)
2138 size_t i;
2139 struct splay_tree_key_s cur_node;
2140 const int typemask = short_mapkind ? 0xff : 0x7;
2142 if (!devicep)
2143 return;
2145 if (mapnum == 0)
2146 return;
2148 gomp_mutex_lock (&devicep->lock);
2149 if (devicep->state == GOMP_DEVICE_FINALIZED)
2151 gomp_mutex_unlock (&devicep->lock);
2152 return;
2155 for (i = 0; i < mapnum; i++)
2156 if (sizes[i])
2158 cur_node.host_start = (uintptr_t) hostaddrs[i];
2159 cur_node.host_end = cur_node.host_start + sizes[i];
2160 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2161 if (n)
2163 int kind = get_kind (short_mapkind, kinds, i);
2164 if (n->host_start > cur_node.host_start
2165 || n->host_end < cur_node.host_end)
2167 gomp_mutex_unlock (&devicep->lock);
2168 gomp_fatal ("Trying to update [%p..%p) object when "
2169 "only [%p..%p) is mapped",
2170 (void *) cur_node.host_start,
2171 (void *) cur_node.host_end,
2172 (void *) n->host_start,
2173 (void *) n->host_end);
2176 if (n->aux && n->aux->attach_count)
2178 uintptr_t addr = cur_node.host_start;
2179 while (addr < cur_node.host_end)
2181 /* We have to be careful not to overwrite still attached
2182 pointers during host<->device updates. */
2183 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2184 if (n->aux->attach_count[i] == 0)
2186 void *devaddr = (void *) (n->tgt->tgt_start
2187 + n->tgt_offset
2188 + addr - n->host_start);
2189 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2190 gomp_copy_host2dev (devicep, NULL,
2191 devaddr, (void *) addr,
2192 sizeof (void *), false, NULL);
2193 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2194 gomp_copy_dev2host (devicep, NULL,
2195 (void *) addr, devaddr,
2196 sizeof (void *));
2198 addr += sizeof (void *);
2201 else
2203 void *hostaddr = (void *) cur_node.host_start;
2204 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2205 + cur_node.host_start
2206 - n->host_start);
2207 size_t size = cur_node.host_end - cur_node.host_start;
2209 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2210 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2211 false, NULL);
2212 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2213 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2216 else
2218 int kind = get_kind (short_mapkind, kinds, i);
2220 if (GOMP_MAP_PRESENT_P (kind))
2222 /* We already looked up the memory region above and it
2223 was missing. */
2224 gomp_mutex_unlock (&devicep->lock);
2225 #ifdef HAVE_INTTYPES_H
2226 gomp_fatal ("present clause: not present on the device "
2227 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2228 "dev: %d)", (void *) hostaddrs[i],
2229 (uint64_t) sizes[i], (uint64_t) sizes[i],
2230 devicep->target_id);
2231 #else
2232 gomp_fatal ("present clause: not present on the device "
2233 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2234 (void *) hostaddrs[i], (unsigned long) sizes[i],
2235 (unsigned long) sizes[i], devicep->target_id);
2236 #endif
2240 gomp_mutex_unlock (&devicep->lock);
2243 static struct gomp_offload_icv_list *
2244 gomp_get_offload_icv_item (int dev_num)
2246 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2247 while (l != NULL && l->device_num != dev_num)
2248 l = l->next;
2250 return l;
2253 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2254 depending on the device num and the variable hierarchy
2255 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2256 device and thus no item with that device number is contained in
2257 gomp_offload_icv_list, then a new item is created and added to the list. */
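/* Illustrative sketch (not part of this file): for dev_num == 42 the
   nteams lookup below resolves in this order, assuming the usual
   device-specific environment-variable suffix syntax:

     OMP_NUM_TEAMS_DEV_42                  (dev_x, most specific)
     OMP_NUM_TEAMS_DEV                     (dev)
     OMP_NUM_TEAMS_ALL                     (all)
     gomp_default_icv_values.nteams_var    (fallback)  */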
2259 static struct gomp_offload_icvs *
2260 get_gomp_offload_icvs (int dev_num)
2262 struct gomp_icv_list *dev
2263 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2264 struct gomp_icv_list *all
2265 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2266 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2267 struct gomp_offload_icv_list *offload_icvs
2268 = gomp_get_offload_icv_item (dev_num);
2270 if (offload_icvs != NULL)
2271 return &offload_icvs->icvs;
2273 struct gomp_offload_icv_list *new
2274 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2276 new->device_num = dev_num;
2277 new->icvs.device_num = dev_num;
2278 new->next = gomp_offload_icv_list;
2280 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2281 new->icvs.nteams = dev_x->icvs.nteams_var;
2282 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2283 new->icvs.nteams = dev->icvs.nteams_var;
2284 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2285 new->icvs.nteams = all->icvs.nteams_var;
2286 else
2287 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2289 if (dev_x != NULL
2290 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2291 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2292 else if (dev != NULL
2293 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2294 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2295 else if (all != NULL
2296 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2297 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2298 else
2299 new->icvs.teams_thread_limit
2300 = gomp_default_icv_values.teams_thread_limit_var;
2302 if (dev_x != NULL
2303 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2304 new->icvs.default_device = dev_x->icvs.default_device_var;
2305 else if (dev != NULL
2306 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2307 new->icvs.default_device = dev->icvs.default_device_var;
2308 else if (all != NULL
2309 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2310 new->icvs.default_device = all->icvs.default_device_var;
2311 else
2312 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2314 gomp_offload_icv_list = new;
2315 return &new->icvs;
2318 /* Load the image pointed to by TARGET_DATA to the device specified by
2319 DEVICEP, and insert into the splay tree the mappings between addresses
2320 from HOST_TABLE and from the loaded target image. We rely on the host
2321 and device compilers emitting variables and functions in the same order. */
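/* A minimal sketch of the HOST_TABLE layout that the pointer arithmetic
   below assumes; the symbol names here are illustrative only:

     void *host_table[] = {
       __offload_func_table, __offload_funcs_end,
       __offload_var_table,  __offload_vars_end,
       // only with GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS:
       __offload_ind_func_table, __offload_ind_funcs_end
     };  */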
2323 static void
2324 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2325 const void *host_table, const void *target_data,
2326 bool is_register_lock)
2328 void **host_func_table = ((void ***) host_table)[0];
2329 void **host_funcs_end = ((void ***) host_table)[1];
2330 void **host_var_table = ((void ***) host_table)[2];
2331 void **host_vars_end = ((void ***) host_table)[3];
2332 void **host_ind_func_table = NULL;
2333 void **host_ind_funcs_end = NULL;
2335 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2337 host_ind_func_table = ((void ***) host_table)[4];
2338 host_ind_funcs_end = ((void ***) host_table)[5];
2341 /* The func and ind_func tables contain only addresses, the var table
2342 contains addresses and corresponding sizes. */
2343 int num_funcs = host_funcs_end - host_func_table;
2344 int num_vars = (host_vars_end - host_var_table) / 2;
2345 int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
2347 /* Load image to device and get target addresses for the image. */
2348 struct addr_pair *target_table = NULL;
2349 uint64_t *rev_target_fn_table = NULL;
2350 int i, num_target_entries;
2352 /* With reverse offload, insert also target-host addresses. */
2353 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2355 num_target_entries
2356 = devicep->load_image_func (devicep->target_id, version,
2357 target_data, &target_table,
2358 rev_lookup ? &rev_target_fn_table : NULL,
2359 num_ind_funcs
2360 ? (uint64_t *) host_ind_func_table : NULL);
2362 if (num_target_entries != num_funcs + num_vars
2363 /* "+1" due to the additional ICV struct. */
2364 && num_target_entries != num_funcs + num_vars + 1)
2366 gomp_mutex_unlock (&devicep->lock);
2367 if (is_register_lock)
2368 gomp_mutex_unlock (&register_lock);
2369 gomp_fatal ("Cannot map target functions or variables"
2370 " (expected %u, have %u)", num_funcs + num_vars,
2371 num_target_entries);
2374 /* Insert host-target address mapping into splay tree. */
2375 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2376 /* "+1" due to the additional ICV struct. */
2377 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2378 * sizeof (*tgt->array));
2379 if (rev_target_fn_table)
2380 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2381 else
2382 tgt->rev_array = NULL;
2383 tgt->refcount = REFCOUNT_INFINITY;
2384 tgt->tgt_start = 0;
2385 tgt->tgt_end = 0;
2386 tgt->to_free = NULL;
2387 tgt->prev = NULL;
2388 tgt->list_count = 0;
2389 tgt->device_descr = devicep;
2390 splay_tree_node array = tgt->array;
2391 reverse_splay_tree_node rev_array = tgt->rev_array;
2393 for (i = 0; i < num_funcs; i++)
2395 splay_tree_key k = &array->key;
2396 k->host_start = (uintptr_t) host_func_table[i];
2397 k->host_end = k->host_start + 1;
2398 k->tgt = tgt;
2399 k->tgt_offset = target_table[i].start;
2400 k->refcount = REFCOUNT_INFINITY;
2401 k->dynamic_refcount = 0;
2402 k->aux = NULL;
2403 array->left = NULL;
2404 array->right = NULL;
2405 splay_tree_insert (&devicep->mem_map, array);
2406 if (rev_target_fn_table)
2408 reverse_splay_tree_key k2 = &rev_array->key;
2409 k2->dev = rev_target_fn_table[i];
2410 k2->k = k;
2411 rev_array->left = NULL;
2412 rev_array->right = NULL;
2413 if (k2->dev != 0)
2414 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2415 rev_array++;
2417 array++;
2420 /* Most significant bit of the size in host and target tables marks
2421 "omp declare target link" variables. */
2422 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2423 const uintptr_t size_mask = ~link_bit;
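/* Worked example (not from the source): on a 64-bit host link_bit is
   0x8000000000000000, so a 24-byte "declare target link" variable has its
   size stored as 0x8000000000000018; ANDing with size_mask recovers 24. */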
2425 for (i = 0; i < num_vars; i++)
2427 struct addr_pair *target_var = &target_table[num_funcs + i];
2428 uintptr_t target_size = target_var->end - target_var->start;
2429 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2431 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2433 gomp_mutex_unlock (&devicep->lock);
2434 if (is_register_lock)
2435 gomp_mutex_unlock (&register_lock);
2436 gomp_fatal ("Cannot map target variables (size mismatch)");
2439 splay_tree_key k = &array->key;
2440 k->host_start = (uintptr_t) host_var_table[i * 2];
2441 k->host_end
2442 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2443 k->tgt = tgt;
2444 k->tgt_offset = target_var->start;
2445 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2446 k->dynamic_refcount = 0;
2447 k->aux = NULL;
2448 array->left = NULL;
2449 array->right = NULL;
2450 splay_tree_insert (&devicep->mem_map, array);
2451 array++;
2454 /* The last entry is for an ICVs variable.
2455 Tolerate the case where the plugin does not return that entry. */
2456 if (num_funcs + num_vars < num_target_entries)
2458 struct addr_pair *var = &target_table[num_funcs + num_vars];
2460 /* Start address will be non-zero for the ICVs variable if
2461 the variable was found in this image. */
2462 if (var->start != 0)
2464 /* The index of the devicep within devices[] is regarded as its
2465 'device number', which is different from the per-device type
2466 devicep->target_id. */
2467 int dev_num = (int) (devicep - &devices[0]);
2468 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2469 size_t var_size = var->end - var->start;
2470 if (var_size != sizeof (struct gomp_offload_icvs))
2472 gomp_mutex_unlock (&devicep->lock);
2473 if (is_register_lock)
2474 gomp_mutex_unlock (&register_lock);
2475 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2476 "format");
2478 /* Copy the ICVs variable into place in device memory, thereby
2479 actually putting its device number into effect. */
2480 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2481 var_size, false, NULL);
2482 splay_tree_key k = &array->key;
2483 k->host_start = (uintptr_t) icvs;
2484 k->host_end =
2485 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2486 k->tgt = tgt;
2487 k->tgt_offset = var->start;
2488 k->refcount = REFCOUNT_INFINITY;
2489 k->dynamic_refcount = 0;
2490 k->aux = NULL;
2491 array->left = NULL;
2492 array->right = NULL;
2493 splay_tree_insert (&devicep->mem_map, array);
2494 array++;
2498 free (target_table);
2501 /* Unload the mappings described by TARGET_DATA from device DEVICEP.
2502 The device must be locked. */
2504 static void
2505 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2506 unsigned version,
2507 const void *host_table, const void *target_data)
2509 void **host_func_table = ((void ***) host_table)[0];
2510 void **host_funcs_end = ((void ***) host_table)[1];
2511 void **host_var_table = ((void ***) host_table)[2];
2512 void **host_vars_end = ((void ***) host_table)[3];
2514 /* The func table contains only addresses, the var table contains addresses
2515 and corresponding sizes. */
2516 int num_funcs = host_funcs_end - host_func_table;
2517 int num_vars = (host_vars_end - host_var_table) / 2;
2519 struct splay_tree_key_s k;
2520 splay_tree_key node = NULL;
2522 /* Find the mapping at the start of the node array. */
2523 if (num_funcs || num_vars)
2525 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2526 : (uintptr_t) host_var_table[0]);
2527 k.host_end = k.host_start + 1;
2528 node = splay_tree_lookup (&devicep->mem_map, &k);
2531 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2533 gomp_mutex_unlock (&devicep->lock);
2534 gomp_fatal ("image unload fail");
2536 if (devicep->mem_map_rev.root)
2538 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2539 real allocation. */
2540 assert (node && node->tgt && node->tgt->rev_array);
2541 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2542 free (node->tgt->rev_array);
2543 devicep->mem_map_rev.root = NULL;
2546 /* Remove mappings from splay tree. */
2547 int i;
2548 for (i = 0; i < num_funcs; i++)
2550 k.host_start = (uintptr_t) host_func_table[i];
2551 k.host_end = k.host_start + 1;
2552 splay_tree_remove (&devicep->mem_map, &k);
2555 /* Most significant bit of the size in host and target tables marks
2556 "omp declare target link" variables. */
2557 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2558 const uintptr_t size_mask = ~link_bit;
2559 bool is_tgt_unmapped = false;
2561 for (i = 0; i < num_vars; i++)
2563 k.host_start = (uintptr_t) host_var_table[i * 2];
2564 k.host_end
2565 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2567 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2568 splay_tree_remove (&devicep->mem_map, &k);
2569 else
2571 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2572 is_tgt_unmapped = gomp_remove_var (devicep, n);
2576 if (node && !is_tgt_unmapped)
2578 free (node->tgt);
2579 free (node);
2583 static void
2584 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2586 char *end = buf + size, *p = buf;
2587 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2588 p += snprintf (p, end - p, "unified_address");
2589 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2590 p += snprintf (p, end - p, "%sunified_shared_memory",
2591 (p == buf ? "" : ", "));
2592 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2593 p += snprintf (p, end - p, "%sreverse_offload",
2594 (p == buf ? "" : ", "));
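/* For example (illustrative): a mask with unified_shared_memory and
   reverse_offload set produces "unified_shared_memory, reverse_offload"
   in BUF; with all three flags set it produces
   "unified_address, unified_shared_memory, reverse_offload". */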
2597 /* This function should be called from every offload image while loading.
2598 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2599 the target, and DATA. */
2601 void
2602 GOMP_offload_register_ver (unsigned version, const void *host_table,
2603 int target_type, const void *data)
2605 int i;
2607 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2608 gomp_fatal ("Library too old for offload (version %u < %u)",
2609 GOMP_VERSION, GOMP_VERSION_LIB (version));
2611 int omp_req;
2612 const void *target_data;
2613 if (GOMP_VERSION_LIB (version) > 1)
2615 omp_req = (int) (size_t) ((void **) data)[0];
2616 target_data = &((void **) data)[1];
2618 else
2620 omp_req = 0;
2621 target_data = data;
2624 gomp_mutex_lock (&register_lock);
2626 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2628 char buf1[sizeof ("unified_address, unified_shared_memory, "
2629 "reverse_offload")];
2630 char buf2[sizeof ("unified_address, unified_shared_memory, "
2631 "reverse_offload")];
2632 gomp_requires_to_name (buf2, sizeof (buf2),
2633 omp_req != GOMP_REQUIRES_TARGET_USED
2634 ? omp_req : omp_requires_mask);
2635 if (omp_req != GOMP_REQUIRES_TARGET_USED
2636 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2638 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2639 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2640 "in multiple compilation units: '%s' vs. '%s'",
2641 buf1, buf2);
2643 else
2644 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2645 "some compilation units", buf2);
2647 omp_requires_mask = omp_req;
2649 /* Load image to all initialized devices. */
2650 for (i = 0; i < num_devices; i++)
2652 struct gomp_device_descr *devicep = &devices[i];
2653 gomp_mutex_lock (&devicep->lock);
2654 if (devicep->type == target_type
2655 && devicep->state == GOMP_DEVICE_INITIALIZED)
2656 gomp_load_image_to_device (devicep, version,
2657 host_table, target_data, true);
2658 gomp_mutex_unlock (&devicep->lock);
2661 /* Insert image to array of pending images. */
2662 offload_images
2663 = gomp_realloc_unlock (offload_images,
2664 (num_offload_images + 1)
2665 * sizeof (struct offload_image_descr));
2666 offload_images[num_offload_images].version = version;
2667 offload_images[num_offload_images].type = target_type;
2668 offload_images[num_offload_images].host_table = host_table;
2669 offload_images[num_offload_images].target_data = target_data;
2671 num_offload_images++;
2672 gomp_mutex_unlock (&register_lock);
2675 /* Legacy entry point. */
2677 void
2678 GOMP_offload_register (const void *host_table, int target_type,
2679 const void *target_data)
2681 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2684 /* This function should be called from every offload image while unloading.
2685 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2686 the target, and DATA. */
2688 void
2689 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2690 int target_type, const void *data)
2692 int i;
2694 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2695 gomp_fatal ("Library too old for offload (version %u < %u)",
2696 GOMP_VERSION, GOMP_VERSION_LIB (version));
2698 const void *target_data;
2699 if (GOMP_VERSION_LIB (version) > 1)
2700 target_data = &((void **) data)[1];
2701 else
2702 target_data = data;
2704 gomp_mutex_lock (&register_lock);
2706 /* Unload image from all initialized devices. */
2707 for (i = 0; i < num_devices; i++)
2709 struct gomp_device_descr *devicep = &devices[i];
2710 gomp_mutex_lock (&devicep->lock);
2711 if (devicep->type == target_type
2712 && devicep->state == GOMP_DEVICE_INITIALIZED)
2713 gomp_unload_image_from_device (devicep, version,
2714 host_table, target_data);
2715 gomp_mutex_unlock (&devicep->lock);
2718 /* Remove image from array of pending images. */
2719 for (i = 0; i < num_offload_images; i++)
2720 if (offload_images[i].target_data == target_data)
2722 offload_images[i] = offload_images[--num_offload_images];
2723 break;
2726 gomp_mutex_unlock (&register_lock);
2729 /* Legacy entry point. */
2731 void
2732 GOMP_offload_unregister (const void *host_table, int target_type,
2733 const void *target_data)
2735 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2738 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2739 must be locked on entry, and remains locked on return. */
2741 attribute_hidden void
2742 gomp_init_device (struct gomp_device_descr *devicep)
2744 int i;
2745 if (!devicep->init_device_func (devicep->target_id))
2747 gomp_mutex_unlock (&devicep->lock);
2748 gomp_fatal ("device initialization failed");
2751 /* Load to the device all images registered so far. */
2752 for (i = 0; i < num_offload_images; i++)
2754 struct offload_image_descr *image = &offload_images[i];
2755 if (image->type == devicep->type)
2756 gomp_load_image_to_device (devicep, image->version,
2757 image->host_table, image->target_data,
2758 false);
2761 /* Initialize OpenACC asynchronous queues. */
2762 goacc_init_asyncqueues (devicep);
2764 devicep->state = GOMP_DEVICE_INITIALIZED;
2767 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2768 must be locked on entry, and remains locked on return. */
2770 attribute_hidden bool
2771 gomp_fini_device (struct gomp_device_descr *devicep)
2773 bool ret = goacc_fini_asyncqueues (devicep);
2774 ret &= devicep->fini_device_func (devicep->target_id);
2775 devicep->state = GOMP_DEVICE_FINALIZED;
2776 return ret;
2779 attribute_hidden void
2780 gomp_unload_device (struct gomp_device_descr *devicep)
2782 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2784 unsigned i;
2786 /* Unload from the device all currently registered images. */
2787 for (i = 0; i < num_offload_images; i++)
2789 struct offload_image_descr *image = &offload_images[i];
2790 if (image->type == devicep->type)
2791 gomp_unload_image_from_device (devicep, image->version,
2792 image->host_table,
2793 image->target_data);
2798 /* Host fallback for GOMP_target{,_ext} routines. */
2800 static void
2801 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2802 struct gomp_device_descr *devicep, void **args)
2804 struct gomp_thread old_thr, *thr = gomp_thread ();
2806 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2807 && devicep != NULL)
2808 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2809 "be used for offloading");
2811 old_thr = *thr;
2812 memset (thr, '\0', sizeof (*thr));
2813 if (gomp_places_list)
2815 thr->place = old_thr.place;
2816 thr->ts.place_partition_len = gomp_places_list_len;
2818 if (args)
2819 while (*args)
2821 intptr_t id = (intptr_t) *args++, val;
2822 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2823 val = (intptr_t) *args++;
2824 else
2825 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2826 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2827 continue;
2828 id &= GOMP_TARGET_ARG_ID_MASK;
2829 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2830 continue;
2831 val = val > INT_MAX ? INT_MAX : val;
2832 if (val)
2833 gomp_icv (true)->thread_limit_var = val;
2834 break;
2837 fn (hostaddrs);
2838 gomp_free_thread (thr);
2839 *thr = old_thr;
2842 /* Calculate alignment and size requirements of a private copy of data shared
2843 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2845 static inline void
2846 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2847 unsigned short *kinds, size_t *tgt_align,
2848 size_t *tgt_size)
2850 size_t i;
2851 for (i = 0; i < mapnum; i++)
2852 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2854 size_t align = (size_t) 1 << (kinds[i] >> 8);
2855 if (*tgt_align < align)
2856 *tgt_align = align;
2857 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2858 *tgt_size += sizes[i];
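/* Worked example (not from the source): with *tgt_size == 13 on entry and
   align == 8, (13 + 8 - 1) & ~7 rounds *tgt_size up to 16 before sizes[i]
   is added, so each firstprivate copy starts at a suitably aligned
   offset. */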
2862 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2864 static inline void
2865 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2866 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2867 size_t tgt_size)
2869 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2870 if (al)
2871 tgt += tgt_align - al;
2872 tgt_size = 0;
2873 size_t i;
2874 for (i = 0; i < mapnum; i++)
2875 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2877 size_t align = (size_t) 1 << (kinds[i] >> 8);
2878 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2879 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2880 hostaddrs[i] = tgt + tgt_size;
2881 tgt_size = tgt_size + sizes[i];
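/* If the next map entry is a GOMP_MAP_ATTACH referring to this
   firstprivate variable, redirect the recorded pointer to the private
   copy made above instead of the original host object. */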
2882 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2884 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2885 ++i;
2890 /* Helper function of GOMP_target{,_ext} routines. */
2892 static void *
2893 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2894 void (*host_fn) (void *))
2896 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2897 return (void *) host_fn;
2898 else
2900 gomp_mutex_lock (&devicep->lock);
2901 if (devicep->state == GOMP_DEVICE_FINALIZED)
2903 gomp_mutex_unlock (&devicep->lock);
2904 return NULL;
2907 struct splay_tree_key_s k;
2908 k.host_start = (uintptr_t) host_fn;
2909 k.host_end = k.host_start + 1;
2910 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2911 gomp_mutex_unlock (&devicep->lock);
2912 if (tgt_fn == NULL)
2913 return NULL;
2915 return (void *) tgt_fn->tgt_offset;
2919 /* Called when encountering a target directive. If DEVICE
2920 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2921 GOMP_DEVICE_HOST_FALLBACK (or any value
2922 larger than the last available hw device), use host fallback.
2923 FN is address of host code, UNUSED is part of the current ABI, but
2924 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2925 with MAPNUM entries, with addresses of the host objects,
2926 sizes of the host objects (resp. for pointer kind pointer bias
2927 and assumed sizeof (void *) size) and kinds. */
2929 void
2930 GOMP_target (int device, void (*fn) (void *), const void *unused,
2931 size_t mapnum, void **hostaddrs, size_t *sizes,
2932 unsigned char *kinds)
2934 struct gomp_device_descr *devicep = resolve_device (device, true);
2936 void *fn_addr;
2937 if (devicep == NULL
2938 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2939 /* All shared memory devices should use the GOMP_target_ext function. */
2940 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2941 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2942 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2944 htab_t refcount_set = htab_create (mapnum);
2945 struct target_mem_desc *tgt_vars
2946 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2947 &refcount_set, GOMP_MAP_VARS_TARGET);
2948 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2949 NULL);
2950 htab_clear (refcount_set);
2951 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2952 htab_free (refcount_set);
2955 static inline unsigned int
2956 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2958 /* If we cannot run asynchronously, simply ignore nowait. */
2959 if (devicep != NULL && devicep->async_run_func == NULL)
2960 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2962 return flags;
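/* Copy the ICV block for DEVICE back from device memory into its host copy,
   if the ICV variable was mapped for that device (see the ICV handling in
   gomp_load_image_to_device). */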
2965 static void
2966 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2968 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2969 if (item == NULL)
2970 return;
2972 gomp_mutex_lock (&devicep->lock);
2974 struct splay_tree_s *mem_map = &devicep->mem_map;
2975 struct splay_tree_key_s cur_node;
2976 void *dev_ptr = NULL;
2978 void *host_ptr = &item->icvs;
2979 cur_node.host_start = (uintptr_t) host_ptr;
2980 cur_node.host_end = cur_node.host_start;
2981 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2983 if (n)
2985 uintptr_t offset = cur_node.host_start - n->host_start;
2986 dev_ptr = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
2989 gomp_mutex_unlock (&devicep->lock);
2991 if (dev_ptr != NULL)
2992 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2993 sizeof (struct gomp_offload_icvs));
2996 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2997 and several arguments have been added:
2998 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2999 DEPEND is array of dependencies, see GOMP_task for details.
3001 ARGS is a pointer to an array consisting of a variable number of both
3002 device-independent and device-specific arguments, which can take one or two
3003 elements where the first specifies for which device it is intended, the type,
3004 and optionally also the value. If the value is not present in the first
3005 one, the whole second element holds the actual value. The last element of
3006 the array is a single NULL. Among the device-independent arguments are for
3007 example NUM_TEAMS and THREAD_LIMIT.
3009 NUM_TEAMS is positive if GOMP_teams will be called in the body with
3010 that value, or 1 if teams construct is not present, or 0, if
3011 teams construct does not have num_teams clause and so the choice is
3012 implementation defined, and -1 if it can't be determined on the host
3013 what value GOMP_teams will have on the device.
3014 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
3015 body with that value, or 0, if teams construct does not have thread_limit
3016 clause or the teams construct is not present, or -1 if it can't be
3017 determined on the host what value GOMP_teams will have on the device. */
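/* A hedged sketch (illustrative values, not from the source) of how ARGS
   entries are encoded, matching the decoding loop below:

     // Value small enough to sit next to the id bits: a single element.
     intptr_t a0 = (16 << GOMP_TARGET_ARG_VALUE_SHIFT)
                   | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS;
     // Value too large to embed: two elements, flagged accordingly.
     intptr_t a1 = GOMP_TARGET_ARG_SUBSEQUENT_PARAM
                   | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_THREAD_LIMIT;
     void *args[] = { (void *) a0,
                      (void *) a1, (void *) (intptr_t) 70000,
                      NULL };  */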
3019 void
3020 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
3021 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3022 unsigned int flags, void **depend, void **args)
3024 struct gomp_device_descr *devicep = resolve_device (device, true);
3025 size_t tgt_align = 0, tgt_size = 0;
3026 bool fpc_done = false;
3028 /* Obtain the original TEAMS and THREADS values from ARGS. */
3029 intptr_t orig_teams = 1, orig_threads = 0;
3030 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
3031 void **tmpargs = args;
3032 while (*tmpargs)
3034 intptr_t id = (intptr_t) *tmpargs++, val;
3035 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3037 val = (intptr_t) *tmpargs++;
3038 len = 2;
3040 else
3042 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
3043 len = 1;
3045 num_args += len;
3046 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
3047 continue;
3048 val = val > INT_MAX ? INT_MAX : val;
3049 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
3051 orig_teams = val;
3052 teams_len = len;
3054 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
3056 orig_threads = val;
3057 threads_len = len;
3061 intptr_t new_teams = orig_teams, new_threads = orig_threads;
3062 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3063 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3064 value could not be determined. No change.
3065 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3066 Set device-specific value.
3067 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3068 No change. */
3069 if (orig_teams == -2)
3070 new_teams = 1;
3071 else if (orig_teams == 0)
3073 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3074 if (item != NULL)
3075 new_teams = item->icvs.nteams;
3077 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3078 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3079 e.g. a THREAD_LIMIT clause. */
3080 if (orig_teams > -2 && orig_threads == 0)
3082 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3083 if (item != NULL)
3084 new_threads = item->icvs.teams_thread_limit;
3087 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3088 updated. */
3089 void **new_args = args;
3090 if (orig_teams != new_teams || orig_threads != new_threads)
3092 size_t tms_len = (orig_teams == new_teams
3093 ? teams_len
3094 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3095 ? 1 : 2));
3096 size_t ths_len = (orig_threads == new_threads
3097 ? threads_len
3098 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3099 ? 1 : 2));
3100 /* One additional item after the last arg must be NULL. */
3101 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3102 + ths_len + 1;
3103 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3105 tmpargs = args;
3106 void **tmp_new_args = new_args;
3107 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3108 too if they have not been changed, and are skipped otherwise. */
3109 while (*tmpargs)
3111 intptr_t id = (intptr_t) *tmpargs;
3112 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3113 && orig_teams != new_teams)
3114 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3115 && orig_threads != new_threads))
3117 tmpargs++;
3118 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3119 tmpargs++;
3121 else
3123 *tmp_new_args++ = *tmpargs++;
3124 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3125 *tmp_new_args++ = *tmpargs++;
3129 /* Add the new TEAMS arg to the new args list if it has been changed. */
3130 if (orig_teams != new_teams)
3132 intptr_t new_val = new_teams;
3133 if (tms_len == 1)
3135 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3136 | GOMP_TARGET_ARG_NUM_TEAMS;
3137 *tmp_new_args++ = (void *) new_val;
3139 else
3141 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3142 | GOMP_TARGET_ARG_NUM_TEAMS);
3143 *tmp_new_args++ = (void *) new_val;
3147 /* Add the new THREADS arg to the new args list if it has been changed. */
3148 if (orig_threads != new_threads)
3150 intptr_t new_val = new_threads;
3151 if (ths_len == 1)
3153 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3154 | GOMP_TARGET_ARG_THREAD_LIMIT;
3155 *tmp_new_args++ = (void *) new_val;
3157 else
3159 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3160 | GOMP_TARGET_ARG_THREAD_LIMIT);
3161 *tmp_new_args++ = (void *) new_val;
3165 *tmp_new_args = NULL;
3168 flags = clear_unsupported_flags (devicep, flags);
3170 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3172 struct gomp_thread *thr = gomp_thread ();
3173 /* Create a team if we don't have any around, as nowait
3174 target tasks make sense to run asynchronously even when
3175 outside of any parallel. */
3176 if (__builtin_expect (thr->ts.team == NULL, 0))
3178 struct gomp_team *team = gomp_new_team (1);
3179 struct gomp_task *task = thr->task;
3180 struct gomp_task **implicit_task = &task;
3181 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3182 team->prev_ts = thr->ts;
3183 thr->ts.team = team;
3184 thr->ts.team_id = 0;
3185 thr->ts.work_share = &team->work_shares[0];
3186 thr->ts.last_work_share = NULL;
3187 #ifdef HAVE_SYNC_BUILTINS
3188 thr->ts.single_count = 0;
3189 #endif
3190 thr->ts.static_trip = 0;
3191 thr->task = &team->implicit_task[0];
3192 gomp_init_task (thr->task, NULL, icv);
3193 while (*implicit_task
3194 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3195 implicit_task = &(*implicit_task)->parent;
3196 if (*implicit_task)
3198 thr->task = *implicit_task;
3199 gomp_end_task ();
3200 free (*implicit_task);
3201 thr->task = &team->implicit_task[0];
3203 else
3204 pthread_setspecific (gomp_thread_destructor, thr);
3205 if (implicit_task != &task)
3207 *implicit_task = thr->task;
3208 thr->task = task;
3211 if (thr->ts.team
3212 && !thr->task->final_task)
3214 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3215 sizes, kinds, flags, depend, new_args,
3216 GOMP_TARGET_TASK_BEFORE_MAP);
3217 return;
3221 /* If there are depend clauses, but nowait is not present
3222 (or we are in a final task), block the parent task until the
3223 dependencies are resolved and then just continue with the rest
3224 of the function as if it is a merged task. */
3225 if (depend != NULL)
3227 struct gomp_thread *thr = gomp_thread ();
3228 if (thr->task && thr->task->depend_hash)
3230 /* If we might need to wait, copy firstprivate now. */
3231 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3232 &tgt_align, &tgt_size);
3233 if (tgt_align)
3235 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3236 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3237 tgt_align, tgt_size);
3239 fpc_done = true;
3240 gomp_task_maybe_wait_for_dependencies (depend);
3244 void *fn_addr;
3245 if (devicep == NULL
3246 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3247 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3248 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3250 if (!fpc_done)
3252 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3253 &tgt_align, &tgt_size);
3254 if (tgt_align)
3256 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3257 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3258 tgt_align, tgt_size);
3261 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3262 return;
3265 struct target_mem_desc *tgt_vars;
3266 htab_t refcount_set = NULL;
3268 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3270 if (!fpc_done)
3272 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3273 &tgt_align, &tgt_size);
3274 if (tgt_align)
3276 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3277 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3278 tgt_align, tgt_size);
3281 tgt_vars = NULL;
3283 else
3285 refcount_set = htab_create (mapnum);
3286 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3287 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3289 devicep->run_func (devicep->target_id, fn_addr,
3290 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3291 new_args);
3292 if (tgt_vars)
3294 htab_clear (refcount_set);
3295 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3297 if (refcount_set)
3298 htab_free (refcount_set);
3300 /* Copy back ICVs from device to host.
3301 HOST_PTR is expected to exist since it was added in
3302 gomp_load_image_to_device if not already available. */
3303 gomp_copy_back_icvs (devicep, device);
3308 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3309 keeping track of all variable handling, assuming that reverse offload occurs
3310 only very rarely. The downside is that the reverse search is slow. */
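/* The lookup below simply walks the whole mem_map splay tree and returns the
   first key whose device range (tgt->tgt_start + tgt_offset, plus the list
   entry's length) matches or overlaps the queried range -- O(n) in the
   number of mappings, which is acceptable given the rarity assumption
   above. */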
3312 struct gomp_splay_tree_rev_lookup_data {
3313 uintptr_t tgt_start;
3314 uintptr_t tgt_end;
3315 splay_tree_key key;
3318 static int
3319 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3321 struct gomp_splay_tree_rev_lookup_data *data;
3322 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3323 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3325 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3326 return 0;
3328 size_t j;
3329 for (j = 0; j < key->tgt->list_count; j++)
3330 if (key->tgt->list[j].key == key)
3331 break;
3332 assert (j < key->tgt->list_count);
3333 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3335 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3336 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3338 data->key = key;
3339 return 1;
3341 return 0;
3344 static inline splay_tree_key
3345 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3346 bool zero_len)
3348 struct gomp_splay_tree_rev_lookup_data data;
3349 data.key = NULL;
3350 data.tgt_start = tgt_start;
3351 data.tgt_end = tgt_end;
3353 if (tgt_start != tgt_end)
3355 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3356 return data.key;
3359 data.tgt_end++;
3360 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3361 if (data.key != NULL || zero_len)
3362 return data.key;
3363 data.tgt_end--;
3365 data.tgt_start--;
3366 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3367 return data.key;
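/* Per-map-entry bookkeeping for gomp_target_rev below: DEVADDR is the
   original device address of the entry, PRESENT records whether a host
   counterpart was found (or derived from an earlier entry), and ALIGNED
   whether the host buffer was obtained via gomp_aligned_alloc. */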
3370 struct cpy_data
3372 uint64_t devaddr;
3373 bool present, aligned;
3377 /* Search the just-mapped reverse-offload data; returns the index if found,
3378 otherwise a value >= N. */
3380 static inline int
3381 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3382 unsigned short *kinds, uint64_t *sizes, size_t n,
3383 uint64_t tgt_start, uint64_t tgt_end)
3385 const bool short_mapkind = true;
3386 const int typemask = short_mapkind ? 0xff : 0x7;
3387 size_t i;
3388 for (i = 0; i < n; i++)
3390 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3391 == GOMP_MAP_STRUCT);
3392 uint64_t dev_end;
3393 if (!is_struct)
3394 dev_end = d[i].devaddr + sizes[i];
3395 else
3397 if (i + sizes[i] < n)
3398 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3399 else
3400 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3402 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3403 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3404 break;
3405 if (is_struct)
3406 i += sizes[i];
3408 return i;
3411 static inline int
3412 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3413 unsigned short *kinds, uint64_t *sizes,
3414 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3415 bool zero_len)
3417 size_t i;
3418 if (tgt_start != tgt_end)
3419 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3420 tgt_start, tgt_end);
3421 tgt_end++;
3422 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3423 tgt_start, tgt_end);
3424 if (i < n || zero_len)
3425 return i;
3426 tgt_end--;
3428 tgt_start--;
3429 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3430 tgt_start, tgt_end);
3433 /* Handle reverse offload. This is called by the device plugins for a
3434 reverse offload; it is not called if the outer target runs on the host.
3435 The mapping is simplified, as device-affecting constructs (except for target
3436 with device(ancestor:1)) must not be encountered; in particular, no
3437 target (enter/exit) data. */
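/* Illustrative user-level trigger (sketch): together with
   '#pragma omp requires reverse_offload', a construct such as

     #pragma omp target
     {
       #pragma omp target device (ancestor: 1)
       foo (x);
     }

   makes the device plugin call back into this function so that foo runs
   on the host. */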
3439 void
3440 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3441 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3442 struct goacc_asyncqueue *aq)
3444 /* Return early if there is no offload code. */
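/* OFFLOAD_PLUGINS is a configure-time string literal naming the enabled
   plugins (e.g. "nvptx,gcn"); an empty string means none were built in. */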
3445 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3446 return;
3447 /* Currently, this fails because of calculate_firstprivate_requirements
3448 below; it could be fixed but additional code needs to be updated to
3449 handle 32-bit hosts; thus, it is not worthwhile.
3450 if (sizeof (void *) != sizeof (uint64_t))
3451 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3453 struct cpy_data *cdata = NULL;
3454 uint64_t *devaddrs;
3455 uint64_t *sizes;
3456 unsigned short *kinds;
3457 const bool short_mapkind = true;
3458 const int typemask = short_mapkind ? 0xff : 0x7;
3459 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3461 reverse_splay_tree_key n;
3462 struct reverse_splay_tree_key_s k;
3463 k.dev = fn_ptr;
3465 gomp_mutex_lock (&devicep->lock);
3466 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3467 gomp_mutex_unlock (&devicep->lock);
3469 if (n == NULL)
3470 gomp_fatal ("Cannot find reverse-offload function");
3471 void (*host_fn) (void *) = (void (*) (void *)) n->k->host_start;
3473 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3475 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3476 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3477 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3479 else
3481 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3482 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3483 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3484 gomp_copy_dev2host (devicep, aq, devaddrs,
3485 (const void *) (uintptr_t) devaddrs_ptr,
3486 mapnum * sizeof (uint64_t));
3487 gomp_copy_dev2host (devicep, aq, sizes,
3488 (const void *) (uintptr_t) sizes_ptr,
3489 mapnum * sizeof (uint64_t));
3490 gomp_copy_dev2host (devicep, aq, kinds,
3491 (const void *) (uintptr_t) kinds_ptr,
3492 mapnum * sizeof (unsigned short));
3493 if (aq && !devicep->openacc.async.synchronize_func (aq))
3494 exit (EXIT_FAILURE);
3497 size_t tgt_align = 0, tgt_size = 0;
3499 /* If actually executed on 32-bit systems, the casts lead to wrong code;
3500 but 32-bit with offloading is not supported; see the top of this function. */
3501 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3502 (void *) (uintptr_t) kinds,
3503 &tgt_align, &tgt_size);
3505 if (tgt_align)
3507 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3508 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3509 if (al)
3510 tgt += tgt_align - al;
3511 tgt_size = 0;
3512 for (uint64_t i = 0; i < mapnum; i++)
3513 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3514 && devaddrs[i] != 0)
3516 size_t align = (size_t) 1 << (kinds[i] >> 8);
3517 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3518 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3519 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3520 (size_t) sizes[i]);
3521 else
3523 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3524 (void *) (uintptr_t) devaddrs[i],
3525 (size_t) sizes[i]);
3526 if (aq && !devicep->openacc.async.synchronize_func (aq))
3527 exit (EXIT_FAILURE);
3529 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3530 tgt_size = tgt_size + sizes[i];
3531 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3532 && i + 1 < mapnum
3533 && ((get_kind (short_mapkind, kinds, i) & typemask)
3534 == GOMP_MAP_ATTACH))
3536 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3537 = (uint64_t) devaddrs[i];
3538 ++i;
3543 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3545 size_t j, struct_cpy = 0;
3546 splay_tree_key n2;
3547 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3548 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3549 gomp_mutex_lock (&devicep->lock);
3550 for (uint64_t i = 0; i < mapnum; i++)
3552 if (devaddrs[i] == 0)
3553 continue;
3554 n = NULL;
3555 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3556 switch (kind)
3558 case GOMP_MAP_FIRSTPRIVATE:
3559 case GOMP_MAP_FIRSTPRIVATE_INT:
3560 continue;
3562 case GOMP_MAP_DELETE:
3563 case GOMP_MAP_RELEASE:
3564 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3565 /* Assume it is present; look it up - but ignore unless the
3566 present clause is there. */
3567 case GOMP_MAP_ALLOC:
3568 case GOMP_MAP_FROM:
3569 case GOMP_MAP_FORCE_ALLOC:
3570 case GOMP_MAP_FORCE_FROM:
3571 case GOMP_MAP_ALWAYS_FROM:
3572 case GOMP_MAP_TO:
3573 case GOMP_MAP_TOFROM:
3574 case GOMP_MAP_FORCE_TO:
3575 case GOMP_MAP_FORCE_TOFROM:
3576 case GOMP_MAP_ALWAYS_TO:
3577 case GOMP_MAP_ALWAYS_TOFROM:
3578 case GOMP_MAP_FORCE_PRESENT:
3579 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3580 case GOMP_MAP_ALWAYS_PRESENT_TO:
3581 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3582 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3583 cdata[i].devaddr = devaddrs[i];
3584 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3585 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3586 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3587 devaddrs[i],
3588 devaddrs[i] + sizes[i], zero_len);
3589 if (j < i)
3591 n2 = NULL;
3592 cdata[i].present = true;
3593 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3595 else
3597 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3598 devaddrs[i],
3599 devaddrs[i] + sizes[i], zero_len);
3600 cdata[i].present = n2 != NULL;
3602 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3604 gomp_mutex_unlock (&devicep->lock);
3605 #ifdef HAVE_INTTYPES_H
3606 gomp_fatal ("present clause: no corresponding data on "
3607 "parent device at %p with size %"PRIu64,
3608 (void *) (uintptr_t) devaddrs[i],
3609 (uint64_t) sizes[i]);
3610 #else
3611 gomp_fatal ("present clause: no corresponding data on "
3612 "parent device at %p with size %lu",
3613 (void *) (uintptr_t) devaddrs[i],
3614 (unsigned long) sizes[i]);
3615 #endif
3616 break;
3618 else if (!cdata[i].present
3619 && kind != GOMP_MAP_DELETE
3620 && kind != GOMP_MAP_RELEASE
3621 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3623 cdata[i].aligned = true;
3624 size_t align = (size_t) 1 << (kinds[i] >> 8);
3625 devaddrs[i]
3626 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3627 sizes[i]);
3629 else if (n2 != NULL)
3630 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3631 - (n2->tgt->tgt_start + n2->tgt_offset));
3632 if (((!cdata[i].present || struct_cpy)
3633 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3634 || kind == GOMP_MAP_FORCE_TO
3635 || kind == GOMP_MAP_FORCE_TOFROM
3636 || GOMP_MAP_ALWAYS_TO_P (kind))
3638 gomp_copy_dev2host (devicep, aq,
3639 (void *) (uintptr_t) devaddrs[i],
3640 (void *) (uintptr_t) cdata[i].devaddr,
3641 sizes[i]);
3642 if (aq && !devicep->openacc.async.synchronize_func (aq))
3644 gomp_mutex_unlock (&devicep->lock);
3645 exit (EXIT_FAILURE);
3648 if (struct_cpy)
3649 struct_cpy--;
3650 break;
3651 case GOMP_MAP_ATTACH:
3652 case GOMP_MAP_POINTER:
3653 case GOMP_MAP_ALWAYS_POINTER:
3654 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3655 devaddrs[i] + sizes[i],
3656 devaddrs[i] + sizes[i]
3657 + sizeof (void*), false);
3658 cdata[i].present = n2 != NULL;
3659 cdata[i].devaddr = devaddrs[i];
3660 if (n2)
3661 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3662 - (n2->tgt->tgt_start + n2->tgt_offset));
3663 else
3665 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3666 devaddrs[i] + sizes[i],
3667 devaddrs[i] + sizes[i]
3668 + sizeof (void*), false);
3669 if (j < i)
3671 cdata[i].present = true;
3672 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3673 - cdata[j].devaddr);
3676 if (!cdata[i].present)
3677 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3678 /* Assume that when present, the pointer is already correct. */
3679 if (!n2)
3680 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3681 = devaddrs[i-1];
3682 break;
3683 case GOMP_MAP_TO_PSET:
3684 /* Assume that when present, the pointers are fine and no 'to:'
3685 is required. */
3686 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3687 devaddrs[i], devaddrs[i] + sizes[i],
3688 false);
3689 cdata[i].present = n2 != NULL;
3690 cdata[i].devaddr = devaddrs[i];
3691 if (n2)
3692 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3693 - (n2->tgt->tgt_start + n2->tgt_offset));
3694 else
3696 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3697 devaddrs[i],
3698 devaddrs[i] + sizes[i], false);
3699 if (j < i)
3701 cdata[i].present = true;
3702 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3703 - cdata[j].devaddr);
3706 if (!cdata[i].present)
3708 cdata[i].aligned = true;
3709 size_t align = (size_t) 1 << (kinds[i] >> 8);
3710 devaddrs[i]
3711 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3712 sizes[i]);
3713 gomp_copy_dev2host (devicep, aq,
3714 (void *) (uintptr_t) devaddrs[i],
3715 (void *) (uintptr_t) cdata[i].devaddr,
3716 sizes[i]);
3717 if (aq && !devicep->openacc.async.synchronize_func (aq))
3719 gomp_mutex_unlock (&devicep->lock);
3720 exit (EXIT_FAILURE);
3723 for (j = i + 1; j < mapnum; j++)
3725 kind = get_kind (short_mapkind, kinds, j) & typemask;
3726 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3727 && !GOMP_MAP_POINTER_P (kind))
3728 break;
3729 if (devaddrs[j] < devaddrs[i])
3730 break;
3731 if (cdata[i].present)
3732 continue;
3733 if (devaddrs[j] == 0)
3735 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3736 continue;
3738 int k;
3739 n2 = NULL;
3740 /* Dereference devaddrs[j] to get the device addr. */
3741 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3742 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3743 + sizes[j]);
3744 cdata[j].present = true;
3745 cdata[j].devaddr = devaddrs[j];
3746 if (devaddrs[j] == 0)
3747 continue;
3748 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3749 devaddrs[j],
3750 devaddrs[j] + sizeof (void*),
3751 false);
3752 if (k < j)
3753 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3754 - cdata[k].devaddr);
3755 else
3757 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3758 devaddrs[j],
3759 devaddrs[j] + sizeof (void*),
3760 false);
3761 if (n2 == NULL)
3763 gomp_mutex_unlock (&devicep->lock);
3764 gomp_fatal ("Pointer target wasn't mapped");
3766 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3767 - (n2->tgt->tgt_start + n2->tgt_offset));
3769 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3770 = (void *) (uintptr_t) devaddrs[j];
3772 i = j - 1;
3773 break;
3774 case GOMP_MAP_STRUCT:
3775 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3776 devaddrs[i + sizes[i]]
3777 + sizes[i + sizes[i]], false);
3778 cdata[i].present = n2 != NULL;
3779 cdata[i].devaddr = devaddrs[i];
3780 struct_cpy = cdata[i].present ? 0 : sizes[i];
3781 if (!n2)
3783 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3784 - devaddrs[i+1]
3785 + sizes[i + sizes[i]]);
3786 size_t align = (size_t) 1 << (kinds[i] >> 8);
3787 cdata[i].aligned = true;
3788 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3789 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3791 else
3792 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3793 - (n2->tgt->tgt_start + n2->tgt_offset));
3794 break;
3795 default:
3796 gomp_mutex_unlock (&devicep->lock);
3797 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3800 gomp_mutex_unlock (&devicep->lock);
3803 host_fn (devaddrs);
3805 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3807 uint64_t struct_cpy = 0;
3808 bool clean_struct = false;
3809 for (uint64_t i = 0; i < mapnum; i++)
3811 if (cdata[i].devaddr == 0)
3812 continue;
3813 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3814 bool copy = !cdata[i].present || struct_cpy;
3815 switch (kind)
3817 case GOMP_MAP_FORCE_FROM:
3818 case GOMP_MAP_FORCE_TOFROM:
3819 case GOMP_MAP_ALWAYS_FROM:
3820 case GOMP_MAP_ALWAYS_TOFROM:
3821 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3822 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3823 copy = true;
3824 /* FALLTHRU */
3825 case GOMP_MAP_FROM:
3826 case GOMP_MAP_TOFROM:
3827 if (copy)
3829 gomp_copy_host2dev (devicep, aq,
3830 (void *) (uintptr_t) cdata[i].devaddr,
3831 (void *) (uintptr_t) devaddrs[i],
3832 sizes[i], false, NULL);
3833 if (aq && !devicep->openacc.async.synchronize_func (aq))
3834 exit (EXIT_FAILURE);
3836 default:
3837 break;
3839 if (struct_cpy)
3841 struct_cpy--;
3842 continue;
3844 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3846 clean_struct = true;
3847 struct_cpy = sizes[i];
3849 else if (!cdata[i].present && cdata[i].aligned)
3850 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3851 else if (!cdata[i].present)
3852 free ((void *) (uintptr_t) devaddrs[i]);
3854 if (clean_struct)
3855 for (uint64_t i = 0; i < mapnum; i++)
3856 if (!cdata[i].present
3857 && ((get_kind (short_mapkind, kinds, i) & typemask)
3858 == GOMP_MAP_STRUCT))
3860 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3861 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3864 free (devaddrs);
3865 free (sizes);
3866 free (kinds);
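/* For orientation, a sketch of the kind of user code whose innermost
   region is run back on the host via gomp_target_rev above.  Illustrative
   only, not part of libgomp; the function and variable names are made up.  */
#if 0
#pragma omp requires reverse_offload

void
example (int *a)
{
  #pragma omp target map(tofrom: a[0:8])
  {
    /* Device code ...  */
    #pragma omp target device (ancestor : 1) map(always, tofrom: a[0:8])
    a[0] += 1;	/* ... but this nested region executes on the host.  */
  }
}
#endif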
3870 /* Host fallback for GOMP_target_data{,_ext} routines. */
3872 static void
3873 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3875 struct gomp_task_icv *icv = gomp_icv (false);
3877 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3878 && devicep != NULL)
3879 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3880 "be used for offloading");
3882 if (icv->target_data)
3884 /* Even when doing a host fallback, if there are any active
3885 #pragma omp target data constructs, we need to remember the
3886 new #pragma omp target data construct; otherwise
3887 GOMP_target_end_data would get out of sync. */
3888 struct target_mem_desc *tgt
3889 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3890 NULL, GOMP_MAP_VARS_DATA);
3891 tgt->prev = icv->target_data;
3892 icv->target_data = tgt;
3896 void
3897 GOMP_target_data (int device, const void *unused, size_t mapnum,
3898 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3900 struct gomp_device_descr *devicep = resolve_device (device, true);
3902 if (devicep == NULL
3903 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3904 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3905 return gomp_target_data_fallback (devicep);
3907 struct target_mem_desc *tgt
3908 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3909 NULL, GOMP_MAP_VARS_DATA);
3910 struct gomp_task_icv *icv = gomp_icv (true);
3911 tgt->prev = icv->target_data;
3912 icv->target_data = tgt;
3915 void
3916 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3917 size_t *sizes, unsigned short *kinds)
3919 struct gomp_device_descr *devicep = resolve_device (device, true);
3921 if (devicep == NULL
3922 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3923 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3924 return gomp_target_data_fallback (devicep);
3926 struct target_mem_desc *tgt
3927 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3928 NULL, GOMP_MAP_VARS_DATA);
3929 struct gomp_task_icv *icv = gomp_icv (true);
3930 tgt->prev = icv->target_data;
3931 icv->target_data = tgt;
3934 void
3935 GOMP_target_end_data (void)
3937 struct gomp_task_icv *icv = gomp_icv (false);
3938 if (icv->target_data)
3940 struct target_mem_desc *tgt = icv->target_data;
3941 icv->target_data = tgt->prev;
3942 gomp_unmap_vars (tgt, true, NULL);
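/* For reference, a structured 'target data' region as written by the user;
   the compiler lowers entry to GOMP_target_data_ext and exit to
   GOMP_target_end_data.  Illustrative sketch only - the exact expansion is
   compiler-dependent.  */
#if 0
void
example (double *x, size_t n)
{
  #pragma omp target data map(tofrom: x[0:n])
  {
    #pragma omp target
    x[0] = 42.0;
  }
}
#endif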
3946 void
3947 GOMP_target_update (int device, const void *unused, size_t mapnum,
3948 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3950 struct gomp_device_descr *devicep = resolve_device (device, true);
3952 if (devicep == NULL
3953 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3954 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3955 return;
3957 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3960 void
3961 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3962 size_t *sizes, unsigned short *kinds,
3963 unsigned int flags, void **depend)
3965 struct gomp_device_descr *devicep = resolve_device (device, true);
3967 /* If there are depend clauses, but nowait is not present,
3968 block the parent task until the dependencies are resolved
3969 and then just continue with the rest of the function as if it
3970 were a merged task. Until we are able to schedule tasks during
3971 variable mapping or unmapping, ignore nowait if depend clauses
3972 are not present. */
3973 if (depend != NULL)
3975 struct gomp_thread *thr = gomp_thread ();
3976 if (thr->task && thr->task->depend_hash)
3978 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3979 && thr->ts.team
3980 && !thr->task->final_task)
3982 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3983 mapnum, hostaddrs, sizes, kinds,
3984 flags | GOMP_TARGET_FLAG_UPDATE,
3985 depend, NULL, GOMP_TARGET_TASK_DATA))
3986 return;
3988 else
3990 struct gomp_team *team = thr->ts.team;
3991 /* If parallel or taskgroup has been cancelled, don't start new
3992 tasks. */
3993 if (__builtin_expect (gomp_cancel_var, 0) && team)
3995 if (gomp_team_barrier_cancelled (&team->barrier))
3996 return;
3997 if (thr->task->taskgroup)
3999 if (thr->task->taskgroup->cancelled)
4000 return;
4001 if (thr->task->taskgroup->workshare
4002 && thr->task->taskgroup->prev
4003 && thr->task->taskgroup->prev->cancelled)
4004 return;
4008 gomp_task_maybe_wait_for_dependencies (depend);
4013 if (devicep == NULL
4014 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4015 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4016 return;
4018 struct gomp_thread *thr = gomp_thread ();
4019 struct gomp_team *team = thr->ts.team;
4020 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4021 if (__builtin_expect (gomp_cancel_var, 0) && team)
4023 if (gomp_team_barrier_cancelled (&team->barrier))
4024 return;
4025 if (thr->task->taskgroup)
4027 if (thr->task->taskgroup->cancelled)
4028 return;
4029 if (thr->task->taskgroup->workshare
4030 && thr->task->taskgroup->prev
4031 && thr->task->taskgroup->prev->cancelled)
4032 return;
4036 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
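/* A sketch (illustrative only) of directives lowered to the two entry
   points above; with 'depend' plus 'nowait' the second form may become a
   deferred GOMP_TARGET_TASK_DATA task as handled above.  */
#if 0
void
example (int *a, size_t n, int x)
{
  #pragma omp target update to(a[0:n])
  #pragma omp target update from(a[0:n]) depend(in: x) nowait
}
#endif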
4039 static void
4040 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
4041 void **hostaddrs, size_t *sizes, unsigned short *kinds,
4042 htab_t *refcount_set)
4044 const int typemask = 0xff;
4045 size_t i;
4046 gomp_mutex_lock (&devicep->lock);
4047 if (devicep->state == GOMP_DEVICE_FINALIZED)
4049 gomp_mutex_unlock (&devicep->lock);
4050 return;
4053 for (i = 0; i < mapnum; i++)
4054 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
4056 struct splay_tree_key_s cur_node;
4057 cur_node.host_start = (uintptr_t) hostaddrs[i];
4058 cur_node.host_end = cur_node.host_start + sizeof (void *);
4059 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
4061 if (n)
4062 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
4063 false, NULL);
4066 int nrmvars = 0;
4067 splay_tree_key remove_vars[mapnum];
4069 for (i = 0; i < mapnum; i++)
4071 struct splay_tree_key_s cur_node;
4072 unsigned char kind = kinds[i] & typemask;
4073 switch (kind)
4075 case GOMP_MAP_FROM:
4076 case GOMP_MAP_ALWAYS_FROM:
4077 case GOMP_MAP_DELETE:
4078 case GOMP_MAP_RELEASE:
4079 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4080 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4081 cur_node.host_start = (uintptr_t) hostaddrs[i];
4082 cur_node.host_end = cur_node.host_start + sizes[i];
4083 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4084 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
4085 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
4086 : splay_tree_lookup (&devicep->mem_map, &cur_node);
4087 if (!k)
4088 continue;
4090 bool delete_p = (kind == GOMP_MAP_DELETE
4091 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4092 bool do_copy, do_remove;
4093 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4094 &do_remove);
4096 if ((kind == GOMP_MAP_FROM && do_copy)
4097 || kind == GOMP_MAP_ALWAYS_FROM)
4099 if (k->aux && k->aux->attach_count)
4101 /* We have to be careful not to overwrite still attached
4102 pointers during the copyback to host. */
4103 uintptr_t addr = k->host_start;
4104 while (addr < k->host_end)
4106 size_t i = (addr - k->host_start) / sizeof (void *);
4107 if (k->aux->attach_count[i] == 0)
4108 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4109 (void *) (k->tgt->tgt_start
4110 + k->tgt_offset
4111 + addr - k->host_start),
4112 sizeof (void *));
4113 addr += sizeof (void *);
4116 else
4117 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4118 (void *) (k->tgt->tgt_start + k->tgt_offset
4119 + cur_node.host_start
4120 - k->host_start),
4121 cur_node.host_end - cur_node.host_start);
4124 /* Structure element lists are removed altogether at once, which
4125 may cause immediate deallocation of the target_mem_desc, causing
4126 errors if we still have subsequent element siblings to copy back.
4127 While we're at it, it also seems more disciplined to simply
4128 queue all removals together for processing below.
4130 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4131 not have this problem, since it maintains an additional
4132 tgt->refcount = 1 reference to the target_mem_desc to start with.
4133 */
4134 if (do_remove)
4135 remove_vars[nrmvars++] = k;
4136 break;
4138 case GOMP_MAP_DETACH:
4139 break;
4140 default:
4141 gomp_mutex_unlock (&devicep->lock);
4142 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4143 kind);
4147 for (int i = 0; i < nrmvars; i++)
4148 gomp_remove_var (devicep, remove_vars[i]);
4150 gomp_mutex_unlock (&devicep->lock);
4153 void
4154 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4155 size_t *sizes, unsigned short *kinds,
4156 unsigned int flags, void **depend)
4158 struct gomp_device_descr *devicep = resolve_device (device, true);
4160 /* If there are depend clauses, but nowait is not present,
4161 block the parent task until the dependencies are resolved
4162 and then just continue with the rest of the function as if it
4163 were a merged task. Until we are able to schedule tasks during
4164 variable mapping or unmapping, ignore nowait if depend clauses
4165 are not present. */
4166 if (depend != NULL)
4168 struct gomp_thread *thr = gomp_thread ();
4169 if (thr->task && thr->task->depend_hash)
4171 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4172 && thr->ts.team
4173 && !thr->task->final_task)
4175 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4176 mapnum, hostaddrs, sizes, kinds,
4177 flags, depend, NULL,
4178 GOMP_TARGET_TASK_DATA))
4179 return;
4181 else
4183 struct gomp_team *team = thr->ts.team;
4184 /* If parallel or taskgroup has been cancelled, don't start new
4185 tasks. */
4186 if (__builtin_expect (gomp_cancel_var, 0) && team)
4188 if (gomp_team_barrier_cancelled (&team->barrier))
4189 return;
4190 if (thr->task->taskgroup)
4192 if (thr->task->taskgroup->cancelled)
4193 return;
4194 if (thr->task->taskgroup->workshare
4195 && thr->task->taskgroup->prev
4196 && thr->task->taskgroup->prev->cancelled)
4197 return;
4201 gomp_task_maybe_wait_for_dependencies (depend);
4206 if (devicep == NULL
4207 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4208 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4209 return;
4211 struct gomp_thread *thr = gomp_thread ();
4212 struct gomp_team *team = thr->ts.team;
4213 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4214 if (__builtin_expect (gomp_cancel_var, 0) && team)
4216 if (gomp_team_barrier_cancelled (&team->barrier))
4217 return;
4218 if (thr->task->taskgroup)
4220 if (thr->task->taskgroup->cancelled)
4221 return;
4222 if (thr->task->taskgroup->workshare
4223 && thr->task->taskgroup->prev
4224 && thr->task->taskgroup->prev->cancelled)
4225 return;
4229 htab_t refcount_set = htab_create (mapnum);
4231 /* The variables are mapped separately such that they can be released
4232 independently. */
4233 size_t i, j;
4234 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4235 for (i = 0; i < mapnum; i++)
4236 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT
4237 || (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4239 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4240 &kinds[i], true, &refcount_set,
4241 GOMP_MAP_VARS_ENTER_DATA);
4242 i += sizes[i];
4244 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4246 for (j = i + 1; j < mapnum; j++)
4247 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4248 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4249 break;
4250 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4251 &kinds[i], true, &refcount_set,
4252 GOMP_MAP_VARS_ENTER_DATA);
4253 i += j - i - 1;
4255 else if (i + 1 < mapnum
4256 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4257 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4258 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4260 /* An attach operation must be processed together with the mapped
4261 base-pointer list item. */
4262 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4263 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4264 i += 1;
4266 else
4267 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4268 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4269 else
4270 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4271 htab_free (refcount_set);
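/* Unstructured data mapping as written by the user; both directives are
   lowered to GOMP_target_enter_exit_data, distinguished by
   GOMP_TARGET_FLAG_EXIT_DATA.  Illustrative sketch only.  */
#if 0
void
example (float *v, size_t n)
{
  #pragma omp target enter data map(to: v[0:n])
  /* ... target regions using v ...  */
  #pragma omp target exit data map(from: v[0:n])
}
#endif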
4274 bool
4275 gomp_target_task_fn (void *data)
4277 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4278 struct gomp_device_descr *devicep = ttask->devicep;
4280 if (ttask->fn != NULL)
4282 void *fn_addr;
4283 if (devicep == NULL
4284 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4285 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4286 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4288 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4289 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4290 ttask->args);
4291 return false;
4294 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4296 if (ttask->tgt)
4297 gomp_unmap_vars (ttask->tgt, true, NULL);
4298 return false;
4301 void *actual_arguments;
4302 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4304 ttask->tgt = NULL;
4305 actual_arguments = ttask->hostaddrs;
4307 else
4309 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4310 NULL, ttask->sizes, ttask->kinds, true,
4311 NULL, GOMP_MAP_VARS_TARGET);
4312 actual_arguments = (void *) ttask->tgt->tgt_start;
4314 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4316 assert (devicep->async_run_func);
4317 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4318 ttask->args, (void *) ttask);
4319 return true;
4321 else if (devicep == NULL
4322 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4323 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4324 return false;
4326 size_t i;
4327 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4328 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4329 ttask->kinds, true);
4330 else
4332 htab_t refcount_set = htab_create (ttask->mapnum);
4333 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4334 for (i = 0; i < ttask->mapnum; i++)
4335 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT
4336 || (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4338 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4339 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4340 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4341 i += ttask->sizes[i];
4343 else
4344 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4345 &ttask->kinds[i], true, &refcount_set,
4346 GOMP_MAP_VARS_ENTER_DATA);
4347 else
4348 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4349 ttask->kinds, &refcount_set);
4350 htab_free (refcount_set);
4352 return false;
4355 /* Implement OpenMP 'teams' construct, legacy entry point. */
4357 void
4358 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4360 if (thread_limit)
4362 struct gomp_task_icv *icv = gomp_icv (true);
4363 icv->thread_limit_var
4364 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4366 (void) num_teams;
4369 /* Implement OpenMP 'teams' construct.
4371 Initialize upon FIRST call. Return whether this invocation is active.
4372 Depending on whether NUM_TEAMS_LOW asks for more teams than are provided
4373 in hardware, we may need to loop multiple times; in that case make sure to
4374 update the team-level variable used by 'omp_get_team_num'. */
4376 bool
4377 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4378 unsigned int thread_limit, bool first)
4380 struct gomp_thread *thr = gomp_thread ();
4381 if (first)
4383 if (thread_limit)
4385 struct gomp_task_icv *icv = gomp_icv (true);
4386 icv->thread_limit_var
4387 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4389 (void) num_teams_high;
4390 if (num_teams_low == 0)
4391 num_teams_low = 1;
4392 thr->num_teams = num_teams_low - 1;
4393 thr->team_num = 0;
4395 else if (thr->team_num == thr->num_teams)
4396 return false;
4397 else
4398 ++thr->team_num;
4399 return true;
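/* Given the first/subsequent call protocol above, the host-side lowering
   of a 'teams' construct can loop as follows.  A sketch only - the actual
   expansion is emitted by the compiler, and 'teams_body' is a made-up
   placeholder.  */
#if 0
  for (bool first = true;
       GOMP_teams4 (num_teams_low, num_teams_high, thread_limit, first);
       first = false)
    teams_body ();	/* omp_get_team_num () advances per iteration.  */
#endif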
4402 void *
4403 omp_target_alloc (size_t size, int device_num)
4405 if (device_num == omp_initial_device
4406 || device_num == gomp_get_num_devices ())
4407 return malloc (size);
4409 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4410 if (devicep == NULL)
4411 return NULL;
4413 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4414 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4415 return malloc (size);
4417 gomp_mutex_lock (&devicep->lock);
4418 void *ret = devicep->alloc_func (devicep->target_id, size);
4419 gomp_mutex_unlock (&devicep->lock);
4420 return ret;
4423 void
4424 omp_target_free (void *device_ptr, int device_num)
4426 if (device_num == omp_initial_device
4427 || device_num == gomp_get_num_devices ())
4429 free (device_ptr);
4430 return;
4433 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4434 if (devicep == NULL || device_ptr == NULL)
4435 return;
4437 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4438 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4440 free (device_ptr);
4441 return;
4444 gomp_mutex_lock (&devicep->lock);
4445 gomp_free_device_memory (devicep, device_ptr);
4446 gomp_mutex_unlock (&devicep->lock);
4449 int
4450 omp_target_is_present (const void *ptr, int device_num)
4452 if (device_num == omp_initial_device
4453 || device_num == gomp_get_num_devices ())
4454 return 1;
4456 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4457 if (devicep == NULL)
4458 return 0;
4460 if (ptr == NULL)
4461 return 1;
4463 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4464 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4465 return 1;
4467 gomp_mutex_lock (&devicep->lock);
4468 struct splay_tree_s *mem_map = &devicep->mem_map;
4469 struct splay_tree_key_s cur_node;
4471 cur_node.host_start = (uintptr_t) ptr;
4472 cur_node.host_end = cur_node.host_start;
4473 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4474 int ret = n != NULL;
4475 gomp_mutex_unlock (&devicep->lock);
4476 return ret;
4479 static int
4480 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4481 struct gomp_device_descr **dst_devicep,
4482 struct gomp_device_descr **src_devicep)
4484 if (dst_device_num != gomp_get_num_devices ()
4485 /* Above gomp_get_num_devices has to be called unconditionally. */
4486 && dst_device_num != omp_initial_device)
4488 *dst_devicep = resolve_device (dst_device_num, false);
4489 if (*dst_devicep == NULL)
4490 return EINVAL;
4492 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4493 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4494 *dst_devicep = NULL;
4497 if (src_device_num != num_devices_openmp
4498 && src_device_num != omp_initial_device)
4500 *src_devicep = resolve_device (src_device_num, false);
4501 if (*src_devicep == NULL)
4502 return EINVAL;
4504 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4505 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4506 *src_devicep = NULL;
4509 return 0;
4512 static int
4513 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4514 size_t dst_offset, size_t src_offset,
4515 struct gomp_device_descr *dst_devicep,
4516 struct gomp_device_descr *src_devicep)
4518 bool ret;
4519 if (src_devicep == NULL && dst_devicep == NULL)
4521 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4522 return 0;
4524 if (src_devicep == NULL)
4526 gomp_mutex_lock (&dst_devicep->lock);
4527 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4528 (char *) dst + dst_offset,
4529 (char *) src + src_offset, length);
4530 gomp_mutex_unlock (&dst_devicep->lock);
4531 return (ret ? 0 : EINVAL);
4533 if (dst_devicep == NULL)
4535 gomp_mutex_lock (&src_devicep->lock);
4536 ret = src_devicep->dev2host_func (src_devicep->target_id,
4537 (char *) dst + dst_offset,
4538 (char *) src + src_offset, length);
4539 gomp_mutex_unlock (&src_devicep->lock);
4540 return (ret ? 0 : EINVAL);
4542 if (src_devicep == dst_devicep)
4544 gomp_mutex_lock (&src_devicep->lock);
4545 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4546 (char *) dst + dst_offset,
4547 (char *) src + src_offset, length);
4548 gomp_mutex_unlock (&src_devicep->lock);
4549 return (ret ? 0 : EINVAL);
4551 return EINVAL;
4554 int
4555 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4556 size_t src_offset, int dst_device_num, int src_device_num)
4558 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4559 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4560 &dst_devicep, &src_devicep);
4562 if (ret)
4563 return ret;
4565 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4566 dst_devicep, src_devicep);
4568 return ret;
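/* Typical use of omp_target_alloc, omp_target_memcpy and omp_target_free
   together; an illustrative sketch with abbreviated error handling,
   assuming <omp.h> and <stdlib.h> are included.  */
#if 0
void
example (const double *host_buf, size_t n)
{
  int dev = omp_get_default_device ();
  void *dev_buf = omp_target_alloc (n * sizeof (double), dev);
  if (dev_buf == NULL)
    abort ();
  /* Both offsets are zero; the host side is omp_get_initial_device ().  */
  if (omp_target_memcpy (dev_buf, host_buf, n * sizeof (double), 0, 0,
			 dev, omp_get_initial_device ()))
    abort ();
  omp_target_free (dev_buf, dev);
}
#endif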
4571 typedef struct
4573 void *dst;
4574 const void *src;
4575 size_t length;
4576 size_t dst_offset;
4577 size_t src_offset;
4578 struct gomp_device_descr *dst_devicep;
4579 struct gomp_device_descr *src_devicep;
4580 } omp_target_memcpy_data;
4582 static void
4583 omp_target_memcpy_async_helper (void *args)
4585 omp_target_memcpy_data *a = args;
4586 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4587 a->src_offset, a->dst_devicep, a->src_devicep))
4588 gomp_fatal ("omp_target_memcpy failed");
4591 int
4592 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4593 size_t dst_offset, size_t src_offset,
4594 int dst_device_num, int src_device_num,
4595 int depobj_count, omp_depend_t *depobj_list)
4597 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4598 unsigned int flags = 0;
4599 void *depend[depobj_count + 5];
4600 int i;
4601 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4602 &dst_devicep, &src_devicep);
4604 omp_target_memcpy_data s = {
4605 .dst = dst,
4606 .src = src,
4607 .length = length,
4608 .dst_offset = dst_offset,
4609 .src_offset = src_offset,
4610 .dst_devicep = dst_devicep,
4611 .src_devicep = src_devicep
4614 if (check)
4615 return check;
4617 if (depobj_count > 0 && depobj_list != NULL)
4619 flags |= GOMP_TASK_FLAG_DEPEND;
4620 depend[0] = 0;
4621 depend[1] = (void *) (uintptr_t) depobj_count;
4622 depend[2] = depend[3] = depend[4] = 0;
4623 for (i = 0; i < depobj_count; ++i)
4624 depend[i + 5] = &depobj_list[i];
4627 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4628 __alignof__ (s), true, flags, depend, 0, NULL);
4630 return 0;
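/* A sketch (illustrative only) of the asynchronous variant using a
   dependence object; completion is awaited with 'taskwait'.  Assumes
   <omp.h> is included.  */
#if 0
void
example (void *dst, const void *src, size_t n, int dev)
{
  int x = 0;
  omp_depend_t dep;
  #pragma omp depobj (dep) depend (inout: x)
  omp_target_memcpy_async (dst, src, n, 0, 0,
			   dev, omp_get_initial_device (), 1, &dep);
  #pragma omp taskwait
  #pragma omp depobj (dep) destroy
}
#endif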
4633 static int
4634 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4635 int num_dims, const size_t *volume,
4636 const size_t *dst_offsets,
4637 const size_t *src_offsets,
4638 const size_t *dst_dimensions,
4639 const size_t *src_dimensions,
4640 struct gomp_device_descr *dst_devicep,
4641 struct gomp_device_descr *src_devicep,
4642 size_t *tmp_size, void **tmp)
4644 size_t dst_slice = element_size;
4645 size_t src_slice = element_size;
4646 size_t j, dst_off, src_off, length;
4647 int i, ret;
4649 if (num_dims == 1)
4651 if (__builtin_mul_overflow (element_size, volume[0], &length)
4652 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4653 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4654 return EINVAL;
4655 if (dst_devicep == NULL && src_devicep == NULL)
4657 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4658 length);
4659 ret = 1;
4661 else if (src_devicep == NULL)
4662 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4663 (char *) dst + dst_off,
4664 (const char *) src + src_off,
4665 length);
4666 else if (dst_devicep == NULL)
4667 ret = src_devicep->dev2host_func (src_devicep->target_id,
4668 (char *) dst + dst_off,
4669 (const char *) src + src_off,
4670 length);
4671 else if (src_devicep == dst_devicep)
4672 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4673 (char *) dst + dst_off,
4674 (const char *) src + src_off,
4675 length);
4676 else
4678 if (*tmp_size == 0)
4680 *tmp_size = length;
4681 *tmp = malloc (length);
4682 if (*tmp == NULL)
4683 return ENOMEM;
4685 else if (*tmp_size < length)
4687 *tmp_size = length;
4688 free (*tmp);
4689 *tmp = malloc (length);
4690 if (*tmp == NULL)
4691 return ENOMEM;
4693 ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4694 (const char *) src + src_off,
4695 length);
4696 if (ret == 1)
4697 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4698 (char *) dst + dst_off, *tmp,
4699 length);
4701 return ret ? 0 : EINVAL;
4704 /* host->device, device->host and intra-device. */
4705 if (num_dims == 2
4706 && ((src_devicep
4707 && src_devicep == dst_devicep
4708 && src_devicep->memcpy2d_func)
4709 || (!src_devicep != !dst_devicep
4710 && ((src_devicep && src_devicep->memcpy2d_func)
4711 || (dst_devicep && dst_devicep->memcpy2d_func)))))
4713 size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4714 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4715 int src_id = src_devicep ? src_devicep->target_id : -1;
4716 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4718 if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4719 || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4720 || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4721 || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4722 || __builtin_mul_overflow (src_offsets[1], element_size,
4723 &src_off_sz1))
4724 return EINVAL;
4725 ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4726 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4727 src, src_off_sz1, src_offsets[0], src_sz1);
4728 if (ret != -1)
4729 return ret ? 0 : EINVAL;
4731 else if (num_dims == 3
4732 && ((src_devicep
4733 && src_devicep == dst_devicep
4734 && src_devicep->memcpy3d_func)
4735 || (!src_devicep != !dst_devicep
4736 && ((src_devicep && src_devicep->memcpy3d_func)
4737 || (dst_devicep && dst_devicep->memcpy3d_func)))))
4739 size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4740 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4741 int src_id = src_devicep ? src_devicep->target_id : -1;
4742 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4744 if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4745 || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4746 || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4747 || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4748 || __builtin_mul_overflow (src_offsets[2], element_size,
4749 &src_off_sz2))
4750 return EINVAL;
4751 ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4752 dst, dst_off_sz2, dst_offsets[1],
4753 dst_offsets[0], dst_sz2, dst_dimensions[1],
4754 src, src_off_sz2, src_offsets[1],
4755 src_offsets[0], src_sz2, src_dimensions[1]);
4756 if (ret != -1)
4757 return ret ? 0 : EINVAL;
4760 for (i = 1; i < num_dims; i++)
4761 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4762 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4763 return EINVAL;
4764 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4765 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4766 return EINVAL;
4767 for (j = 0; j < volume[0]; j++)
4769 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4770 (const char *) src + src_off,
4771 element_size, num_dims - 1,
4772 volume + 1, dst_offsets + 1,
4773 src_offsets + 1, dst_dimensions + 1,
4774 src_dimensions + 1, dst_devicep,
4775 src_devicep, tmp_size, tmp);
4776 if (ret)
4777 return ret;
4778 dst_off += dst_slice;
4779 src_off += src_slice;
4781 return 0;
4784 static int
4785 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4786 int src_device_num,
4787 struct gomp_device_descr **dst_devicep,
4788 struct gomp_device_descr **src_devicep)
4790 if (!dst && !src)
4791 return INT_MAX;
4793 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4794 dst_devicep, src_devicep);
4795 if (ret)
4796 return ret;
4798 return 0;
4801 static int
4802 omp_target_memcpy_rect_copy (void *dst, const void *src,
4803 size_t element_size, int num_dims,
4804 const size_t *volume, const size_t *dst_offsets,
4805 const size_t *src_offsets,
4806 const size_t *dst_dimensions,
4807 const size_t *src_dimensions,
4808 struct gomp_device_descr *dst_devicep,
4809 struct gomp_device_descr *src_devicep)
4811 size_t tmp_size = 0;
4812 void *tmp = NULL;
4813 bool lock_src;
4814 bool lock_dst;
4816 lock_src = src_devicep != NULL;
4817 lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
4818 if (lock_src)
4819 gomp_mutex_lock (&src_devicep->lock);
4820 if (lock_dst)
4821 gomp_mutex_lock (&dst_devicep->lock);
4822 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4823 volume, dst_offsets, src_offsets,
4824 dst_dimensions, src_dimensions,
4825 dst_devicep, src_devicep,
4826 &tmp_size, &tmp);
4827 if (lock_src)
4828 gomp_mutex_unlock (&src_devicep->lock);
4829 if (lock_dst)
4830 gomp_mutex_unlock (&dst_devicep->lock);
4831 if (tmp)
4832 free (tmp);
4834 return ret;
4837 int
4838 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4839 int num_dims, const size_t *volume,
4840 const size_t *dst_offsets,
4841 const size_t *src_offsets,
4842 const size_t *dst_dimensions,
4843 const size_t *src_dimensions,
4844 int dst_device_num, int src_device_num)
4846 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4848 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4849 src_device_num, &dst_devicep,
4850 &src_devicep);
4852 if (check)
4853 return check;
4855 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4856 volume, dst_offsets, src_offsets,
4857 dst_dimensions, src_dimensions,
4858 dst_devicep, src_devicep);
4860 return ret;
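/* A worked sketch (illustrative only) of the rectangular copy: transfer a
   2x3 sub-block starting at element [1][2] between two 4x5 double arrays,
   dimensions given outermost first.  Assumes <omp.h> and <stdlib.h>.  */
#if 0
void
example (double (*dst)[5], const double (*src)[5], int dev)
{
  const size_t volume[2]     = { 2, 3 };	/* Rows, columns to copy.  */
  const size_t offsets[2]    = { 1, 2 };	/* Start at [1][2].  */
  const size_t dimensions[2] = { 4, 5 };	/* Full extents.  */
  if (omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
			      offsets, offsets, dimensions, dimensions,
			      dev, omp_get_initial_device ()))
    abort ();
}
#endif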
4863 typedef struct
4865 void *dst;
4866 const void *src;
4867 size_t element_size;
4868 const size_t *volume;
4869 const size_t *dst_offsets;
4870 const size_t *src_offsets;
4871 const size_t *dst_dimensions;
4872 const size_t *src_dimensions;
4873 struct gomp_device_descr *dst_devicep;
4874 struct gomp_device_descr *src_devicep;
4875 int num_dims;
4876 } omp_target_memcpy_rect_data;
4878 static void
4879 omp_target_memcpy_rect_async_helper (void *args)
4881 omp_target_memcpy_rect_data *a = args;
4882 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4883 a->num_dims, a->volume, a->dst_offsets,
4884 a->src_offsets, a->dst_dimensions,
4885 a->src_dimensions, a->dst_devicep,
4886 a->src_devicep);
4887 if (ret)
4888 gomp_fatal ("omp_target_memcpy_rect failed");
4891 int
4892 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4893 int num_dims, const size_t *volume,
4894 const size_t *dst_offsets,
4895 const size_t *src_offsets,
4896 const size_t *dst_dimensions,
4897 const size_t *src_dimensions,
4898 int dst_device_num, int src_device_num,
4899 int depobj_count, omp_depend_t *depobj_list)
4901 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4902 unsigned flags = 0;
4903 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4904 src_device_num, &dst_devicep,
4905 &src_devicep);
4906 void *depend[depobj_count + 5];
4907 int i;
4909 omp_target_memcpy_rect_data s = {
4910 .dst = dst,
4911 .src = src,
4912 .element_size = element_size,
4913 .num_dims = num_dims,
4914 .volume = volume,
4915 .dst_offsets = dst_offsets,
4916 .src_offsets = src_offsets,
4917 .dst_dimensions = dst_dimensions,
4918 .src_dimensions = src_dimensions,
4919 .dst_devicep = dst_devicep,
4920 .src_devicep = src_devicep
4923 if (check)
4924 return check;
4926 if (depobj_count > 0 && depobj_list != NULL)
4928 flags |= GOMP_TASK_FLAG_DEPEND;
4929 depend[0] = 0;
4930 depend[1] = (void *) (uintptr_t) depobj_count;
4931 depend[2] = depend[3] = depend[4] = 0;
4932 for (i = 0; i < depobj_count; ++i)
4933 depend[i + 5] = &depobj_list[i];
4936 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4937 __alignof__ (s), true, flags, depend, 0, NULL);
4939 return 0;
4942 int
4943 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4944 size_t size, size_t device_offset, int device_num)
4946 if (device_num == omp_initial_device
4947 || device_num == gomp_get_num_devices ())
4948 return EINVAL;
4950 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4951 if (devicep == NULL)
4952 return EINVAL;
4954 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4955 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4956 return EINVAL;
4958 gomp_mutex_lock (&devicep->lock);
4960 struct splay_tree_s *mem_map = &devicep->mem_map;
4961 struct splay_tree_key_s cur_node;
4962 int ret = EINVAL;
4964 cur_node.host_start = (uintptr_t) host_ptr;
4965 cur_node.host_end = cur_node.host_start + size;
4966 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4967 if (n)
4969 if (n->tgt->tgt_start + n->tgt_offset
4970 == (uintptr_t) device_ptr + device_offset
4971 && n->host_start <= cur_node.host_start
4972 && n->host_end >= cur_node.host_end)
4973 ret = 0;
4975 else
4977 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4978 tgt->array = gomp_malloc (sizeof (*tgt->array));
4979 tgt->refcount = 1;
4980 tgt->tgt_start = 0;
4981 tgt->tgt_end = 0;
4982 tgt->to_free = NULL;
4983 tgt->prev = NULL;
4984 tgt->list_count = 0;
4985 tgt->device_descr = devicep;
4986 splay_tree_node array = tgt->array;
4987 splay_tree_key k = &array->key;
4988 k->host_start = cur_node.host_start;
4989 k->host_end = cur_node.host_end;
4990 k->tgt = tgt;
4991 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4992 k->refcount = REFCOUNT_INFINITY;
4993 k->dynamic_refcount = 0;
4994 k->aux = NULL;
4995 array->left = NULL;
4996 array->right = NULL;
4997 splay_tree_insert (&devicep->mem_map, array);
4998 ret = 0;
5000 gomp_mutex_unlock (&devicep->lock);
5001 return ret;
5004 int
5005 omp_target_disassociate_ptr (const void *ptr, int device_num)
5007 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5008 if (devicep == NULL)
5009 return EINVAL;
5011 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5012 return EINVAL;
5014 gomp_mutex_lock (&devicep->lock);
5016 struct splay_tree_s *mem_map = &devicep->mem_map;
5017 struct splay_tree_key_s cur_node;
5018 int ret = EINVAL;
5020 cur_node.host_start = (uintptr_t) ptr;
5021 cur_node.host_end = cur_node.host_start;
5022 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
5023 if (n
5024 && n->host_start == cur_node.host_start
5025 && n->refcount == REFCOUNT_INFINITY
5026 && n->tgt->tgt_start == 0
5027 && n->tgt->to_free == NULL
5028 && n->tgt->refcount == 1
5029 && n->tgt->list_count == 0)
5031 splay_tree_remove (&devicep->mem_map, n);
5032 gomp_unmap_tgt (n->tgt);
5033 ret = 0;
5036 gomp_mutex_unlock (&devicep->lock);
5037 return ret;
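/* The two routines above pair up: associate separately allocated device
   memory with a host address so target regions treat it as already mapped,
   then break the association before freeing.  Illustrative sketch only,
   assuming <omp.h> and <stdlib.h>.  */
#if 0
void
example (double *host_buf, size_t n, int dev)
{
  void *dev_buf = omp_target_alloc (n * sizeof (double), dev);
  if (omp_target_associate_ptr (host_buf, dev_buf,
				n * sizeof (double), 0, dev))
    abort ();
  /* ... target regions now find host_buf present on DEV ...  */
  if (omp_target_disassociate_ptr (host_buf, dev))
    abort ();
  omp_target_free (dev_buf, dev);
}
#endif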
5040 void *
5041 omp_get_mapped_ptr (const void *ptr, int device_num)
5043 if (device_num == omp_initial_device
5044 || device_num == omp_get_initial_device ())
5045 return (void *) ptr;
5047 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5048 if (devicep == NULL)
5049 return NULL;
5051 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5052 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5053 return (void *) ptr;
5055 gomp_mutex_lock (&devicep->lock);
5057 struct splay_tree_s *mem_map = &devicep->mem_map;
5058 struct splay_tree_key_s cur_node;
5059 void *ret = NULL;
5061 cur_node.host_start = (uintptr_t) ptr;
5062 cur_node.host_end = cur_node.host_start;
5063 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
5065 if (n)
5067 uintptr_t offset = cur_node.host_start - n->host_start;
5068 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
5071 gomp_mutex_unlock (&devicep->lock);
5073 return ret;
5076 int
5077 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
5079 if (device_num == omp_initial_device
5080 || device_num == gomp_get_num_devices ())
5081 return true;
5083 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5084 if (devicep == NULL)
5085 return false;
5087 /* TODO: Unified shared memory must be handled when available. */
5089 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
5092 int
5093 omp_pause_resource (omp_pause_resource_t kind, int device_num)
5095 (void) kind;
5096 if (device_num == omp_initial_device
5097 || device_num == gomp_get_num_devices ())
5098 return gomp_pause_host ();
5100 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5101 if (devicep == NULL)
5102 return -1;
5104 /* Do nothing for target devices for now. */
5105 return 0;
5108 int
5109 omp_pause_resource_all (omp_pause_resource_t kind)
5111 (void) kind;
5112 if (gomp_pause_host ())
5113 return -1;
5114 /* Do nothing for target devices for now. */
5115 return 0;
5118 ialias (omp_pause_resource)
5119 ialias (omp_pause_resource_all)
5121 #ifdef PLUGIN_SUPPORT
5123 /* This function tries to load a plugin for DEVICE. The name of the
5124 plugin is passed in PLUGIN_NAME.
5125 The handles of the found functions are stored in the corresponding fields
5126 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5128 static bool
5129 gomp_load_plugin_for_device (struct gomp_device_descr *device,
5130 const char *plugin_name)
5132 const char *err = NULL, *last_missing = NULL;
5134 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5135 if (!plugin_handle)
5136 #if OFFLOAD_DEFAULTED
5137 return 0;
5138 #else
5139 goto dl_fail;
5140 #endif
5142 /* Check if all required functions are available in the plugin and store
5143 their handles. None of the symbols can legitimately be NULL,
5144 so we don't need to check dlerror all the time. */
5145 #define DLSYM(f) \
5146 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5147 goto dl_fail
5148 /* Similar, but missing functions are not an error. The expression
5149 evaluates to false if the lookup failed, true otherwise. */
5150 #define DLSYM_OPT(f, n) \
5151 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5152 || (last_missing = #n, 0))
5154 DLSYM (version);
5155 if (device->version_func () != GOMP_VERSION)
5157 err = "plugin version mismatch";
5158 goto fail;
5161 DLSYM (get_name);
5162 DLSYM (get_caps);
5163 DLSYM (get_type);
5164 DLSYM (get_num_devices);
5165 DLSYM (init_device);
5166 DLSYM (fini_device);
5167 DLSYM (load_image);
5168 DLSYM (unload_image);
5169 DLSYM (alloc);
5170 DLSYM (free);
5171 DLSYM (dev2host);
5172 DLSYM (host2dev);
5173 DLSYM_OPT (memcpy2d, memcpy2d);
5174 DLSYM_OPT (memcpy3d, memcpy3d);
5175 device->capabilities = device->get_caps_func ();
5176 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5178 DLSYM (run);
5179 DLSYM_OPT (async_run, async_run);
5180 DLSYM_OPT (can_run, can_run);
5181 DLSYM (dev2dev);
5183 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5185 if (!DLSYM_OPT (openacc.exec, openacc_exec)
5186 || !DLSYM_OPT (openacc.create_thread_data,
5187 openacc_create_thread_data)
5188 || !DLSYM_OPT (openacc.destroy_thread_data,
5189 openacc_destroy_thread_data)
5190 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5191 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5192 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5193 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5194 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5195 || !DLSYM_OPT (openacc.async.queue_callback,
5196 openacc_async_queue_callback)
5197 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5198 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5199 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5200 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5202 /* Require all the OpenACC handlers if we have
5203 GOMP_OFFLOAD_CAP_OPENACC_200. */
5204 err = "plugin missing OpenACC handler function";
5205 goto fail;
5208 unsigned cuda = 0;
5209 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5210 openacc_cuda_get_current_device);
5211 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5212 openacc_cuda_get_current_context);
5213 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5214 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5215 if (cuda && cuda != 4)
5217 /* Make sure all the CUDA functions are there if any of them are. */
5218 err = "plugin missing OpenACC CUDA handler function";
5219 goto fail;
5222 #undef DLSYM
5223 #undef DLSYM_OPT
5225 return 1;
5227 dl_fail:
5228 err = dlerror ();
5229 fail:
5230 gomp_error ("while loading %s: %s", plugin_name, err);
5231 if (last_missing)
5232 gomp_error ("missing function was %s", last_missing);
5233 if (plugin_handle)
5234 dlclose (plugin_handle);
5236 return 0;
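/* For reference, the token pasting in the DLSYM macro above means that
   e.g. DLSYM (alloc) expands to roughly the following.  */
#if 0
  if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
    goto dl_fail;
#endif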
5239 /* This function finalizes all initialized devices. */
5241 static void
5242 gomp_target_fini (void)
5244 int i;
5245 for (i = 0; i < num_devices; i++)
5247 bool ret = true;
5248 struct gomp_device_descr *devicep = &devices[i];
5249 gomp_mutex_lock (&devicep->lock);
5250 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5251 ret = gomp_fini_device (devicep);
5252 gomp_mutex_unlock (&devicep->lock);
5253 if (!ret)
5254 gomp_fatal ("device finalization failed");
5258 /* This function initializes the runtime for offloading.
5259 It parses the list of offload plugins and tries to load them.
5260 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5261 will be set, and the array DEVICES initialized, containing descriptors for
5262 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
5263 followed by the others. */
5265 static void
5266 gomp_target_init (void)
5268 const char *prefix = "libgomp-plugin-";
5269 const char *suffix = SONAME_SUFFIX (1);
5270 const char *cur, *next;
5271 char *plugin_name;
5272 int i, new_num_devs;
5273 int num_devs = 0, num_devs_openmp;
5274 struct gomp_device_descr *devs = NULL;
5276 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5277 return;
5279 cur = OFFLOAD_PLUGINS;
5280 if (*cur)
5283 struct gomp_device_descr current_device;
5284 size_t prefix_len, suffix_len, cur_len;
5286 next = strchr (cur, ',');
5288 prefix_len = strlen (prefix);
5289 cur_len = next ? next - cur : strlen (cur);
5290 suffix_len = strlen (suffix);
5292 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5293 if (!plugin_name)
5295 num_devs = 0;
5296 break;
5299 memcpy (plugin_name, prefix, prefix_len);
5300 memcpy (plugin_name + prefix_len, cur, cur_len);
5301 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5303 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5305 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5306 new_num_devs = current_device.get_num_devices_func (omp_req);
5307 if (gomp_debug_var > 0 && new_num_devs < 0)
5309 bool found = false;
5310 int type = current_device.get_type_func ();
5311 for (int img = 0; img < num_offload_images; img++)
5312 if (type == offload_images[img].type)
5313 found = true;
5314 if (found)
5316 char buf[sizeof ("unified_address, unified_shared_memory, "
5317 "reverse_offload")];
5318 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5319 char *name = (char *) malloc (cur_len + 1);
5320 memcpy (name, cur, cur_len);
5321 name[cur_len] = '\0';
5322 gomp_debug (1,
5323 "%s devices present but 'omp requires %s' "
5324 "cannot be fulfilled\n", name, buf);
5325 free (name);
5328 else if (new_num_devs >= 1)
5330 /* Augment DEVICES and NUM_DEVICES. */
5332 /* If USM has been requested and is supported by all devices
5333 of this type, set the capability accordingly. */
5334 if (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
5335 current_device.capabilities |= GOMP_OFFLOAD_CAP_SHARED_MEM;
5337 devs = realloc (devs, (num_devs + new_num_devs)
5338 * sizeof (struct gomp_device_descr));
5339 if (!devs)
5341 num_devs = 0;
5342 free (plugin_name);
5343 break;
5346 current_device.name = current_device.get_name_func ();
5347 /* current_device.capabilities has already been set. */
5348 current_device.type = current_device.get_type_func ();
5349 current_device.mem_map.root = NULL;
5350 current_device.mem_map_rev.root = NULL;
5351 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5352 for (i = 0; i < new_num_devs; i++)
5354 current_device.target_id = i;
5355 devs[num_devs] = current_device;
5356 gomp_mutex_init (&devs[num_devs].lock);
5357 num_devs++;
5362 free (plugin_name);
5363 cur = next + 1;
5365 while (next);
5367 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5368 NUM_DEVICES_OPENMP. */
5369 struct gomp_device_descr *devs_s
5370 = malloc (num_devs * sizeof (struct gomp_device_descr));
5371 if (!devs_s)
5373 num_devs = 0;
5374 free (devs);
5375 devs = NULL;
5377 num_devs_openmp = 0;
5378 for (i = 0; i < num_devs; i++)
5379 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5380 devs_s[num_devs_openmp++] = devs[i];
5381 int num_devs_after_openmp = num_devs_openmp;
5382 for (i = 0; i < num_devs; i++)
5383 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5384 devs_s[num_devs_after_openmp++] = devs[i];
5385 free (devs);
5386 devs = devs_s;
5388 for (i = 0; i < num_devs; i++)
5390 /* The 'devices' array can be moved (by the realloc call) until we have
5391 found all the plugins, so registering with the OpenACC runtime (which
5392 takes a copy of the pointer argument) must be delayed until now. */
5393 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5394 goacc_register (&devs[i]);
5396 if (gomp_global_icv.default_device_var == INT_MIN)
5398 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5399 struct gomp_icv_list *none;
5400 none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
5401 gomp_global_icv.default_device_var = (num_devs_openmp
5402 ? 0 : omp_invalid_device);
5403 none->icvs.default_device_var = gomp_global_icv.default_device_var;
5406 num_devices = num_devs;
5407 num_devices_openmp = num_devs_openmp;
5408 devices = devs;
5409 if (atexit (gomp_target_fini) != 0)
5410 gomp_fatal ("atexit failed");
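/* To illustrate the plugin name assembly above: assuming an ELF target
   where SONAME_SUFFIX (1) from plugin-suffix.h expands to ".so.1" (an
   assumption about that header), the entry "nvptx" in OFFLOAD_PLUGINS
   yields the following file name, which is then dlopen'ed with RTLD_LAZY.  */
#if 0
  const char *name = "libgomp-plugin-" "nvptx" ".so.1";
  /* i.e. "libgomp-plugin-nvptx.so.1"  */
#endif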
5413 #else /* PLUGIN_SUPPORT */
5414 /* If dlfcn.h is unavailable, we always fall back to host execution.
5415 The GOMP_target* routines are just stubs in this case. */
5416 static void
5417 gomp_target_init (void)
5420 #endif /* PLUGIN_SUPPORT */