[official-gcc.git] / libgomp / target.c
/* Copyright (C) 2013-2024 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_static
#define splay_tree_c
#include "splay-tree.h"

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)
static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of clauses from OpenMP 'requires' directives.  */
static int omp_requires_mask;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                        : omp_initial_device))
        return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && num_devices == 0)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but only the host device is available");
      else if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
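
/* [Editor's note: illustrative summary, not part of libgomp.]  Sketch of how
   resolve_device treats a few inputs, assuming two offload devices (0 and 1)
   and OMP_TARGET_OFFLOAD unset:

     resolve_device (omp_initial_device, false)  -> NULL (host fallback)
     resolve_device (0, false)                   -> &devices[0], initialized
     resolve_device (5, false)                   -> NULL (out of range)

   With OMP_TARGET_OFFLOAD=MANDATORY the out-of-range, no-device and
   finalized-device cases call gomp_fatal instead of returning NULL for host
   fallback.  */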
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
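
/* [Editor's note: illustrative summary, not part of libgomp.]  For a
   zero-length key with host_start == host_end == A, gomp_map_lookup probes
   the splay tree three ways: as [A, A+1) (some mapped region contains A),
   as [A-1, A) (a mapped region ends exactly at A), and finally as the exact
   zero-length key itself.  gomp_map_0len_lookup only performs the first of
   these probes.  */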
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is EPHEMERAL data, which we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
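
/* [Editor's note: illustrative sketch, not part of libgomp.]  How chunk
   building behaves for a few adds; offsets are relative to tgt->tgt_start.  */
#if 0
{
  struct gomp_coalesce_chunk chunks[4];
  struct gomp_coalesce_buf cbuf = { .buf = NULL, .tgt = NULL,
                                    .chunks = chunks, .chunk_cnt = 0,
                                    .use_cnt = 0 };
  gomp_coalesce_buf_add (&cbuf, 0, 256);    /* New chunk [0, 256).  */
  gomp_coalesce_buf_add (&cbuf, 320, 128);  /* Gap of 64 bytes is below
                                               MAX_COALESCE_BUF_GAP: chunk 0
                                               grows to [0, 448), use_cnt 2.  */
  gomp_coalesce_buf_add (&cbuf, 8192, 64);  /* Gap too large: new chunk
                                               [8192, 8256).  */
  /* cbuf.chunk_cnt == 2 now; a region overlapping an existing chunk would
     instead set chunk_cnt to -1, disabling coalescing entirely.  */
}
#endif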
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }

                  /* In an asynchronous context, verify that CBUF isn't used
                     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
                  if (__builtin_expect (aq != NULL, 0))
                    assert (ephemeral);

                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  As we've
             not been able to use CBUF, make a copy of the data into a
             temporary buffer.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
                      "dev", d, "host", h, sz);
}
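
/* [Editor's note: illustrative summary, not part of libgomp.]  The three
   paths through gomp_copy_host2dev:
     - destination inside a coalesce chunk: memcpy into cbuf->buf only; the
       actual device transfer happens later, when the whole chunk is flushed
       at the end of gomp_map_vars_internal;
     - asynchronous (aq != NULL) with EPHEMERAL data: the source is duplicated
       into a heap buffer that a queue callback frees after the transfer;
     - otherwise: a direct, blocking host2dev_func call.  */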
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
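
/* [Editor's note: illustrative sketch, not part of libgomp.]  With a
   refcount set, repeated increments of the same key within one construct
   collapse to a single adjustment, matching OpenMP 5.0 semantics.  */
#if 0
{
  splay_tree_key k = tgt->list[0].key;  /* Some previously mapped region.  */
  htab_t refcount_set = htab_create (4);
  gomp_increment_refcount (k, &refcount_set);  /* Refcount: n -> n + 1.  */
  gomp_increment_refcount (k, &refcount_set);  /* Already in set: no-op.  */
  htab_free (refcount_set);
}
#endif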
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set is being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
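
/* [Editor's note: illustrative summary, not part of libgomp.]  Outcome of
   gomp_decrement_refcount for a key whose shared refcount is 1, decremented
   twice with the same REFCOUNT_SET (e.g. two structure-element siblings
   sharing one structelem_refcount):

     1st call: refcount 1 -> 0, first encounter:
               *DO_COPY = true, *DO_REMOVE = true
     2nd call: refcount already 0, seen before:
               *DO_COPY = true, *DO_REMOVE = false

   So each sibling still triggers its copy-back, but the key's removal from
   the splay tree is signalled only once.  */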
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
          && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
         address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             when copying the data to the device.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf,
                  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
        cur_node.tgt_offset = cur_node.host_start;
      else
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Pointer target of array section wasn't mapped");
        }
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
        = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
         array section.  Now subtract bias to get what we want
         to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, implicit, cbuf,
                              refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, implicit, cbuf,
                                      refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, implicit, cbuf,
                                  refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp,
                     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          /* As a special case, allow attaching NULL host pointers.  This
             allows e.g. unassociated Fortran pointers to be mapped
             properly.  */
          data = 0;

          gomp_debug (1,
                      "%s: attaching NULL host pointer, target %p "
                      "(struct base %p)\n", __FUNCTION__, (void *) devptr,
                      (void *) (n->tgt->tgt_start + n->tgt_offset));

          gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                              sizeof (void *), true, cbufp);

          return;
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          if (allow_zero_length_array_sections)
            /* When allowing attachment to zero-length array sections, we
               copy the host pointer when the target region is not mapped.  */
            data = target;
          else
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("pointer target not mapped for attach");
            }
        }
      else
        data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}
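
/* [Editor's note: illustrative sketch, not part of libgomp.]  For a mapped
   struct { int *data; } s whose member points into a separately mapped
   buffer at host address h, the first attach rewrites the device-side copy
   of s.data with a single pointer-sized host2dev copy:

     host:   s.data       == h
     device: dev(s).data  <- device address of h (plus any BIAS)

   Subsequent attaches of the same member only bump attach_count[idx];
   gomp_detach_pointer writes the host pointer value back to the device copy
   only when the count returns to zero.  */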
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
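
/* [Editor's note: illustrative summary, not part of libgomp.]  The offset
   field doubles as a tag when list[i].key is NULL: OFFSET_INLINED means
   hostaddrs[i] already holds the value to use, OFFSET_POINTER yields 0, and
   OFFSET_STRUCT derives the struct base's device address from the first
   element's mapping at list[i + 1] by adding the host-side distance between
   the struct base (hostaddrs[i]) and that element (hostaddrs[i + 1]).  */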
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT
               || (kind & typemask) == GOMP_MAP_STRUCT_UNORD)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask)
                      && sizes[i] != 0)
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH
               || ((kind & typemask)
                   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_INLINED;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 null pointer was there and now a non-null one has been
                 found; that permits transparent re-mapping for Fortran
                 array descriptors which were previously mapped
                 unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, implicit,
                                  NULL, refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_INLINED;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }
  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp, false);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, false, cbufp);
                /* Save device address in hostaddr to permit later
                   availability when doing a deep-firstprivate with pointer
                   attach.  */
                hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
                tgt_size += len;

                /* If followed by GOMP_MAP_ATTACH, pointer assign this
                   firstprivate to hostaddrs[i+1], which is assumed to contain
                   a device address.  */
                if (i + 1 < mapnum
                    && (GOMP_MAP_ATTACH
                        == (typemask & get_kind (short_mapkind, kinds, i+1))))
                  {
                    uintptr_t target = (uintptr_t) hostaddrs[i];
                    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
                    /* Per
                       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
                       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
                       this probably needs revision for 'aq' usage.  */
                    assert (!aq);
                    gomp_copy_host2dev (devicep, aq, devptr, &target,
                                        sizeof (void *), false, cbufp);
                    ++i;
                  }
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily test
                   'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT_UNORD:
                if (sizes[i] > 1)
                  {
                    void *first = hostaddrs[i + 1];
                    for (size_t j = i + 1; j < i + sizes[i]; j++)
                      if (hostaddrs[j + 1] != first)
                        {
                          gomp_mutex_unlock (&devicep->lock);
                          gomp_fatal ("Mapped array elements must be the "
                                      "same (%p vs %p)", first,
                                      hostaddrs[j + 1]);
                        }
                  }
                /* Fallthrough.  */
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                if (sizes[last] == 0)
                  cur_node.host_end++;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (sizes[last] == 0)
                  cur_node.host_end--;
                if (n == NULL && cur_node.host_start == cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("Struct pointer member not mapped (%p)",
                                (void*) hostaddrs[first]);
                  }
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    field_tgt_structelem_first = NULL;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp, refcount_set);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if (i > 0
                    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                        != GOMP_MAP_ALWAYS_POINTER))
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
              case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start
                                            - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      bool zlas
                        = ((kind & typemask)
                           == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp, zlas);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              {
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    /* For this condition to be true, there must be a
                       duplicate struct element mapping.  This can happen with
                       GOMP_MAP_STRUCT_UNORD mappings, for example.  */
                    tgt->list[i].key = n;
                    if (openmp_p)
                      {
                        assert ((n->refcount & REFCOUNT_STRUCTELEM) != 0);
                        assert (field_tgt_structelem_first != NULL);

                        if (i == field_tgt_clear)
                          {
                            n->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                    gomp_increment_refcount (n, refcount_set);
                    tgt->list[i].copy_from
                      = GOMP_MAP_COPY_FROM_P (kind & typemask);
                    tgt->list[i].always_copy_from
                      = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                    tgt->list[i].is_attach = false;
                    tgt->list[i].offset = 0;
                    tgt->list[i].length = k->host_end - k->host_start;
                  }
                else
                  gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                          kind & typemask, false, implicit,
                                          cbufp, refcount_set);
              }
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                k->refcount = 0;
                k->dynamic_refcount = 0;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (openmp_p)
                      {
                        k->refcount = REFCOUNT_STRUCTELEM;
                        if (field_tgt_structelem_first == NULL)
                          {
                            /* Set to first structure element of sequence.  */
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
                            field_tgt_structelem_first = k;
                          }
                        else
                          /* Point to refcount of leading element, but do not
                             increment again.  */
                          k->structelem_refcount_ptr
                            = &field_tgt_structelem_first->structelem_refcount;

                        if (i == field_tgt_clear)
                          {
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                /* First increment, from 0 to 1.  gomp_increment_refcount
                   encapsulates the different increment cases, so use this
                   instead of directly setting 1 during initialization.  */
                gomp_increment_refcount (k, refcount_set);

                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
                    gomp_map_pointer
                      (tgt, aq, (uintptr_t) *(void **) k->host_start,
                       k->tgt_offset, sizes[i], cbufp,
                       ((kind & typemask)
                        == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            /* For OpenMP, the use of refcount_sets causes
                               errors if we set k->refcount = 1 above but also
                               increment it again here, for decrementing will
                               not properly match, since we decrement only once
                               for each key's refcount.  Therefore avoid this
                               increment for OpenMP constructs.  */
                            if (!openmp_p)
                              gomp_increment_refcount (k, refcount_set);
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t) *(void **)
                                              hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp, false);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                  case GOMP_MAP_ALWAYS_PRESENT_TO:
                  case GOMP_MAP_ALWAYS_PRESENT_FROM:
                  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
                                  "dev: %d)", (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size,
                                  devicep->target_id);
#else
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %lu (0x%lx), dev: %d)",
                                  (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size,
                                  devicep->target_id);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), false, cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), true, NULL);
                  }
                array++;
              }
          }
    }
1835 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1837 for (i = 0; i < mapnum; i++)
1839 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1840 gomp_copy_host2dev (devicep, aq,
1841 (void *) (tgt->tgt_start + i * sizeof (void *)),
1842 (void *) &cur_node.tgt_offset, sizeof (void *),
1843 true, cbufp);
1847 if (cbufp)
1849 long c = 0;
1850 for (c = 0; c < cbuf.chunk_cnt; ++c)
1851 gomp_copy_host2dev (devicep, aq,
1852 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1853 (char *) cbuf.buf + (cbuf.chunks[c].start
1854 - cbuf.chunks[0].start),
1855 cbuf.chunks[c].end - cbuf.chunks[c].start,
1856 false, NULL);
1857 if (aq)
1858 /* Free once the transfer has completed. */
1859 devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
1860 else
1861 free (cbuf.buf);
1862 cbuf.buf = NULL;
1863 cbufp = NULL;
1866 /* If the variable from "omp target enter data" map-list was already mapped,
1867 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1868 gomp_exit_data. */
1869 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1871 free (tgt);
1872 tgt = NULL;
1875 gomp_mutex_unlock (&devicep->lock);
1876 return tgt;
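/* Editorial note: when GOMP_MAP_VARS_TARGET is set, the loop above fills
   the first MAPNUM pointer-sized slots at tgt_start with the device
   addresses of the mapped variables, and the outlined target function
   later receives tgt_start as its single argument.  A minimal sketch of
   the device-side view, with a hypothetical kernel name; illustration
   only, not part of libgomp.  */
#if 0
static void
demo_device_entry (void *tgt_start)
{
  void **argv = (void **) tgt_start;
  int *a = (int *) argv[0];	  /* Device address of the 1st mapped var.  */
  double *b = (double *) argv[1]; /* Device address of the 2nd mapped var.  */
  /* ... kernel body operates on a and b ...  */
}
#endif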
1879 static struct target_mem_desc *
1880 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1881 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1882 bool short_mapkind, htab_t *refcount_set,
1883 enum gomp_map_vars_kind pragma_kind)
1885 /* This management of a local refcount_set is for convenience of callers
1886 who do not share a refcount_set over multiple map/unmap uses. */
1887 htab_t local_refcount_set = NULL;
1888 if (refcount_set == NULL)
1890 local_refcount_set = htab_create (mapnum);
1891 refcount_set = &local_refcount_set;
1894 struct target_mem_desc *tgt;
1895 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1896 sizes, kinds, short_mapkind, refcount_set,
1897 pragma_kind);
1898 if (local_refcount_set)
1899 htab_free (local_refcount_set);
1901 return tgt;
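/* Editorial note: a sketch of the paired map/unmap usage with a shared
   refcount set, as done by the GOMP_target* entry points later in this
   file; callers passing NULL instead get the throwaway set created above.
   Illustration only.  */
#if 0
static void
demo_map_run_unmap (struct gomp_device_descr *devicep, size_t mapnum,
		    void **hostaddrs, size_t *sizes, void *kinds)
{
  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  /* ... run the target region ...  */
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
#endif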
1904 attribute_hidden struct target_mem_desc *
1905 goacc_map_vars (struct gomp_device_descr *devicep,
1906 struct goacc_asyncqueue *aq, size_t mapnum,
1907 void **hostaddrs, void **devaddrs, size_t *sizes,
1908 void *kinds, bool short_mapkind,
1909 enum gomp_map_vars_kind pragma_kind)
1911 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1912 sizes, kinds, short_mapkind, NULL,
1913 GOMP_MAP_VARS_OPENACC | pragma_kind);
1916 static void
1917 gomp_unmap_tgt (struct target_mem_desc *tgt)
1919 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1920 if (tgt->tgt_end)
1921 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1923 free (tgt->array);
1924 free (tgt);
1927 static bool
1928 gomp_unref_tgt (void *ptr)
1930 bool is_tgt_unmapped = false;
1932 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1934 if (tgt->refcount > 1)
1935 tgt->refcount--;
1936 else
1938 gomp_unmap_tgt (tgt);
1939 is_tgt_unmapped = true;
1942 return is_tgt_unmapped;
1945 static void
1946 gomp_unref_tgt_void (void *ptr)
1948 (void) gomp_unref_tgt (ptr);
1951 static void
1952 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1954 splay_tree_remove (sp, k);
1955 if (k->aux)
1957 if (k->aux->link_key)
1958 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1959 if (k->aux->attach_count)
1960 free (k->aux->attach_count);
1961 free (k->aux);
1962 k->aux = NULL;
1966 static inline __attribute__((always_inline)) bool
1967 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1968 struct goacc_asyncqueue *aq)
1970 bool is_tgt_unmapped = false;
1972 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1974 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1975 /* Infer the splay_tree_key of the first structelem key using the
1976 pointer to the first structelem_refcount. */
1977 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1978 - offsetof (struct splay_tree_key_s,
1979 structelem_refcount));
1980 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1982 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1983 with the splay_tree_keys embedded inside. */
1984 splay_tree_node node =
1985 (splay_tree_node) ((char *) k
1986 - offsetof (struct splay_tree_node_s, key));
1987 while (true)
1989 /* Start from the _FIRST key and continue for all following
1990 sibling keys. */
1991 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1992 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1993 break;
1994 else
1995 k = &(++node)->key;
1998 else
1999 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2001 if (aq)
2002 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2003 (void *) k->tgt);
2004 else
2005 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2006 return is_tgt_unmapped;
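/* Editorial note: the _FIRST-sibling recovery above is container_of-style
   pointer arithmetic: from a pointer to a member, subtract the member's
   offset to obtain the enclosing struct.  A self-contained demonstration
   with hypothetical types; illustration only.  */
#if 0
#include <assert.h>
#include <stddef.h>

struct demo_key { int refcount; int payload; };

int
main (void)
{
  struct demo_key k = { 1, 42 };
  int *member = &k.refcount;
  struct demo_key *recovered
    = (struct demo_key *) ((char *) member
			   - offsetof (struct demo_key, refcount));
  assert (recovered == &k && recovered->payload == 42);
  return 0;
}
#endif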
2009 attribute_hidden bool
2010 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2012 return gomp_remove_var_internal (devicep, k, NULL);
2015 /* Remove a variable asynchronously. This actually removes the variable
2016 mapping immediately, but retains the linked target_mem_desc until the
2017 asynchronous operation has completed (as it may still refer to target
2018 memory). The device lock must be held before entry, and remains locked on
2019 exit. */
2021 attribute_hidden void
2022 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2023 struct goacc_asyncqueue *aq)
2025 (void) gomp_remove_var_internal (devicep, k, aq);
2028 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2029 variables back from device to host; if it is false, it is assumed that this
2030 has been done already. */
2032 static inline __attribute__((always_inline)) void
2033 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2034 htab_t *refcount_set, struct goacc_asyncqueue *aq)
2036 struct gomp_device_descr *devicep = tgt->device_descr;
2038 if (tgt->list_count == 0)
2040 free (tgt);
2041 return;
2044 gomp_mutex_lock (&devicep->lock);
2045 if (devicep->state == GOMP_DEVICE_FINALIZED)
2047 gomp_mutex_unlock (&devicep->lock);
2048 free (tgt->array);
2049 free (tgt);
2050 return;
2053 size_t i;
2055 /* We must perform detachments before any copies back to the host. */
2056 for (i = 0; i < tgt->list_count; i++)
2058 splay_tree_key k = tgt->list[i].key;
2060 if (k != NULL && tgt->list[i].is_attach)
2061 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2062 + tgt->list[i].offset,
2063 false, NULL);
2066 for (i = 0; i < tgt->list_count; i++)
2068 splay_tree_key k = tgt->list[i].key;
2069 if (k == NULL)
2070 continue;
2072 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2073 counts ('n->refcount', 'n->dynamic_refcount'). */
2074 if (tgt->list[i].is_attach)
2075 continue;
2077 bool do_copy, do_remove;
2078 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2080 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2081 || tgt->list[i].always_copy_from)
2082 gomp_copy_dev2host (devicep, aq,
2083 (void *) (k->host_start + tgt->list[i].offset),
2084 (void *) (k->tgt->tgt_start + k->tgt_offset
2085 + tgt->list[i].offset),
2086 tgt->list[i].length);
2087 if (do_remove)
2089 struct target_mem_desc *k_tgt = k->tgt;
2090 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2091 /* It would be bad if TGT got unmapped while we're still iterating
2092 over its LIST_COUNT, and also expect to use it in the following
2093 code. */
2094 assert (!is_tgt_unmapped
2095 || k_tgt != tgt);
2099 if (aq)
2100 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2101 (void *) tgt);
2102 else
2103 gomp_unref_tgt ((void *) tgt);
2105 gomp_mutex_unlock (&devicep->lock);
2108 static void
2109 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2110 htab_t *refcount_set)
2112 /* This management of a local refcount_set is for convenience of callers
2113 who do not share a refcount_set over multiple map/unmap uses. */
2114 htab_t local_refcount_set = NULL;
2115 if (refcount_set == NULL)
2117 local_refcount_set = htab_create (tgt->list_count);
2118 refcount_set = &local_refcount_set;
2121 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2123 if (local_refcount_set)
2124 htab_free (local_refcount_set);
2127 attribute_hidden void
2128 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2129 struct goacc_asyncqueue *aq)
2131 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2134 static void
2135 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2136 size_t *sizes, void *kinds, bool short_mapkind)
2138 size_t i;
2139 struct splay_tree_key_s cur_node;
2140 const int typemask = short_mapkind ? 0xff : 0x7;
2142 if (!devicep)
2143 return;
2145 if (mapnum == 0)
2146 return;
2148 gomp_mutex_lock (&devicep->lock);
2149 if (devicep->state == GOMP_DEVICE_FINALIZED)
2151 gomp_mutex_unlock (&devicep->lock);
2152 return;
2155 for (i = 0; i < mapnum; i++)
2156 if (sizes[i])
2158 cur_node.host_start = (uintptr_t) hostaddrs[i];
2159 cur_node.host_end = cur_node.host_start + sizes[i];
2160 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2161 if (n)
2163 int kind = get_kind (short_mapkind, kinds, i);
2164 if (n->host_start > cur_node.host_start
2165 || n->host_end < cur_node.host_end)
2167 gomp_mutex_unlock (&devicep->lock);
2168 gomp_fatal ("Trying to update [%p..%p) object when "
2169 "only [%p..%p) is mapped",
2170 (void *) cur_node.host_start,
2171 (void *) cur_node.host_end,
2172 (void *) n->host_start,
2173 (void *) n->host_end);
2176 if (n->aux && n->aux->attach_count)
2178 uintptr_t addr = cur_node.host_start;
2179 while (addr < cur_node.host_end)
2181 /* We have to be careful not to overwrite still attached
2182 pointers during host<->device updates. */
2183 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2184 if (n->aux->attach_count[i] == 0)
2186 void *devaddr = (void *) (n->tgt->tgt_start
2187 + n->tgt_offset
2188 + addr - n->host_start);
2189 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2190 gomp_copy_host2dev (devicep, NULL,
2191 devaddr, (void *) addr,
2192 sizeof (void *), false, NULL);
2193 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2194 gomp_copy_dev2host (devicep, NULL,
2195 (void *) addr, devaddr,
2196 sizeof (void *));
2198 addr += sizeof (void *);
2201 else
2203 void *hostaddr = (void *) cur_node.host_start;
2204 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2205 + cur_node.host_start
2206 - n->host_start);
2207 size_t size = cur_node.host_end - cur_node.host_start;
2209 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2210 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2211 false, NULL);
2212 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2213 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2216 else
2218 int kind = get_kind (short_mapkind, kinds, i);
2220 if (GOMP_MAP_PRESENT_P (kind))
2222 /* We already looked up the memory region above and it
2223 was missing. */
2224 gomp_mutex_unlock (&devicep->lock);
2225 #ifdef HAVE_INTTYPES_H
2226 gomp_fatal ("present clause: not present on the device "
2227 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2228 "dev: %d)", (void *) hostaddrs[i],
2229 (uint64_t) sizes[i], (uint64_t) sizes[i],
2230 devicep->target_id);
2231 #else
2232 gomp_fatal ("present clause: not present on the device "
2233 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2234 (void *) hostaddrs[i], (unsigned long) sizes[i],
2235 (unsigned long) sizes[i], devicep->target_id);
2236 #endif
2240 gomp_mutex_unlock (&devicep->lock);
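/* Editorial note: gomp_update implements 'omp target update'; each motion
   clause becomes one hostaddrs/sizes/kinds entry, and still-attached
   pointer cells are skipped so device pointer values are not clobbered.
   A hedged example of user code that is lowered into calls reaching this
   function; the demo_* names are hypothetical, illustration only.  */
#if 0
double demo_x[1024];

void
demo_update (void)
{
  #pragma omp target enter data map(to: demo_x)
  demo_x[0] = 1.0;			   /* Host-side change.  */
  #pragma omp target update to(demo_x)	   /* Refresh the device copy.  */
  #pragma omp target update from(demo_x)   /* Refresh the host copy.  */
  #pragma omp target exit data map(delete: demo_x)
}
#endif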
2243 static struct gomp_offload_icv_list *
2244 gomp_get_offload_icv_item (int dev_num)
2246 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2247 while (l != NULL && l->device_num != dev_num)
2248 l = l->next;
2250 return l;
2253 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2254 depending on the device num and the variable hierarchy
2255 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2256 device and thus no item with that device number is contained in
2257 gomp_offload_icv_list, then a new item is created and added to the list. */
2259 static struct gomp_offload_icvs *
2260 get_gomp_offload_icvs (int dev_num)
2262 struct gomp_icv_list *dev
2263 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2264 struct gomp_icv_list *all
2265 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2266 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2267 struct gomp_offload_icv_list *offload_icvs
2268 = gomp_get_offload_icv_item (dev_num);
2270 if (offload_icvs != NULL)
2271 return &offload_icvs->icvs;
2273 struct gomp_offload_icv_list *new
2274 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2276 new->device_num = dev_num;
2277 new->icvs.device_num = dev_num;
2278 new->next = gomp_offload_icv_list;
2280 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2281 new->icvs.nteams = dev_x->icvs.nteams_var;
2282 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2283 new->icvs.nteams = dev->icvs.nteams_var;
2284 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2285 new->icvs.nteams = all->icvs.nteams_var;
2286 else
2287 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2289 if (dev_x != NULL
2290 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2291 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2292 else if (dev != NULL
2293 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2294 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2295 else if (all != NULL
2296 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2297 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2298 else
2299 new->icvs.teams_thread_limit
2300 = gomp_default_icv_values.teams_thread_limit_var;
2302 if (dev_x != NULL
2303 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2304 new->icvs.default_device = dev_x->icvs.default_device_var;
2305 else if (dev != NULL
2306 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2307 new->icvs.default_device = dev->icvs.default_device_var;
2308 else if (all != NULL
2309 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2310 new->icvs.default_device = all->icvs.default_device_var;
2311 else
2312 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2314 gomp_offload_icv_list = new;
2315 return &new->icvs;
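/* Editorial note: the fallback chain above, reduced to its shape.  The
   demo_* helper is hypothetical; per the comment before this function,
   the hierarchy corresponds to the _DEV_<num>, _DEV and _ALL ICV
   variants, with the global default last.  Illustration only.  */
#if 0
static int
demo_resolve_icv (const int *dev_x, const int *dev, const int *all,
		  int global_default)
{
  if (dev_x)		 /* Device-number-specific setting (_DEV_<num>).  */
    return *dev_x;
  if (dev)		 /* Generic device setting (_DEV).  */
    return *dev;
  if (all)		 /* Setting for all devices (_ALL).  */
    return *all;
  return global_default; /* gomp_default_icv_values fallback.  */
}
#endif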
2318 /* Load the image pointed to by TARGET_DATA to the device specified by
2319 DEVICEP. Insert into the splay tree the mappings between addresses from
2320 HOST_TABLE and from the loaded target image. We rely on the host and
2321 device compilers emitting variables and functions in the same order. */
2323 static void
2324 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2325 const void *host_table, const void *target_data,
2326 bool is_register_lock)
2328 void **host_func_table = ((void ***) host_table)[0];
2329 void **host_funcs_end = ((void ***) host_table)[1];
2330 void **host_var_table = ((void ***) host_table)[2];
2331 void **host_vars_end = ((void ***) host_table)[3];
2332 void **host_ind_func_table = NULL;
2333 void **host_ind_funcs_end = NULL;
2335 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2337 host_ind_func_table = ((void ***) host_table)[4];
2338 host_ind_funcs_end = ((void ***) host_table)[5];
2341 /* The func and ind_func tables contain only addresses; the var table
2342 contains addresses and corresponding sizes. */
2343 int num_funcs = host_funcs_end - host_func_table;
2344 int num_vars = (host_vars_end - host_var_table) / 2;
2345 int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
2347 /* Load image to device and get target addresses for the image. */
2348 struct addr_pair *target_table = NULL;
2349 uint64_t *rev_target_fn_table = NULL;
2350 int i, num_target_entries;
2352 /* With reverse offload, insert also target-host addresses. */
2353 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2355 num_target_entries
2356 = devicep->load_image_func (devicep->target_id, version,
2357 target_data, &target_table,
2358 rev_lookup ? &rev_target_fn_table : NULL,
2359 num_ind_funcs
2360 ? (uint64_t *) host_ind_func_table : NULL);
2362 if (num_target_entries != num_funcs + num_vars
2363 /* "+1" due to the additional ICV struct. */
2364 && num_target_entries != num_funcs + num_vars + 1)
2366 gomp_mutex_unlock (&devicep->lock);
2367 if (is_register_lock)
2368 gomp_mutex_unlock (&register_lock);
2369 gomp_fatal ("Cannot map target functions or variables"
2370 " (expected %u, have %u)", num_funcs + num_vars,
2371 num_target_entries);
2374 /* Insert host-target address mapping into splay tree. */
2375 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2376 /* "+1" due to the additional ICV struct. */
2377 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2378 * sizeof (*tgt->array));
2379 if (rev_target_fn_table)
2380 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2381 else
2382 tgt->rev_array = NULL;
2383 tgt->refcount = REFCOUNT_INFINITY;
2384 tgt->tgt_start = 0;
2385 tgt->tgt_end = 0;
2386 tgt->to_free = NULL;
2387 tgt->prev = NULL;
2388 tgt->list_count = 0;
2389 tgt->device_descr = devicep;
2390 splay_tree_node array = tgt->array;
2391 reverse_splay_tree_node rev_array = tgt->rev_array;
2393 for (i = 0; i < num_funcs; i++)
2395 splay_tree_key k = &array->key;
2396 k->host_start = (uintptr_t) host_func_table[i];
2397 k->host_end = k->host_start + 1;
2398 k->tgt = tgt;
2399 k->tgt_offset = target_table[i].start;
2400 k->refcount = REFCOUNT_INFINITY;
2401 k->dynamic_refcount = 0;
2402 k->aux = NULL;
2403 array->left = NULL;
2404 array->right = NULL;
2405 splay_tree_insert (&devicep->mem_map, array);
2406 if (rev_target_fn_table)
2408 reverse_splay_tree_key k2 = &rev_array->key;
2409 k2->dev = rev_target_fn_table[i];
2410 k2->k = k;
2411 rev_array->left = NULL;
2412 rev_array->right = NULL;
2413 if (k2->dev != 0)
2414 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2415 rev_array++;
2417 array++;
2420 /* Most significant bit of the size in host and target tables marks
2421 "omp declare target link" variables. */
2422 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2423 const uintptr_t size_mask = ~link_bit;
2425 for (i = 0; i < num_vars; i++)
2427 struct addr_pair *target_var = &target_table[num_funcs + i];
2428 uintptr_t target_size = target_var->end - target_var->start;
2429 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2431 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2433 gomp_mutex_unlock (&devicep->lock);
2434 if (is_register_lock)
2435 gomp_mutex_unlock (&register_lock);
2436 gomp_fatal ("Cannot map target variables (size mismatch)");
2439 splay_tree_key k = &array->key;
2440 k->host_start = (uintptr_t) host_var_table[i * 2];
2441 k->host_end
2442 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2443 k->tgt = tgt;
2444 k->tgt_offset = target_var->start;
2445 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2446 k->dynamic_refcount = 0;
2447 k->aux = NULL;
2448 array->left = NULL;
2449 array->right = NULL;
2450 splay_tree_insert (&devicep->mem_map, array);
2451 array++;
2454 /* The last entry is for the ICVs variable.
2455 Tolerate the case where the plugin does not return that entry. */
2456 if (num_funcs + num_vars < num_target_entries)
2458 struct addr_pair *var = &target_table[num_funcs + num_vars];
2460 /* Start address will be non-zero for the ICVs variable if
2461 the variable was found in this image. */
2462 if (var->start != 0)
2464 /* The index of the devicep within devices[] is regarded as its
2465 'device number', which is different from the per-device type
2466 devicep->target_id. */
2467 int dev_num = (int) (devicep - &devices[0]);
2468 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2469 size_t var_size = var->end - var->start;
2470 if (var_size != sizeof (struct gomp_offload_icvs))
2472 gomp_mutex_unlock (&devicep->lock);
2473 if (is_register_lock)
2474 gomp_mutex_unlock (&register_lock);
2475 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2476 "format");
2478 /* Copy the ICVs variable into place in device memory, thereby
2479 actually putting its device number into effect. */
2480 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2481 var_size, false, NULL);
2482 splay_tree_key k = &array->key;
2483 k->host_start = (uintptr_t) icvs;
2484 k->host_end =
2485 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2486 k->tgt = tgt;
2487 k->tgt_offset = var->start;
2488 k->refcount = REFCOUNT_INFINITY;
2489 k->dynamic_refcount = 0;
2490 k->aux = NULL;
2491 array->left = NULL;
2492 array->right = NULL;
2493 splay_tree_insert (&devicep->mem_map, array);
2494 array++;
2498 free (target_table);
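/* Editorial note: shape of the compiler-emitted host table consumed by
   gomp_load_image_to_device above, assuming a pre-indirect-functions
   image (four pointers; newer images append two more for the indirect
   functions).  The demo_* symbols are hypothetical; illustration only.  */
#if 0
extern void demo_fn1 (void), demo_fn2 (void);
extern int demo_var;

/* Functions contribute one address each; variables contribute an
   address/size pair, with the size's MSB reserved for the
   "omp declare target link" bit.  */
static void *demo_funcs[] = { (void *) demo_fn1, (void *) demo_fn2 };
static void *demo_vars[] = { &demo_var, (void *) sizeof (demo_var) };

static const void *demo_host_table[] = {
  &demo_funcs[0], &demo_funcs[2],   /* host_func_table, host_funcs_end */
  &demo_vars[0], &demo_vars[2],	    /* host_var_table, host_vars_end */
};
#endif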
2501 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
2502 The device must be locked. */
2504 static void
2505 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2506 unsigned version,
2507 const void *host_table, const void *target_data)
2509 void **host_func_table = ((void ***) host_table)[0];
2510 void **host_funcs_end = ((void ***) host_table)[1];
2511 void **host_var_table = ((void ***) host_table)[2];
2512 void **host_vars_end = ((void ***) host_table)[3];
2514 /* The func table contains only addresses; the var table contains addresses
2515 and corresponding sizes. */
2516 int num_funcs = host_funcs_end - host_func_table;
2517 int num_vars = (host_vars_end - host_var_table) / 2;
2519 struct splay_tree_key_s k;
2520 splay_tree_key node = NULL;
2522 /* Find the mapping at the start of the node array. */
2523 if (num_funcs || num_vars)
2525 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2526 : (uintptr_t) host_var_table[0]);
2527 k.host_end = k.host_start + 1;
2528 node = splay_tree_lookup (&devicep->mem_map, &k);
2531 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2533 gomp_mutex_unlock (&devicep->lock);
2534 gomp_fatal ("image unload failed");
2536 if (devicep->mem_map_rev.root)
2538 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2539 real allocation. */
2540 assert (node && node->tgt && node->tgt->rev_array);
2541 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2542 free (node->tgt->rev_array);
2543 devicep->mem_map_rev.root = NULL;
2546 /* Remove mappings from splay tree. */
2547 int i;
2548 for (i = 0; i < num_funcs; i++)
2550 k.host_start = (uintptr_t) host_func_table[i];
2551 k.host_end = k.host_start + 1;
2552 splay_tree_remove (&devicep->mem_map, &k);
2555 /* Most significant bit of the size in host and target tables marks
2556 "omp declare target link" variables. */
2557 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2558 const uintptr_t size_mask = ~link_bit;
2559 bool is_tgt_unmapped = false;
2561 for (i = 0; i < num_vars; i++)
2563 k.host_start = (uintptr_t) host_var_table[i * 2];
2564 k.host_end
2565 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2567 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2568 splay_tree_remove (&devicep->mem_map, &k);
2569 else
2571 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2572 is_tgt_unmapped = gomp_remove_var (devicep, n);
2576 if (node && !is_tgt_unmapped)
2578 free (node->tgt);
2579 free (node);
2583 static void
2584 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2586 char *end = buf + size, *p = buf;
2587 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2588 p += snprintf (p, end - p, "unified_address");
2589 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2590 p += snprintf (p, end - p, "%sunified_shared_memory",
2591 (p == buf ? "" : ", "));
2592 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2593 p += snprintf (p, end - p, "%sreverse_offload",
2594 (p == buf ? "" : ", "));
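/* Editorial note: example use of gomp_requires_to_name; a mask with
   unified_address and reverse_offload set yields the comma-separated
   list shown in the trailing comment.  Illustration only.  */
#if 0
static void
demo_requires_name (void)
{
  char buf[sizeof ("unified_address, unified_shared_memory, "
		   "reverse_offload")];
  gomp_requires_to_name (buf, sizeof (buf),
			 GOMP_REQUIRES_UNIFIED_ADDRESS
			 | GOMP_REQUIRES_REVERSE_OFFLOAD);
  /* buf now contains "unified_address, reverse_offload".  */
}
#endif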
2597 /* This function should be called from every offload image while loading.
2598 It gets the descriptor of the host func and var tables HOST_TABLE, the
2599 TARGET_TYPE of the target, and DATA. */
2601 void
2602 GOMP_offload_register_ver (unsigned version, const void *host_table,
2603 int target_type, const void *data)
2605 int i;
2607 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2608 gomp_fatal ("Library too old for offload (version %u < %u)",
2609 GOMP_VERSION, GOMP_VERSION_LIB (version));
2611 int omp_req;
2612 const void *target_data;
2613 if (GOMP_VERSION_LIB (version) > 1)
2615 omp_req = (int) (size_t) ((void **) data)[0];
2616 target_data = &((void **) data)[1];
2618 else
2620 omp_req = 0;
2621 target_data = data;
2624 gomp_mutex_lock (&register_lock);
2626 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2628 char buf1[sizeof ("unified_address, unified_shared_memory, "
2629 "reverse_offload")];
2630 char buf2[sizeof ("unified_address, unified_shared_memory, "
2631 "reverse_offload")];
2632 gomp_requires_to_name (buf2, sizeof (buf2),
2633 omp_req != GOMP_REQUIRES_TARGET_USED
2634 ? omp_req : omp_requires_mask);
2635 if (omp_req != GOMP_REQUIRES_TARGET_USED
2636 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2638 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2639 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2640 "in multiple compilation units: '%s' vs. '%s'",
2641 buf1, buf2);
2643 else
2644 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2645 "some compilation units", buf2);
2647 omp_requires_mask = omp_req;
2649 /* Load image to all initialized devices. */
2650 for (i = 0; i < num_devices; i++)
2652 struct gomp_device_descr *devicep = &devices[i];
2653 gomp_mutex_lock (&devicep->lock);
2654 if (devicep->type == target_type
2655 && devicep->state == GOMP_DEVICE_INITIALIZED)
2656 gomp_load_image_to_device (devicep, version,
2657 host_table, target_data, true);
2658 gomp_mutex_unlock (&devicep->lock);
2661 /* Insert image to array of pending images. */
2662 offload_images
2663 = gomp_realloc_unlock (offload_images,
2664 (num_offload_images + 1)
2665 * sizeof (struct offload_image_descr));
2666 offload_images[num_offload_images].version = version;
2667 offload_images[num_offload_images].type = target_type;
2668 offload_images[num_offload_images].host_table = host_table;
2669 offload_images[num_offload_images].target_data = target_data;
2671 num_offload_images++;
2672 gomp_mutex_unlock (&register_lock);
2675 /* Legacy entry point. */
2677 void
2678 GOMP_offload_register (const void *host_table, int target_type,
2679 const void *target_data)
2681 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2684 /* This function should be called from every offload image while unloading.
2685 It gets the descriptor of the host func and var tables HOST_TABLE, the
2686 TARGET_TYPE of the target, and DATA. */
2688 void
2689 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2690 int target_type, const void *data)
2692 int i;
2694 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2695 gomp_fatal ("Library too old for offload (version %u < %u)",
2696 GOMP_VERSION, GOMP_VERSION_LIB (version));
2698 const void *target_data;
2699 if (GOMP_VERSION_LIB (version) > 1)
2700 target_data = &((void **) data)[1];
2701 else
2702 target_data = data;
2704 gomp_mutex_lock (&register_lock);
2706 /* Unload image from all initialized devices. */
2707 for (i = 0; i < num_devices; i++)
2709 struct gomp_device_descr *devicep = &devices[i];
2710 gomp_mutex_lock (&devicep->lock);
2711 if (devicep->type == target_type
2712 && devicep->state == GOMP_DEVICE_INITIALIZED)
2713 gomp_unload_image_from_device (devicep, version,
2714 host_table, target_data);
2715 gomp_mutex_unlock (&devicep->lock);
2718 /* Remove image from array of pending images. */
2719 for (i = 0; i < num_offload_images; i++)
2720 if (offload_images[i].target_data == target_data)
2722 offload_images[i] = offload_images[--num_offload_images];
2723 break;
2726 gomp_mutex_unlock (&register_lock);
2729 /* Legacy entry point. */
2731 void
2732 GOMP_offload_unregister (const void *host_table, int target_type,
2733 const void *target_data)
2735 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2738 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2739 must be locked on entry, and remains locked on return. */
2741 attribute_hidden void
2742 gomp_init_device (struct gomp_device_descr *devicep)
2744 int i;
2745 if (!devicep->init_device_func (devicep->target_id))
2747 gomp_mutex_unlock (&devicep->lock);
2748 gomp_fatal ("device initialization failed");
2751 /* Load to the device all images registered so far. */
2752 for (i = 0; i < num_offload_images; i++)
2754 struct offload_image_descr *image = &offload_images[i];
2755 if (image->type == devicep->type)
2756 gomp_load_image_to_device (devicep, image->version,
2757 image->host_table, image->target_data,
2758 false);
2761 /* Initialize OpenACC asynchronous queues. */
2762 goacc_init_asyncqueues (devicep);
2764 devicep->state = GOMP_DEVICE_INITIALIZED;
2767 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2768 must be locked on entry, and remains locked on return. */
2770 attribute_hidden bool
2771 gomp_fini_device (struct gomp_device_descr *devicep)
2773 bool ret = goacc_fini_asyncqueues (devicep);
2774 ret &= devicep->fini_device_func (devicep->target_id);
2775 devicep->state = GOMP_DEVICE_FINALIZED;
2776 return ret;
2779 attribute_hidden void
2780 gomp_unload_device (struct gomp_device_descr *devicep)
2782 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2784 unsigned i;
2786 /* Unload from the device all images currently registered. */
2787 for (i = 0; i < num_offload_images; i++)
2789 struct offload_image_descr *image = &offload_images[i];
2790 if (image->type == devicep->type)
2791 gomp_unload_image_from_device (devicep, image->version,
2792 image->host_table,
2793 image->target_data);
2798 /* Host fallback for GOMP_target{,_ext} routines. */
2800 static void
2801 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2802 struct gomp_device_descr *devicep, void **args)
2804 struct gomp_thread old_thr, *thr = gomp_thread ();
2806 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2807 && devicep != NULL)
2808 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2809 "be used for offloading");
2811 old_thr = *thr;
2812 memset (thr, '\0', sizeof (*thr));
2813 if (gomp_places_list)
2815 thr->place = old_thr.place;
2816 thr->ts.place_partition_len = gomp_places_list_len;
2818 if (args)
2819 while (*args)
2821 intptr_t id = (intptr_t) *args++, val;
2822 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2823 val = (intptr_t) *args++;
2824 else
2825 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2826 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2827 continue;
2828 id &= GOMP_TARGET_ARG_ID_MASK;
2829 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2830 continue;
2831 val = val > INT_MAX ? INT_MAX : val;
2832 if (val)
2833 gomp_icv (true)->thread_limit_var = val;
2834 break;
2837 fn (hostaddrs);
2838 gomp_free_thread (thr);
2839 *thr = old_thr;
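/* Editorial note: a sketch of the ARGS encoding decoded by the loop above,
   assuming the GOMP_TARGET_ARG_* constants from gomp-constants.h: the low
   bits select the device, the id sits under GOMP_TARGET_ARG_ID_MASK, and
   the value is either packed above GOMP_TARGET_ARG_VALUE_SHIFT or spilled
   into the next element when GOMP_TARGET_ARG_SUBSEQUENT_PARAM is set.
   Illustration only.  */
#if 0
/* A thread_limit of 4 for all devices, packed into a single element and
   terminated by NULL.  */
void *demo_args[] = {
  (void *) (intptr_t) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
		       | GOMP_TARGET_ARG_DEVICE_ALL
		       | GOMP_TARGET_ARG_THREAD_LIMIT),
  NULL
};
#endif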
2842 /* Calculate the alignment and size requirements of a private copy of data
2843 shared as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
2845 static inline void
2846 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2847 unsigned short *kinds, size_t *tgt_align,
2848 size_t *tgt_size)
2850 size_t i;
2851 for (i = 0; i < mapnum; i++)
2852 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2854 size_t align = (size_t) 1 << (kinds[i] >> 8);
2855 if (*tgt_align < align)
2856 *tgt_align = align;
2857 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2858 *tgt_size += sizes[i];
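/* Editorial note: a worked example of the rounding above.  With two
   firstprivate objects - 5 bytes at 4-byte alignment, then 8 bytes at
   8-byte alignment - the loop computes:

     tgt_align = max (4, 8)	      = 8
     tgt_size  = round_up (0, 4) + 5  = 5
     tgt_size  = round_up (5, 8) + 8  = 16

   where round_up (s, a) is (s + a - 1) & ~(a - 1), and the alignment is
   recovered from the high byte of the kind as 1 << (kinds[i] >> 8).  */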
2862 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2864 static inline void
2865 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2866 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2867 size_t tgt_size)
2869 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2870 if (al)
2871 tgt += tgt_align - al;
2872 tgt_size = 0;
2873 size_t i;
2874 for (i = 0; i < mapnum; i++)
2875 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2877 size_t align = (size_t) 1 << (kinds[i] >> 8);
2878 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2879 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2880 hostaddrs[i] = tgt + tgt_size;
2881 tgt_size = tgt_size + sizes[i];
2882 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2884 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2885 ++i;
2890 /* Helper function of GOMP_target{,_ext} routines. */
2892 static void *
2893 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2894 void (*host_fn) (void *))
2896 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2897 return (void *) host_fn;
2898 else
2900 gomp_mutex_lock (&devicep->lock);
2901 if (devicep->state == GOMP_DEVICE_FINALIZED)
2903 gomp_mutex_unlock (&devicep->lock);
2904 return NULL;
2907 struct splay_tree_key_s k;
2908 k.host_start = (uintptr_t) host_fn;
2909 k.host_end = k.host_start + 1;
2910 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2911 gomp_mutex_unlock (&devicep->lock);
2912 if (tgt_fn == NULL)
2913 return NULL;
2915 return (void *) tgt_fn->tgt_offset;
2919 /* Called when encountering a target directive. If DEVICE
2920 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
2921 GOMP_DEVICE_HOST_FALLBACK (or any value
2922 larger than the last available hw device), use the host fallback.
2923 FN is the address of the host code, UNUSED is part of the current ABI, but
2924 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2925 with MAPNUM entries, containing the addresses of the host objects, the
2926 sizes of the host objects (resp., for pointer kinds, the pointer bias,
2927 with sizeof (void *) assumed as the size) and the map kinds. */
2929 void
2930 GOMP_target (int device, void (*fn) (void *), const void *unused,
2931 size_t mapnum, void **hostaddrs, size_t *sizes,
2932 unsigned char *kinds)
2934 struct gomp_device_descr *devicep = resolve_device (device, true);
2936 void *fn_addr;
2937 if (devicep == NULL
2938 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2939 /* All shared memory devices should use the GOMP_target_ext function. */
2940 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2941 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2942 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2944 htab_t refcount_set = htab_create (mapnum);
2945 struct target_mem_desc *tgt_vars
2946 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2947 &refcount_set, GOMP_MAP_VARS_TARGET);
2948 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2949 NULL);
2950 htab_clear (refcount_set);
2951 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2952 htab_free (refcount_set);
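/* Editorial note: a hedged sketch of what a compiler conceptually emits
   for a simple target region reaching GOMP_target.  The demo_* names are
   hypothetical, and current compilers emit GOMP_target_ext instead; this
   only illustrates the outlined-function-plus-arrays calling convention.
   Illustration only.  */
#if 0
static void
demo_outlined_fn (void *data)
{
  int *x = ((int **) data)[0];	/* Slot 0 of the pointer block.  */
  *x += 1;
}

void
demo_target_region (int *x)
{
  void *hostaddrs[] = { x };
  size_t sizes[] = { sizeof (int) };
  unsigned char kinds[] = { GOMP_MAP_TOFROM };
  GOMP_target (GOMP_DEVICE_ICV, demo_outlined_fn, NULL,
	       1, hostaddrs, sizes, kinds);
}
#endif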
2955 static inline unsigned int
2956 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2958 /* If we cannot run asynchronously, simply ignore nowait. */
2959 if (devicep != NULL && devicep->async_run_func == NULL)
2960 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2962 return flags;
2965 static void
2966 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2968 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2969 if (item == NULL)
2970 return;
2972 void *host_ptr = &item->icvs;
2973 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2974 if (dev_ptr != NULL)
2975 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2976 sizeof (struct gomp_offload_icvs));
2979 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2980 and several arguments have been added:
2981 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2982 DEPEND is array of dependencies, see GOMP_task for details.
2984 ARGS is a pointer to an array consisting of a variable number of both
2985 device-independent and device-specific arguments, each of which can take
2986 one or two elements, where the first specifies for which device it is
2987 intended, the type and optionally also the value. If the value is not
2988 present in the first element, the whole second element is the actual
2989 value. The last element of the array is a single NULL. Among the
2990 device-independent arguments can be, for example, NUM_TEAMS and THREAD_LIMIT.
2992 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2993 that value, 1 if the teams construct is not present, 0 if the
2994 teams construct does not have a num_teams clause (so the choice is
2995 implementation defined), and -1 if it can't be determined on the host
2996 what value GOMP_teams will have on the device.
2997 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2998 body with that value, 0 if the teams construct does not have a thread_limit
2999 clause or the teams construct is not present, or -1 if it can't be
3000 determined on the host what value GOMP_teams will have on the device. */
3002 void
3003 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
3004 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3005 unsigned int flags, void **depend, void **args)
3007 struct gomp_device_descr *devicep = resolve_device (device, true);
3008 size_t tgt_align = 0, tgt_size = 0;
3009 bool fpc_done = false;
3011 /* Obtain the original TEAMS and THREADS values from ARGS. */
3012 intptr_t orig_teams = 1, orig_threads = 0;
3013 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
3014 void **tmpargs = args;
3015 while (*tmpargs)
3017 intptr_t id = (intptr_t) *tmpargs++, val;
3018 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3020 val = (intptr_t) *tmpargs++;
3021 len = 2;
3023 else
3025 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
3026 len = 1;
3028 num_args += len;
3029 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
3030 continue;
3031 val = val > INT_MAX ? INT_MAX : val;
3032 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
3034 orig_teams = val;
3035 teams_len = len;
3037 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
3039 orig_threads = val;
3040 threads_len = len;
3044 intptr_t new_teams = orig_teams, new_threads = orig_threads;
3045 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3046 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3047 value could not be determined. No change.
3048 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3049 Set device-specific value.
3050 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3051 No change. */
3052 if (orig_teams == -2)
3053 new_teams = 1;
3054 else if (orig_teams == 0)
3056 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3057 if (item != NULL)
3058 new_teams = item->icvs.nteams;
3060 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3061 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3062 e.g. a THREAD_LIMIT clause. */
3063 if (orig_teams > -2 && orig_threads == 0)
3065 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3066 if (item != NULL)
3067 new_threads = item->icvs.teams_thread_limit;
3070 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3071 updated. */
3072 void **new_args = args;
3073 if (orig_teams != new_teams || orig_threads != new_threads)
3075 size_t tms_len = (orig_teams == new_teams
3076 ? teams_len
3077 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3078 ? 1 : 2));
3079 size_t ths_len = (orig_threads == new_threads
3080 ? threads_len
3081 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3082 ? 1 : 2));
3083 /* One additional item after the last arg must be NULL. */
3084 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3085 + ths_len + 1;
3086 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3088 tmpargs = args;
3089 void **tmp_new_args = new_args;
3090 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3091 too if they have not been changed and skipped otherwise. */
3092 while (*tmpargs)
3094 intptr_t id = (intptr_t) *tmpargs;
3095 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3096 && orig_teams != new_teams)
3097 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3098 && orig_threads != new_threads))
3100 tmpargs++;
3101 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3102 tmpargs++;
3104 else
3106 *tmp_new_args++ = *tmpargs++;
3107 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3108 *tmp_new_args++ = *tmpargs++;
3112 /* Add the new TEAMS arg to the new args list if it has been changed. */
3113 if (orig_teams != new_teams)
3115 intptr_t new_val = new_teams;
3116 if (tms_len == 1)
3118 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3119 | GOMP_TARGET_ARG_NUM_TEAMS;
3120 *tmp_new_args++ = (void *) new_val;
3122 else
3124 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3125 | GOMP_TARGET_ARG_NUM_TEAMS);
3126 *tmp_new_args++ = (void *) new_val;
3130 /* Add the new THREADS arg to the new args list if it has been changed. */
3131 if (orig_threads != new_threads)
3133 intptr_t new_val = new_threads;
3134 if (ths_len == 1)
3136 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3137 | GOMP_TARGET_ARG_THREAD_LIMIT;
3138 *tmp_new_args++ = (void *) new_val;
3140 else
3142 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3143 | GOMP_TARGET_ARG_THREAD_LIMIT);
3144 *tmp_new_args++ = (void *) new_val;
3148 *tmp_new_args = NULL;
3151 flags = clear_unsupported_flags (devicep, flags);
3153 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3155 struct gomp_thread *thr = gomp_thread ();
3156 /* Create a team if we don't have any around, as nowait
3157 target tasks make sense to run asynchronously even when
3158 outside of any parallel. */
3159 if (__builtin_expect (thr->ts.team == NULL, 0))
3161 struct gomp_team *team = gomp_new_team (1);
3162 struct gomp_task *task = thr->task;
3163 struct gomp_task **implicit_task = &task;
3164 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3165 team->prev_ts = thr->ts;
3166 thr->ts.team = team;
3167 thr->ts.team_id = 0;
3168 thr->ts.work_share = &team->work_shares[0];
3169 thr->ts.last_work_share = NULL;
3170 #ifdef HAVE_SYNC_BUILTINS
3171 thr->ts.single_count = 0;
3172 #endif
3173 thr->ts.static_trip = 0;
3174 thr->task = &team->implicit_task[0];
3175 gomp_init_task (thr->task, NULL, icv);
3176 while (*implicit_task
3177 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3178 implicit_task = &(*implicit_task)->parent;
3179 if (*implicit_task)
3181 thr->task = *implicit_task;
3182 gomp_end_task ();
3183 free (*implicit_task);
3184 thr->task = &team->implicit_task[0];
3186 else
3187 pthread_setspecific (gomp_thread_destructor, thr);
3188 if (implicit_task != &task)
3190 *implicit_task = thr->task;
3191 thr->task = task;
3194 if (thr->ts.team
3195 && !thr->task->final_task)
3197 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3198 sizes, kinds, flags, depend, new_args,
3199 GOMP_TARGET_TASK_BEFORE_MAP);
3200 return;
3204 /* If there are depend clauses, but nowait is not present
3205 (or we are in a final task), block the parent task until the
3206 dependencies are resolved and then just continue with the rest
3207 of the function as if it is a merged task. */
3208 if (depend != NULL)
3210 struct gomp_thread *thr = gomp_thread ();
3211 if (thr->task && thr->task->depend_hash)
3213 /* If we might need to wait, copy firstprivate now. */
3214 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3215 &tgt_align, &tgt_size);
3216 if (tgt_align)
3218 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3219 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3220 tgt_align, tgt_size);
3222 fpc_done = true;
3223 gomp_task_maybe_wait_for_dependencies (depend);
3227 void *fn_addr;
3228 if (devicep == NULL
3229 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3230 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3231 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3233 if (!fpc_done)
3235 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3236 &tgt_align, &tgt_size);
3237 if (tgt_align)
3239 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3240 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3241 tgt_align, tgt_size);
3244 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3245 return;
3248 struct target_mem_desc *tgt_vars;
3249 htab_t refcount_set = NULL;
3251 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3253 if (!fpc_done)
3255 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3256 &tgt_align, &tgt_size);
3257 if (tgt_align)
3259 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3260 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3261 tgt_align, tgt_size);
3264 tgt_vars = NULL;
3266 else
3268 refcount_set = htab_create (mapnum);
3269 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3270 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3272 devicep->run_func (devicep->target_id, fn_addr,
3273 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3274 new_args);
3275 if (tgt_vars)
3277 htab_clear (refcount_set);
3278 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3280 if (refcount_set)
3281 htab_free (refcount_set);
3283 /* Copy back ICVs from device to host.
3284 HOST_PTR is expected to exist since it was added in
3285 gomp_load_image_to_device if not already available. */
3286 gomp_copy_back_icvs (devicep, device);
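/* Editorial note: the two encodings produced by the args rewrite above,
   here for NUM_TEAMS values of 8 and 100000; values within the signed
   16-bit range stay packed in one element, larger ones spill into a
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM pair.  Illustration only.  */
#if 0
void *demo_packed[] = {		/* One element: 8 fits in 16 bits.  */
  (void *) (intptr_t) ((8 << GOMP_TARGET_ARG_VALUE_SHIFT)
		       | GOMP_TARGET_ARG_NUM_TEAMS),
  NULL
};
void *demo_spilled[] = {	/* Two elements: 100000 does not fit.  */
  (void *) (intptr_t) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
		       | GOMP_TARGET_ARG_NUM_TEAMS),
  (void *) (intptr_t) 100000,
  NULL
};
#endif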
3291 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3292 keeping track of all variable handling, assuming that reverse offload occurs
3293 only very rarely. The downside is that the reverse search is slow. */
3295 struct gomp_splay_tree_rev_lookup_data {
3296 uintptr_t tgt_start;
3297 uintptr_t tgt_end;
3298 splay_tree_key key;
3301 static int
3302 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3304 struct gomp_splay_tree_rev_lookup_data *data;
3305 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3306 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3308 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3309 return 0;
3311 size_t j;
3312 for (j = 0; j < key->tgt->list_count; j++)
3313 if (key->tgt->list[j].key == key)
3314 break;
3315 assert (j < key->tgt->list_count);
3316 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3318 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3319 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3321 data->key = key;
3322 return 1;
3324 return 0;
3327 static inline splay_tree_key
3328 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3329 bool zero_len)
3331 struct gomp_splay_tree_rev_lookup_data data;
3332 data.key = NULL;
3333 data.tgt_start = tgt_start;
3334 data.tgt_end = tgt_end;
3336 if (tgt_start != tgt_end)
3338 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3339 return data.key;
3342 data.tgt_end++;
3343 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3344 if (data.key != NULL || zero_len)
3345 return data.key;
3346 data.tgt_end--;
3348 data.tgt_start--;
3349 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3350 return data.key;
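/* Editorial note: for a zero-sized lookup ([addr, addr)) no interval can
   overlap, so the code above widens the query - first to [addr, addr+1)
   (ADDR at or inside a mapped block), then, unless ZERO_LEN makes a NULL
   result acceptable, to [addr-1, addr) (ADDR one past the end of a
   mapped block).  */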
3353 struct cpy_data
3355 uint64_t devaddr;
3356 bool present, aligned;
3360 /* Search the just-mapped reverse-offload data; return the index if found,
3361 otherwise a value >= N. */
3363 static inline int
3364 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3365 unsigned short *kinds, uint64_t *sizes, size_t n,
3366 uint64_t tgt_start, uint64_t tgt_end)
3368 const bool short_mapkind = true;
3369 const int typemask = short_mapkind ? 0xff : 0x7;
3370 size_t i;
3371 for (i = 0; i < n; i++)
3373 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3374 == GOMP_MAP_STRUCT);
3375 uint64_t dev_end;
3376 if (!is_struct)
3377 dev_end = d[i].devaddr + sizes[i];
3378 else
3380 if (i + sizes[i] < n)
3381 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3382 else
3383 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3385 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3386 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3387 break;
3388 if (is_struct)
3389 i += sizes[i];
3391 return i;
3394 static inline int
3395 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3396 unsigned short *kinds, uint64_t *sizes,
3397 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3398 bool zero_len)
3400 size_t i;
3401 if (tgt_start != tgt_end)
3402 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3403 tgt_start, tgt_end);
3404 tgt_end++;
3405 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3406 tgt_start, tgt_end);
3407 if (i < n || zero_len)
3408 return i;
3409 tgt_end--;
3411 tgt_start--;
3412 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3413 tgt_start, tgt_end);
3416 /* Handle reverse offload. This is called by the device plugins for a
3417 reverse offload; it is not called if the outer target runs on the host.
3418 The mapping is simplified: device-affecting constructs (except for target
3419 with device(ancestor:1)) must not be encountered; in particular, no
3420 target (enter/exit) data. */
3422 void
3423 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3424 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3425 struct goacc_asyncqueue *aq)
3427 /* Return early if there is no offload code. */
3428 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3429 return;
3430 /* Currently, this fails because of calculate_firstprivate_requirements
3431 below; it could be fixed, but additional code would need to be updated to
3432 handle 32-bit hosts - thus, it is not worthwhile. */
3433 if (sizeof (void *) != sizeof (uint64_t))
3434 gomp_fatal ("Reverse offload for 32-bit hosts is not supported.");
3436 struct cpy_data *cdata = NULL;
3437 uint64_t *devaddrs;
3438 uint64_t *sizes;
3439 unsigned short *kinds;
3440 const bool short_mapkind = true;
3441 const int typemask = short_mapkind ? 0xff : 0x7;
3442 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3444 reverse_splay_tree_key n;
3445 struct reverse_splay_tree_key_s k;
3446 k.dev = fn_ptr;
3448 gomp_mutex_lock (&devicep->lock);
3449 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3450 gomp_mutex_unlock (&devicep->lock);
3452 if (n == NULL)
3453 gomp_fatal ("Cannot find reverse-offload function");
3454 void (*host_fn) (void *) = (void (*) (void *)) n->k->host_start;
3456 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3458 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3459 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3460 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3462 else
3464 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3465 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3466 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3467 gomp_copy_dev2host (devicep, aq, devaddrs,
3468 (const void *) (uintptr_t) devaddrs_ptr,
3469 mapnum * sizeof (uint64_t));
3470 gomp_copy_dev2host (devicep, aq, sizes,
3471 (const void *) (uintptr_t) sizes_ptr,
3472 mapnum * sizeof (uint64_t));
3473 gomp_copy_dev2host (devicep, aq, kinds,
3474 (const void *) (uintptr_t) kinds_ptr,
3475 mapnum * sizeof (unsigned short));
3476 if (aq && !devicep->openacc.async.synchronize_func (aq))
3477 exit (EXIT_FAILURE);
3480 size_t tgt_align = 0, tgt_size = 0;
3482 /* If actually executed on 32bit systems, the casts lead to wrong code;
3483 but 32bit with offloading is not supported; see top of this function. */
3484 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3485 (void *) (uintptr_t) kinds,
3486 &tgt_align, &tgt_size);
3488 if (tgt_align)
3490 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3491 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3492 if (al)
3493 tgt += tgt_align - al;
3494 tgt_size = 0;
3495 for (uint64_t i = 0; i < mapnum; i++)
3496 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3497 && devaddrs[i] != 0)
3499 size_t align = (size_t) 1 << (kinds[i] >> 8);
3500 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3501 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3502 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3503 (size_t) sizes[i]);
3504 else
3506 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3507 (void *) (uintptr_t) devaddrs[i],
3508 (size_t) sizes[i]);
3509 if (aq && !devicep->openacc.async.synchronize_func (aq))
3510 exit (EXIT_FAILURE);
3512 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3513 tgt_size = tgt_size + sizes[i];
3514 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3515 && i + 1 < mapnum
3516 && ((get_kind (short_mapkind, kinds, i) & typemask)
3517 == GOMP_MAP_ATTACH))
3519 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3520 = (uint64_t) devaddrs[i];
3521 ++i;
3526 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3528 size_t j, struct_cpy = 0;
3529 splay_tree_key n2;
3530 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3531 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3532 gomp_mutex_lock (&devicep->lock);
3533 for (uint64_t i = 0; i < mapnum; i++)
3535 if (devaddrs[i] == 0)
3536 continue;
3537 n = NULL;
3538 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3539 switch (kind)
3541 case GOMP_MAP_FIRSTPRIVATE:
3542 case GOMP_MAP_FIRSTPRIVATE_INT:
3543 continue;
3545 case GOMP_MAP_DELETE:
3546 case GOMP_MAP_RELEASE:
3547 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3548 /* Assume it is present; look it up - but ignore unless the
3549 present clause is there. */
3550 case GOMP_MAP_ALLOC:
3551 case GOMP_MAP_FROM:
3552 case GOMP_MAP_FORCE_ALLOC:
3553 case GOMP_MAP_FORCE_FROM:
3554 case GOMP_MAP_ALWAYS_FROM:
3555 case GOMP_MAP_TO:
3556 case GOMP_MAP_TOFROM:
3557 case GOMP_MAP_FORCE_TO:
3558 case GOMP_MAP_FORCE_TOFROM:
3559 case GOMP_MAP_ALWAYS_TO:
3560 case GOMP_MAP_ALWAYS_TOFROM:
3561 case GOMP_MAP_FORCE_PRESENT:
3562 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3563 case GOMP_MAP_ALWAYS_PRESENT_TO:
3564 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3565 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3566 cdata[i].devaddr = devaddrs[i];
3567 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3568 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3569 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3570 devaddrs[i],
3571 devaddrs[i] + sizes[i], zero_len);
3572 if (j < i)
3574 n2 = NULL;
3575 cdata[i].present = true;
3576 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3578 else
3580 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3581 devaddrs[i],
3582 devaddrs[i] + sizes[i], zero_len);
3583 cdata[i].present = n2 != NULL;
3585 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3587 gomp_mutex_unlock (&devicep->lock);
3588 #ifdef HAVE_INTTYPES_H
3589 gomp_fatal ("present clause: no corresponding data on "
3590 "parent device at %p with size %"PRIu64,
3591 (void *) (uintptr_t) devaddrs[i],
3592 (uint64_t) sizes[i]);
3593 #else
3594 gomp_fatal ("present clause: no corresponding data on "
3595 "parent device at %p with size %lu",
3596 (void *) (uintptr_t) devaddrs[i],
3597 (unsigned long) sizes[i]);
3598 #endif
3599 break;
3601 else if (!cdata[i].present
3602 && kind != GOMP_MAP_DELETE
3603 && kind != GOMP_MAP_RELEASE
3604 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3606 cdata[i].aligned = true;
3607 size_t align = (size_t) 1 << (kinds[i] >> 8);
3608 devaddrs[i]
3609 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3610 sizes[i]);
3612 else if (n2 != NULL)
3613 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3614 - (n2->tgt->tgt_start + n2->tgt_offset));
3615 if (((!cdata[i].present || struct_cpy)
3616 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3617 || kind == GOMP_MAP_FORCE_TO
3618 || kind == GOMP_MAP_FORCE_TOFROM
3619 || GOMP_MAP_ALWAYS_TO_P (kind))
3621 gomp_copy_dev2host (devicep, aq,
3622 (void *) (uintptr_t) devaddrs[i],
3623 (void *) (uintptr_t) cdata[i].devaddr,
3624 sizes[i]);
3625 if (aq && !devicep->openacc.async.synchronize_func (aq))
3627 gomp_mutex_unlock (&devicep->lock);
3628 exit (EXIT_FAILURE);
3631 if (struct_cpy)
3632 struct_cpy--;
3633 break;
3634 case GOMP_MAP_ATTACH:
3635 case GOMP_MAP_POINTER:
3636 case GOMP_MAP_ALWAYS_POINTER:
3637 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3638 devaddrs[i] + sizes[i],
3639 devaddrs[i] + sizes[i]
3640 + sizeof (void*), false);
3641 cdata[i].present = n2 != NULL;
3642 cdata[i].devaddr = devaddrs[i];
3643 if (n2)
3644 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3645 - (n2->tgt->tgt_start + n2->tgt_offset));
3646 else
3648 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3649 devaddrs[i] + sizes[i],
3650 devaddrs[i] + sizes[i]
3651 + sizeof (void*), false);
3652 if (j < i)
3654 cdata[i].present = true;
3655 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3656 - cdata[j].devaddr);
3659 if (!cdata[i].present)
3660 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3661 /* Assume that when present, the pointer is already correct. */
3662 if (!n2)
3663 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3664 = devaddrs[i-1];
3665 break;
3666 case GOMP_MAP_TO_PSET:
3667 /* Assume that when present, the pointers are fine and no 'to:'
3668 is required. */
3669 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3670 devaddrs[i], devaddrs[i] + sizes[i],
3671 false);
3672 cdata[i].present = n2 != NULL;
3673 cdata[i].devaddr = devaddrs[i];
3674 if (n2)
3675 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3676 - (n2->tgt->tgt_start + n2->tgt_offset));
3677 else
3679 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3680 devaddrs[i],
3681 devaddrs[i] + sizes[i], false);
3682 if (j < i)
3684 cdata[i].present = true;
3685 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3686 - cdata[j].devaddr);
3689 if (!cdata[i].present)
3691 cdata[i].aligned = true;
3692 size_t align = (size_t) 1 << (kinds[i] >> 8);
3693 devaddrs[i]
3694 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3695 sizes[i]);
3696 gomp_copy_dev2host (devicep, aq,
3697 (void *) (uintptr_t) devaddrs[i],
3698 (void *) (uintptr_t) cdata[i].devaddr,
3699 sizes[i]);
3700 if (aq && !devicep->openacc.async.synchronize_func (aq))
3702 gomp_mutex_unlock (&devicep->lock);
3703 exit (EXIT_FAILURE);
3706 for (j = i + 1; j < mapnum; j++)
3708 kind = get_kind (short_mapkind, kinds, j) & typemask;
3709 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3710 && !GOMP_MAP_POINTER_P (kind))
3711 break;
3712 if (devaddrs[j] < devaddrs[i])
3713 break;
3714 if (cdata[i].present)
3715 continue;
3716 if (devaddrs[j] == 0)
3718 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3719 continue;
3721 int k;
3722 n2 = NULL;
3723 /* Dereference devaddrs[j] to get the device addr. */
3724 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3725 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3726 + sizes[j]);
3727 cdata[j].present = true;
3728 cdata[j].devaddr = devaddrs[j];
3729 if (devaddrs[j] == 0)
3730 continue;
3731 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3732 devaddrs[j],
3733 devaddrs[j] + sizeof (void*),
3734 false);
3735 if (k < j)
3736 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3737 - cdata[k].devaddr);
3738 else
3740 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3741 devaddrs[j],
3742 devaddrs[j] + sizeof (void*),
3743 false);
3744 if (n2 == NULL)
3746 gomp_mutex_unlock (&devicep->lock);
3747 gomp_fatal ("Pointer target wasn't mapped");
3749 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3750 - (n2->tgt->tgt_start + n2->tgt_offset));
3752 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3753 = (void *) (uintptr_t) devaddrs[j];
3755 i = j - 1;
3756 break;
3757 case GOMP_MAP_STRUCT:
3758 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3759 devaddrs[i + sizes[i]]
3760 + sizes[i + sizes[i]], false);
3761 cdata[i].present = n2 != NULL;
3762 cdata[i].devaddr = devaddrs[i];
3763 struct_cpy = cdata[i].present ? 0 : sizes[i];
3764 if (!n2)
3766 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3767 - devaddrs[i+1]
3768 + sizes[i + sizes[i]]);
3769 size_t align = (size_t) 1 << (kinds[i] >> 8);
3770 cdata[i].aligned = true;
3771 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3772 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3774 else
3775 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3776 - (n2->tgt->tgt_start + n2->tgt_offset));
3777 break;
3778 default:
3779 gomp_mutex_unlock (&devicep->lock);
3780 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3783 gomp_mutex_unlock (&devicep->lock);
3786 host_fn (devaddrs);
3788 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3790 uint64_t struct_cpy = 0;
3791 bool clean_struct = false;
3792 for (uint64_t i = 0; i < mapnum; i++)
3794 if (cdata[i].devaddr == 0)
3795 continue;
3796 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3797 bool copy = !cdata[i].present || struct_cpy;
3798 switch (kind)
3800 case GOMP_MAP_FORCE_FROM:
3801 case GOMP_MAP_FORCE_TOFROM:
3802 case GOMP_MAP_ALWAYS_FROM:
3803 case GOMP_MAP_ALWAYS_TOFROM:
3804 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3805 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3806 copy = true;
3807 /* FALLTHRU */
3808 case GOMP_MAP_FROM:
3809 case GOMP_MAP_TOFROM:
3810 if (copy)
3812 gomp_copy_host2dev (devicep, aq,
3813 (void *) (uintptr_t) cdata[i].devaddr,
3814 (void *) (uintptr_t) devaddrs[i],
3815 sizes[i], false, NULL);
3816 if (aq && !devicep->openacc.async.synchronize_func (aq))
3817 exit (EXIT_FAILURE);
3819 default:
3820 break;
3822 if (struct_cpy)
3824 struct_cpy--;
3825 continue;
3827 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3829 clean_struct = true;
3830 struct_cpy = sizes[i];
3832 else if (!cdata[i].present && cdata[i].aligned)
3833 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3834 else if (!cdata[i].present)
3835 free ((void *) (uintptr_t) devaddrs[i]);
3837 if (clean_struct)
3838 for (uint64_t i = 0; i < mapnum; i++)
3839 if (!cdata[i].present
3840 && ((get_kind (short_mapkind, kinds, i) & typemask)
3841 == GOMP_MAP_STRUCT))
3843 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3844 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3847 free (devaddrs);
3848 free (sizes);
3849 free (kinds);
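/* Illustrative sketch (not part of libgomp): the kind of user code whose
   mappings gomp_target_rev above resolves.  Host shadow copies are
   allocated and filled before HOST_FN runs, then FROM-style mappings are
   copied back to the device and the shadow copies freed.  Assumes a
   program compiled with 'omp requires reverse_offload'; all names here
   are hypothetical.  */
#if 0
#pragma omp requires reverse_offload
void
example_reverse_offload (int *a, int n)
{
  #pragma omp target map(tofrom: a[0:n])
  {
    /* Executed back on the host via gomp_target_rev.  */
    #pragma omp target device (ancestor: 1) map(always, tofrom: a[0:n])
    for (int i = 0; i < n; i++)
      a[i] += 1;
  }
}
#endif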
3853 /* Host fallback for GOMP_target_data{,_ext} routines. */
3855 static void
3856 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3858 struct gomp_task_icv *icv = gomp_icv (false);
3860 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3861 && devicep != NULL)
3862 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3863 "be used for offloading");
3865 if (icv->target_data)
3867 /* Even when doing a host fallback, if there are any active
3868 #pragma omp target data constructs, we need to remember the
3869 new #pragma omp target data; otherwise GOMP_target_end_data
3870 would get out of sync. */
3871 struct target_mem_desc *tgt
3872 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3873 NULL, GOMP_MAP_VARS_DATA);
3874 tgt->prev = icv->target_data;
3875 icv->target_data = tgt;
3879 void
3880 GOMP_target_data (int device, const void *unused, size_t mapnum,
3881 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3883 struct gomp_device_descr *devicep = resolve_device (device, true);
3885 if (devicep == NULL
3886 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3887 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3888 return gomp_target_data_fallback (devicep);
3890 struct target_mem_desc *tgt
3891 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3892 NULL, GOMP_MAP_VARS_DATA);
3893 struct gomp_task_icv *icv = gomp_icv (true);
3894 tgt->prev = icv->target_data;
3895 icv->target_data = tgt;
3898 void
3899 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3900 size_t *sizes, unsigned short *kinds)
3902 struct gomp_device_descr *devicep = resolve_device (device, true);
3904 if (devicep == NULL
3905 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3906 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3907 return gomp_target_data_fallback (devicep);
3909 struct target_mem_desc *tgt
3910 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3911 NULL, GOMP_MAP_VARS_DATA);
3912 struct gomp_task_icv *icv = gomp_icv (true);
3913 tgt->prev = icv->target_data;
3914 icv->target_data = tgt;
3917 void
3918 GOMP_target_end_data (void)
3920 struct gomp_task_icv *icv = gomp_icv (false);
3921 if (icv->target_data)
3923 struct target_mem_desc *tgt = icv->target_data;
3924 icv->target_data = tgt->prev;
3925 gomp_unmap_vars (tgt, true, NULL);
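/* Illustrative sketch (not part of libgomp): a structured 'target data'
   region as lowered by the compiler onto GOMP_target_data_ext above on
   entry and GOMP_target_end_data on exit, pushing and popping
   icv->target_data.  The array name and extent are hypothetical.  */
#if 0
void
example_target_data (double *a, size_t n)
{
  #pragma omp target data map(tofrom: a[0:n])
  {
    #pragma omp target
    for (size_t i = 0; i < n; i++)
      a[i] *= 2.0;
  }
}
#endif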
3929 void
3930 GOMP_target_update (int device, const void *unused, size_t mapnum,
3931 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3933 struct gomp_device_descr *devicep = resolve_device (device, true);
3935 if (devicep == NULL
3936 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3937 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3938 return;
3940 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3943 void
3944 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3945 size_t *sizes, unsigned short *kinds,
3946 unsigned int flags, void **depend)
3948 struct gomp_device_descr *devicep = resolve_device (device, true);
3950 /* If there are depend clauses, but nowait is not present,
3951 block the parent task until the dependencies are resolved
3952 and then just continue with the rest of the function as if it
3953 were a merged task. Until we are able to schedule tasks during
3954 variable mapping or unmapping, ignore nowait if depend clauses
3955 are not present. */
3956 if (depend != NULL)
3958 struct gomp_thread *thr = gomp_thread ();
3959 if (thr->task && thr->task->depend_hash)
3961 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3962 && thr->ts.team
3963 && !thr->task->final_task)
3965 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3966 mapnum, hostaddrs, sizes, kinds,
3967 flags | GOMP_TARGET_FLAG_UPDATE,
3968 depend, NULL, GOMP_TARGET_TASK_DATA))
3969 return;
3971 else
3973 struct gomp_team *team = thr->ts.team;
3974 /* If parallel or taskgroup has been cancelled, don't start new
3975 tasks. */
3976 if (__builtin_expect (gomp_cancel_var, 0) && team)
3978 if (gomp_team_barrier_cancelled (&team->barrier))
3979 return;
3980 if (thr->task->taskgroup)
3982 if (thr->task->taskgroup->cancelled)
3983 return;
3984 if (thr->task->taskgroup->workshare
3985 && thr->task->taskgroup->prev
3986 && thr->task->taskgroup->prev->cancelled)
3987 return;
3991 gomp_task_maybe_wait_for_dependencies (depend);
3996 if (devicep == NULL
3997 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3998 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3999 return;
4001 struct gomp_thread *thr = gomp_thread ();
4002 struct gomp_team *team = thr->ts.team;
4003 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4004 if (__builtin_expect (gomp_cancel_var, 0) && team)
4006 if (gomp_team_barrier_cancelled (&team->barrier))
4007 return;
4008 if (thr->task->taskgroup)
4010 if (thr->task->taskgroup->cancelled)
4011 return;
4012 if (thr->task->taskgroup->workshare
4013 && thr->task->taskgroup->prev
4014 && thr->task->taskgroup->prev->cancelled)
4015 return;
4019 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
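/* Illustrative sketch (not part of libgomp): a 'target update' with
   depend/nowait that reaches GOMP_target_update_ext above.  With an
   active team the update is queued via gomp_create_target_task; without
   nowait the parent task first waits for the dependencies.  Names are
   hypothetical.  */
#if 0
void
example_target_update (double *a, size_t n)
{
  #pragma omp task depend(out: a[0])
  a[0] = 1.0;
  #pragma omp target update to(a[0:n]) depend(in: a[0]) nowait
  #pragma omp taskwait
}
#endif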
4022 static void
4023 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
4024 void **hostaddrs, size_t *sizes, unsigned short *kinds,
4025 htab_t *refcount_set)
4027 const int typemask = 0xff;
4028 size_t i;
4029 gomp_mutex_lock (&devicep->lock);
4030 if (devicep->state == GOMP_DEVICE_FINALIZED)
4032 gomp_mutex_unlock (&devicep->lock);
4033 return;
4036 for (i = 0; i < mapnum; i++)
4037 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
4039 struct splay_tree_key_s cur_node;
4040 cur_node.host_start = (uintptr_t) hostaddrs[i];
4041 cur_node.host_end = cur_node.host_start + sizeof (void *);
4042 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
4044 if (n)
4045 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
4046 false, NULL);
4049 int nrmvars = 0;
4050 splay_tree_key remove_vars[mapnum];
4052 for (i = 0; i < mapnum; i++)
4054 struct splay_tree_key_s cur_node;
4055 unsigned char kind = kinds[i] & typemask;
4056 switch (kind)
4058 case GOMP_MAP_FROM:
4059 case GOMP_MAP_ALWAYS_FROM:
4060 case GOMP_MAP_DELETE:
4061 case GOMP_MAP_RELEASE:
4062 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4063 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4064 cur_node.host_start = (uintptr_t) hostaddrs[i];
4065 cur_node.host_end = cur_node.host_start + sizes[i];
4066 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4067 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
4068 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
4069 : splay_tree_lookup (&devicep->mem_map, &cur_node);
4070 if (!k)
4071 continue;
4073 bool delete_p = (kind == GOMP_MAP_DELETE
4074 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4075 bool do_copy, do_remove;
4076 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4077 &do_remove);
4079 if ((kind == GOMP_MAP_FROM && do_copy)
4080 || kind == GOMP_MAP_ALWAYS_FROM)
4082 if (k->aux && k->aux->attach_count)
4084 /* We have to be careful not to overwrite still-attached
4085 pointers during the copyback to host. */
4086 uintptr_t addr = k->host_start;
4087 while (addr < k->host_end)
4089 size_t i = (addr - k->host_start) / sizeof (void *);
4090 if (k->aux->attach_count[i] == 0)
4091 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4092 (void *) (k->tgt->tgt_start
4093 + k->tgt_offset
4094 + addr - k->host_start),
4095 sizeof (void *));
4096 addr += sizeof (void *);
4099 else
4100 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4101 (void *) (k->tgt->tgt_start + k->tgt_offset
4102 + cur_node.host_start
4103 - k->host_start),
4104 cur_node.host_end - cur_node.host_start);
4107 /* Structure element lists are removed altogether at once, which
4108 may cause immediate deallocation of the target_mem_desc, causing
4109 errors if we still have following element siblings to copy back.
4110 While we're at it, it also seems more disciplined to simply
4111 queue all removals together for processing below.
4113 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4114 not have this problem, since it maintains an additional
4115 tgt->refcount = 1 reference to the target_mem_desc to start with. */
4117 if (do_remove)
4118 remove_vars[nrmvars++] = k;
4119 break;
4121 case GOMP_MAP_DETACH:
4122 break;
4123 default:
4124 gomp_mutex_unlock (&devicep->lock);
4125 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4126 kind);
4130 for (int i = 0; i < nrmvars; i++)
4131 gomp_remove_var (devicep, remove_vars[i]);
4133 gomp_mutex_unlock (&devicep->lock);
4136 void
4137 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4138 size_t *sizes, unsigned short *kinds,
4139 unsigned int flags, void **depend)
4141 struct gomp_device_descr *devicep = resolve_device (device, true);
4143 /* If there are depend clauses, but nowait is not present,
4144 block the parent task until the dependencies are resolved
4145 and then just continue with the rest of the function as if it
4146 were a merged task. Until we are able to schedule tasks during
4147 variable mapping or unmapping, ignore nowait if depend clauses
4148 are not present. */
4149 if (depend != NULL)
4151 struct gomp_thread *thr = gomp_thread ();
4152 if (thr->task && thr->task->depend_hash)
4154 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4155 && thr->ts.team
4156 && !thr->task->final_task)
4158 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4159 mapnum, hostaddrs, sizes, kinds,
4160 flags, depend, NULL,
4161 GOMP_TARGET_TASK_DATA))
4162 return;
4164 else
4166 struct gomp_team *team = thr->ts.team;
4167 /* If parallel or taskgroup has been cancelled, don't start new
4168 tasks. */
4169 if (__builtin_expect (gomp_cancel_var, 0) && team)
4171 if (gomp_team_barrier_cancelled (&team->barrier))
4172 return;
4173 if (thr->task->taskgroup)
4175 if (thr->task->taskgroup->cancelled)
4176 return;
4177 if (thr->task->taskgroup->workshare
4178 && thr->task->taskgroup->prev
4179 && thr->task->taskgroup->prev->cancelled)
4180 return;
4184 gomp_task_maybe_wait_for_dependencies (depend);
4189 if (devicep == NULL
4190 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4191 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4192 return;
4194 struct gomp_thread *thr = gomp_thread ();
4195 struct gomp_team *team = thr->ts.team;
4196 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4197 if (__builtin_expect (gomp_cancel_var, 0) && team)
4199 if (gomp_team_barrier_cancelled (&team->barrier))
4200 return;
4201 if (thr->task->taskgroup)
4203 if (thr->task->taskgroup->cancelled)
4204 return;
4205 if (thr->task->taskgroup->workshare
4206 && thr->task->taskgroup->prev
4207 && thr->task->taskgroup->prev->cancelled)
4208 return;
4212 htab_t refcount_set = htab_create (mapnum);
4214 /* The variables are mapped separately such that they can be released
4215 independently. */
4216 size_t i, j;
4217 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4218 for (i = 0; i < mapnum; i++)
4219 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT
4220 || (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4222 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4223 &kinds[i], true, &refcount_set,
4224 GOMP_MAP_VARS_ENTER_DATA);
4225 i += sizes[i];
4227 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4229 for (j = i + 1; j < mapnum; j++)
4230 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4231 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4232 break;
4233 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4234 &kinds[i], true, &refcount_set,
4235 GOMP_MAP_VARS_ENTER_DATA);
4236 i += j - i - 1;
4238 else if (i + 1 < mapnum
4239 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4240 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4241 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4243 /* An attach operation must be processed together with the mapped
4244 base-pointer list item. */
4245 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4246 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4247 i += 1;
4249 else
4250 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4251 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4252 else
4253 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4254 htab_free (refcount_set);
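/* Illustrative sketch (not part of libgomp): unstructured 'enter data' /
   'exit data' constructs that land in GOMP_target_enter_exit_data above;
   the exit form carries GOMP_TARGET_FLAG_EXIT_DATA and is routed to
   gomp_exit_data.  Names are hypothetical.  */
#if 0
void
example_enter_exit_data (double *a, size_t n)
{
  #pragma omp target enter data map(to: a[0:n])
  #pragma omp target
  for (size_t i = 0; i < n; i++)
    a[i] += 1.0;
  #pragma omp target exit data map(from: a[0:n])
}
#endif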
4257 bool
4258 gomp_target_task_fn (void *data)
4260 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4261 struct gomp_device_descr *devicep = ttask->devicep;
4263 if (ttask->fn != NULL)
4265 void *fn_addr;
4266 if (devicep == NULL
4267 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4268 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4269 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4271 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4272 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4273 ttask->args);
4274 return false;
4277 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4279 if (ttask->tgt)
4280 gomp_unmap_vars (ttask->tgt, true, NULL);
4281 return false;
4284 void *actual_arguments;
4285 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4287 ttask->tgt = NULL;
4288 actual_arguments = ttask->hostaddrs;
4290 else
4292 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4293 NULL, ttask->sizes, ttask->kinds, true,
4294 NULL, GOMP_MAP_VARS_TARGET);
4295 actual_arguments = (void *) ttask->tgt->tgt_start;
4297 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4299 assert (devicep->async_run_func);
4300 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4301 ttask->args, (void *) ttask);
4302 return true;
4304 else if (devicep == NULL
4305 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4306 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4307 return false;
4309 size_t i;
4310 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4311 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4312 ttask->kinds, true);
4313 else
4315 htab_t refcount_set = htab_create (ttask->mapnum);
4316 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4317 for (i = 0; i < ttask->mapnum; i++)
4318 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT
4319 || (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4321 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4322 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4323 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4324 i += ttask->sizes[i];
4326 else
4327 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4328 &ttask->kinds[i], true, &refcount_set,
4329 GOMP_MAP_VARS_ENTER_DATA);
4330 else
4331 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4332 ttask->kinds, &refcount_set);
4333 htab_free (refcount_set);
4335 return false;
4338 void
4339 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4341 if (thread_limit)
4343 struct gomp_task_icv *icv = gomp_icv (true);
4344 icv->thread_limit_var
4345 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4347 (void) num_teams;
4350 bool
4351 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4352 unsigned int thread_limit, bool first)
4354 struct gomp_thread *thr = gomp_thread ();
4355 if (first)
4357 if (thread_limit)
4359 struct gomp_task_icv *icv = gomp_icv (true);
4360 icv->thread_limit_var
4361 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4363 (void) num_teams_high;
4364 if (num_teams_low == 0)
4365 num_teams_low = 1;
4366 thr->num_teams = num_teams_low - 1;
4367 thr->team_num = 0;
4369 else if (thr->team_num == thr->num_teams)
4370 return false;
4371 else
4372 ++thr->team_num;
4373 return true;
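/* Illustrative sketch (not part of libgomp): how a compiler could drive
   the host-fallback teams loop with GOMP_teams4 above.  The first call
   records the thread-limit ICV and the team count and returns true;
   later calls advance thr->team_num until it reaches thr->num_teams.
   The body callback and team count are hypothetical.  */
#if 0
static void
example_teams_loop (void (*body) (void))
{
  if (GOMP_teams4 (4, 4, 0, true))
    do
      body ();  /* Runs once per team; omp_get_team_num () identifies it.  */
    while (GOMP_teams4 (4, 4, 0, false));
}
#endif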
4376 void *
4377 omp_target_alloc (size_t size, int device_num)
4379 if (device_num == omp_initial_device
4380 || device_num == gomp_get_num_devices ())
4381 return malloc (size);
4383 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4384 if (devicep == NULL)
4385 return NULL;
4387 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4388 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4389 return malloc (size);
4391 gomp_mutex_lock (&devicep->lock);
4392 void *ret = devicep->alloc_func (devicep->target_id, size);
4393 gomp_mutex_unlock (&devicep->lock);
4394 return ret;
4397 void
4398 omp_target_free (void *device_ptr, int device_num)
4400 if (device_num == omp_initial_device
4401 || device_num == gomp_get_num_devices ())
4403 free (device_ptr);
4404 return;
4407 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4408 if (devicep == NULL || device_ptr == NULL)
4409 return;
4411 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4412 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4414 free (device_ptr);
4415 return;
4418 gomp_mutex_lock (&devicep->lock);
4419 gomp_free_device_memory (devicep, device_ptr);
4420 gomp_mutex_unlock (&devicep->lock);
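/* Illustrative sketch (not part of libgomp): pairing omp_target_alloc and
   omp_target_free above.  For the initial device both degenerate to
   malloc/free.  The size is hypothetical.  */
#if 0
void
example_target_alloc (void)
{
  int dev = omp_get_default_device ();
  void *p = omp_target_alloc (1024, dev);
  if (p != NULL)
    {
      /* ... use p via omp_target_memcpy or an is_device_ptr clause ... */
      omp_target_free (p, dev);
    }
}
#endif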
4423 int
4424 omp_target_is_present (const void *ptr, int device_num)
4426 if (device_num == omp_initial_device
4427 || device_num == gomp_get_num_devices ())
4428 return 1;
4430 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4431 if (devicep == NULL)
4432 return 0;
4434 if (ptr == NULL)
4435 return 1;
4437 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4438 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4439 return 1;
4441 gomp_mutex_lock (&devicep->lock);
4442 struct splay_tree_s *mem_map = &devicep->mem_map;
4443 struct splay_tree_key_s cur_node;
4445 cur_node.host_start = (uintptr_t) ptr;
4446 cur_node.host_end = cur_node.host_start;
4447 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4448 int ret = n != NULL;
4449 gomp_mutex_unlock (&devicep->lock);
4450 return ret;
4453 static int
4454 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4455 struct gomp_device_descr **dst_devicep,
4456 struct gomp_device_descr **src_devicep)
4458 if (dst_device_num != gomp_get_num_devices ()
4459 /* gomp_get_num_devices above has to be called unconditionally; it initializes num_devices_openmp, used for the source check below. */
4460 && dst_device_num != omp_initial_device)
4462 *dst_devicep = resolve_device (dst_device_num, false);
4463 if (*dst_devicep == NULL)
4464 return EINVAL;
4466 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4467 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4468 *dst_devicep = NULL;
4471 if (src_device_num != num_devices_openmp
4472 && src_device_num != omp_initial_device)
4474 *src_devicep = resolve_device (src_device_num, false);
4475 if (*src_devicep == NULL)
4476 return EINVAL;
4478 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4479 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4480 *src_devicep = NULL;
4483 return 0;
4486 static int
4487 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4488 size_t dst_offset, size_t src_offset,
4489 struct gomp_device_descr *dst_devicep,
4490 struct gomp_device_descr *src_devicep)
4492 bool ret;
4493 if (src_devicep == NULL && dst_devicep == NULL)
4495 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4496 return 0;
4498 if (src_devicep == NULL)
4500 gomp_mutex_lock (&dst_devicep->lock);
4501 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4502 (char *) dst + dst_offset,
4503 (char *) src + src_offset, length);
4504 gomp_mutex_unlock (&dst_devicep->lock);
4505 return (ret ? 0 : EINVAL);
4507 if (dst_devicep == NULL)
4509 gomp_mutex_lock (&src_devicep->lock);
4510 ret = src_devicep->dev2host_func (src_devicep->target_id,
4511 (char *) dst + dst_offset,
4512 (char *) src + src_offset, length);
4513 gomp_mutex_unlock (&src_devicep->lock);
4514 return (ret ? 0 : EINVAL);
4516 if (src_devicep == dst_devicep)
4518 gomp_mutex_lock (&src_devicep->lock);
4519 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4520 (char *) dst + dst_offset,
4521 (char *) src + src_offset, length);
4522 gomp_mutex_unlock (&src_devicep->lock);
4523 return (ret ? 0 : EINVAL);
4525 return EINVAL;
4528 int
4529 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4530 size_t src_offset, int dst_device_num, int src_device_num)
4532 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4533 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4534 &dst_devicep, &src_devicep);
4536 if (ret)
4537 return ret;
4539 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4540 dst_devicep, src_devicep);
4542 return ret;
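/* Illustrative sketch (not part of libgomp): a host-to-device copy via
   omp_target_memcpy above.  Offsets are in bytes; omp_initial_device
   (or the value of omp_get_num_devices ()) names the host side.  Buffer
   name and size are hypothetical.  */
#if 0
int
example_memcpy (double *host_buf, size_t n)
{
  int dev = omp_get_default_device ();
  void *dev_buf = omp_target_alloc (n * sizeof (double), dev);
  if (dev_buf == NULL)
    return ENOMEM;
  /* Destination on device DEV, source on the initial (host) device.  */
  int err = omp_target_memcpy (dev_buf, host_buf, n * sizeof (double),
			       0, 0, dev, omp_initial_device);
  omp_target_free (dev_buf, dev);
  return err;
}
#endif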
4545 typedef struct
4547 void *dst;
4548 const void *src;
4549 size_t length;
4550 size_t dst_offset;
4551 size_t src_offset;
4552 struct gomp_device_descr *dst_devicep;
4553 struct gomp_device_descr *src_devicep;
4554 } omp_target_memcpy_data;
4556 static void
4557 omp_target_memcpy_async_helper (void *args)
4559 omp_target_memcpy_data *a = args;
4560 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4561 a->src_offset, a->dst_devicep, a->src_devicep))
4562 gomp_fatal ("omp_target_memcpy failed");
4565 int
4566 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4567 size_t dst_offset, size_t src_offset,
4568 int dst_device_num, int src_device_num,
4569 int depobj_count, omp_depend_t *depobj_list)
4571 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4572 unsigned int flags = 0;
4573 void *depend[depobj_count + 5];
4574 int i;
4575 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4576 &dst_devicep, &src_devicep);
4578 omp_target_memcpy_data s = {
4579 .dst = dst,
4580 .src = src,
4581 .length = length,
4582 .dst_offset = dst_offset,
4583 .src_offset = src_offset,
4584 .dst_devicep = dst_devicep,
4585 .src_devicep = src_devicep
4588 if (check)
4589 return check;
4591 if (depobj_count > 0 && depobj_list != NULL)
4593 flags |= GOMP_TASK_FLAG_DEPEND;
4594 depend[0] = 0;
4595 depend[1] = (void *) (uintptr_t) depobj_count;
4596 depend[2] = depend[3] = depend[4] = 0;
4597 for (i = 0; i < depobj_count; ++i)
4598 depend[i + 5] = &depobj_list[i];
4601 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4602 __alignof__ (s), true, flags, depend, 0, NULL);
4604 return 0;
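/* Illustrative sketch (not part of libgomp): an asynchronous copy ordered
   after a depend object.  As set up above, the helper task receives a
   five-slot depend header (0, count, 0, 0, 0) followed by the
   omp_depend_t addresses.  Names are hypothetical.  */
#if 0
void
example_memcpy_async (void *dst, const void *src, size_t len, int dev)
{
  int x;
  omp_depend_t dep;
  #pragma omp depobj(dep) depend(inout: x)
  if (omp_target_memcpy_async (dst, src, len, 0, 0, dev,
			       omp_initial_device, 1, &dep) == 0)
    {
      #pragma omp taskwait
    }
  #pragma omp depobj(dep) destroy
}
#endif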
4607 static int
4608 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4609 int num_dims, const size_t *volume,
4610 const size_t *dst_offsets,
4611 const size_t *src_offsets,
4612 const size_t *dst_dimensions,
4613 const size_t *src_dimensions,
4614 struct gomp_device_descr *dst_devicep,
4615 struct gomp_device_descr *src_devicep,
4616 size_t *tmp_size, void **tmp)
4618 size_t dst_slice = element_size;
4619 size_t src_slice = element_size;
4620 size_t j, dst_off, src_off, length;
4621 int i, ret;
4623 if (num_dims == 1)
4625 if (__builtin_mul_overflow (element_size, volume[0], &length)
4626 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4627 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4628 return EINVAL;
4629 if (dst_devicep == NULL && src_devicep == NULL)
4631 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4632 length);
4633 ret = 1;
4635 else if (src_devicep == NULL)
4636 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4637 (char *) dst + dst_off,
4638 (const char *) src + src_off,
4639 length);
4640 else if (dst_devicep == NULL)
4641 ret = src_devicep->dev2host_func (src_devicep->target_id,
4642 (char *) dst + dst_off,
4643 (const char *) src + src_off,
4644 length);
4645 else if (src_devicep == dst_devicep)
4646 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4647 (char *) dst + dst_off,
4648 (const char *) src + src_off,
4649 length);
4650 else
4652 if (*tmp_size == 0)
4654 *tmp_size = length;
4655 *tmp = malloc (length);
4656 if (*tmp == NULL)
4657 return ENOMEM;
4659 else if (*tmp_size < length)
4661 *tmp_size = length;
4662 free (*tmp);
4663 *tmp = malloc (length);
4664 if (*tmp == NULL)
4665 return ENOMEM;
4667 ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4668 (const char *) src + src_off,
4669 length);
4670 if (ret == 1)
4671 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4672 (char *) dst + dst_off, *tmp,
4673 length);
4675 return ret ? 0 : EINVAL;
4678 /* host->device, device->host and intra-device. */
4679 if (num_dims == 2
4680 && ((src_devicep
4681 && src_devicep == dst_devicep
4682 && src_devicep->memcpy2d_func)
4683 || (!src_devicep != !dst_devicep
4684 && ((src_devicep && src_devicep->memcpy2d_func)
4685 || (dst_devicep && dst_devicep->memcpy2d_func)))))
4687 size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4688 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4689 int src_id = src_devicep ? src_devicep->target_id : -1;
4690 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4692 if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4693 || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4694 || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4695 || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4696 || __builtin_mul_overflow (src_offsets[1], element_size,
4697 &src_off_sz1))
4698 return EINVAL;
4699 ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4700 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4701 src, src_off_sz1, src_offsets[0], src_sz1);
4702 if (ret != -1)
4703 return ret ? 0 : EINVAL;
4705 else if (num_dims == 3
4706 && ((src_devicep
4707 && src_devicep == dst_devicep
4708 && src_devicep->memcpy3d_func)
4709 || (!src_devicep != !dst_devicep
4710 && ((src_devicep && src_devicep->memcpy3d_func)
4711 || (dst_devicep && dst_devicep->memcpy3d_func)))))
4713 size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4714 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4715 int src_id = src_devicep ? src_devicep->target_id : -1;
4716 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4718 if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4719 || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4720 || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4721 || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4722 || __builtin_mul_overflow (src_offsets[2], element_size,
4723 &src_off_sz2))
4724 return EINVAL;
4725 ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4726 dst, dst_off_sz2, dst_offsets[1],
4727 dst_offsets[0], dst_sz2, dst_dimensions[1],
4728 src, src_off_sz2, src_offsets[1],
4729 src_offsets[0], src_sz2, src_dimensions[1]);
4730 if (ret != -1)
4731 return ret ? 0 : EINVAL;
4734 for (i = 1; i < num_dims; i++)
4735 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4736 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4737 return EINVAL;
4738 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4739 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4740 return EINVAL;
4741 for (j = 0; j < volume[0]; j++)
4743 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4744 (const char *) src + src_off,
4745 element_size, num_dims - 1,
4746 volume + 1, dst_offsets + 1,
4747 src_offsets + 1, dst_dimensions + 1,
4748 src_dimensions + 1, dst_devicep,
4749 src_devicep, tmp_size, tmp);
4750 if (ret)
4751 return ret;
4752 dst_off += dst_slice;
4753 src_off += src_slice;
4755 return 0;
4758 static int
4759 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4760 int src_device_num,
4761 struct gomp_device_descr **dst_devicep,
4762 struct gomp_device_descr **src_devicep)
4764 if (!dst && !src)
4765 return INT_MAX;
4767 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4768 dst_devicep, src_devicep);
4769 if (ret)
4770 return ret;
4772 return 0;
4775 static int
4776 omp_target_memcpy_rect_copy (void *dst, const void *src,
4777 size_t element_size, int num_dims,
4778 const size_t *volume, const size_t *dst_offsets,
4779 const size_t *src_offsets,
4780 const size_t *dst_dimensions,
4781 const size_t *src_dimensions,
4782 struct gomp_device_descr *dst_devicep,
4783 struct gomp_device_descr *src_devicep)
4785 size_t tmp_size = 0;
4786 void *tmp = NULL;
4787 bool lock_src;
4788 bool lock_dst;
4790 lock_src = src_devicep != NULL;
4791 lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
4792 if (lock_src)
4793 gomp_mutex_lock (&src_devicep->lock);
4794 if (lock_dst)
4795 gomp_mutex_lock (&dst_devicep->lock);
4796 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4797 volume, dst_offsets, src_offsets,
4798 dst_dimensions, src_dimensions,
4799 dst_devicep, src_devicep,
4800 &tmp_size, &tmp);
4801 if (lock_src)
4802 gomp_mutex_unlock (&src_devicep->lock);
4803 if (lock_dst)
4804 gomp_mutex_unlock (&dst_devicep->lock);
4805 if (tmp)
4806 free (tmp);
4808 return ret;
4811 int
4812 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4813 int num_dims, const size_t *volume,
4814 const size_t *dst_offsets,
4815 const size_t *src_offsets,
4816 const size_t *dst_dimensions,
4817 const size_t *src_dimensions,
4818 int dst_device_num, int src_device_num)
4820 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4822 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4823 src_device_num, &dst_devicep,
4824 &src_devicep);
4826 if (check)
4827 return check;
4829 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4830 volume, dst_offsets, src_offsets,
4831 dst_dimensions, src_dimensions,
4832 dst_devicep, src_devicep);
4834 return ret;
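/* Illustrative sketch (not part of libgomp): copying a 2x3 sub-rectangle
   of a 4x5 host matrix into a 2x3 device matrix with
   omp_target_memcpy_rect above.  The generic worker computes each byte
   offset as offset * slice, where slice is element_size times the product
   of the trailing dimensions; dedicated memcpy2d/memcpy3d plugin hooks
   are tried first when available.  Names and extents are hypothetical;
   DST is assumed to be device memory on DEV.  */
#if 0
int
example_memcpy_rect (double (*dst)[3], double (*src)[5], int dev)
{
  const size_t volume[2]   = { 2, 3 };	/* Rows and columns to copy.  */
  const size_t dst_offs[2] = { 0, 0 };
  const size_t src_offs[2] = { 1, 2 };	/* Source corner src[1][2].  */
  const size_t dst_dims[2] = { 2, 3 };
  const size_t src_dims[2] = { 4, 5 };
  return omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
				 dst_offs, src_offs, dst_dims, src_dims,
				 dev, omp_initial_device);
}
#endif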
4837 typedef struct
4839 void *dst;
4840 const void *src;
4841 size_t element_size;
4842 const size_t *volume;
4843 const size_t *dst_offsets;
4844 const size_t *src_offsets;
4845 const size_t *dst_dimensions;
4846 const size_t *src_dimensions;
4847 struct gomp_device_descr *dst_devicep;
4848 struct gomp_device_descr *src_devicep;
4849 int num_dims;
4850 } omp_target_memcpy_rect_data;
4852 static void
4853 omp_target_memcpy_rect_async_helper (void *args)
4855 omp_target_memcpy_rect_data *a = args;
4856 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4857 a->num_dims, a->volume, a->dst_offsets,
4858 a->src_offsets, a->dst_dimensions,
4859 a->src_dimensions, a->dst_devicep,
4860 a->src_devicep);
4861 if (ret)
4862 gomp_fatal ("omp_target_memcpy_rect failed");
4865 int
4866 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4867 int num_dims, const size_t *volume,
4868 const size_t *dst_offsets,
4869 const size_t *src_offsets,
4870 const size_t *dst_dimensions,
4871 const size_t *src_dimensions,
4872 int dst_device_num, int src_device_num,
4873 int depobj_count, omp_depend_t *depobj_list)
4875 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4876 unsigned flags = 0;
4877 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4878 src_device_num, &dst_devicep,
4879 &src_devicep);
4880 void *depend[depobj_count + 5];
4881 int i;
4883 omp_target_memcpy_rect_data s = {
4884 .dst = dst,
4885 .src = src,
4886 .element_size = element_size,
4887 .num_dims = num_dims,
4888 .volume = volume,
4889 .dst_offsets = dst_offsets,
4890 .src_offsets = src_offsets,
4891 .dst_dimensions = dst_dimensions,
4892 .src_dimensions = src_dimensions,
4893 .dst_devicep = dst_devicep,
4894 .src_devicep = src_devicep
4897 if (check)
4898 return check;
4900 if (depobj_count > 0 && depobj_list != NULL)
4902 flags |= GOMP_TASK_FLAG_DEPEND;
4903 depend[0] = 0;
4904 depend[1] = (void *) (uintptr_t) depobj_count;
4905 depend[2] = depend[3] = depend[4] = 0;
4906 for (i = 0; i < depobj_count; ++i)
4907 depend[i + 5] = &depobj_list[i];
4910 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4911 __alignof__ (s), true, flags, depend, 0, NULL);
4913 return 0;
4916 int
4917 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4918 size_t size, size_t device_offset, int device_num)
4920 if (device_num == omp_initial_device
4921 || device_num == gomp_get_num_devices ())
4922 return EINVAL;
4924 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4925 if (devicep == NULL)
4926 return EINVAL;
4928 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4929 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4930 return EINVAL;
4932 gomp_mutex_lock (&devicep->lock);
4934 struct splay_tree_s *mem_map = &devicep->mem_map;
4935 struct splay_tree_key_s cur_node;
4936 int ret = EINVAL;
4938 cur_node.host_start = (uintptr_t) host_ptr;
4939 cur_node.host_end = cur_node.host_start + size;
4940 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4941 if (n)
4943 if (n->tgt->tgt_start + n->tgt_offset
4944 == (uintptr_t) device_ptr + device_offset
4945 && n->host_start <= cur_node.host_start
4946 && n->host_end >= cur_node.host_end)
4947 ret = 0;
4949 else
4951 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4952 tgt->array = gomp_malloc (sizeof (*tgt->array));
4953 tgt->refcount = 1;
4954 tgt->tgt_start = 0;
4955 tgt->tgt_end = 0;
4956 tgt->to_free = NULL;
4957 tgt->prev = NULL;
4958 tgt->list_count = 0;
4959 tgt->device_descr = devicep;
4960 splay_tree_node array = tgt->array;
4961 splay_tree_key k = &array->key;
4962 k->host_start = cur_node.host_start;
4963 k->host_end = cur_node.host_end;
4964 k->tgt = tgt;
4965 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4966 k->refcount = REFCOUNT_INFINITY;
4967 k->dynamic_refcount = 0;
4968 k->aux = NULL;
4969 array->left = NULL;
4970 array->right = NULL;
4971 splay_tree_insert (&devicep->mem_map, array);
4972 ret = 0;
4974 gomp_mutex_unlock (&devicep->lock);
4975 return ret;
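/* Illustrative sketch (not part of libgomp): associating separately
   allocated device storage with a host pointer so that later map clauses
   reuse it (the REFCOUNT_INFINITY entry created above) instead of
   allocating fresh device memory.  Names are hypothetical.  */
#if 0
void
example_associate (double *h, size_t n)
{
  int dev = omp_get_default_device ();
  void *d = omp_target_alloc (n * sizeof (double), dev);
  if (d == NULL
      || omp_target_associate_ptr (h, d, n * sizeof (double), 0, dev) != 0)
    return;
  #pragma omp target map(tofrom: h[0:n])	/* Reuses D, no new alloc.  */
  for (size_t i = 0; i < n; i++)
    h[i] = i;
  omp_target_disassociate_ptr (h, dev);
  omp_target_free (d, dev);
}
#endif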
4978 int
4979 omp_target_disassociate_ptr (const void *ptr, int device_num)
4981 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4982 if (devicep == NULL)
4983 return EINVAL;
4985 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4986 return EINVAL;
4988 gomp_mutex_lock (&devicep->lock);
4990 struct splay_tree_s *mem_map = &devicep->mem_map;
4991 struct splay_tree_key_s cur_node;
4992 int ret = EINVAL;
4994 cur_node.host_start = (uintptr_t) ptr;
4995 cur_node.host_end = cur_node.host_start;
4996 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4997 if (n
4998 && n->host_start == cur_node.host_start
4999 && n->refcount == REFCOUNT_INFINITY
5000 && n->tgt->tgt_start == 0
5001 && n->tgt->to_free == NULL
5002 && n->tgt->refcount == 1
5003 && n->tgt->list_count == 0)
5005 splay_tree_remove (&devicep->mem_map, n);
5006 gomp_unmap_tgt (n->tgt);
5007 ret = 0;
5010 gomp_mutex_unlock (&devicep->lock);
5011 return ret;
5014 void *
5015 omp_get_mapped_ptr (const void *ptr, int device_num)
5017 if (device_num == omp_initial_device
5018 || device_num == omp_get_initial_device ())
5019 return (void *) ptr;
5021 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5022 if (devicep == NULL)
5023 return NULL;
5025 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5026 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5027 return (void *) ptr;
5029 gomp_mutex_lock (&devicep->lock);
5031 struct splay_tree_s *mem_map = &devicep->mem_map;
5032 struct splay_tree_key_s cur_node;
5033 void *ret = NULL;
5035 cur_node.host_start = (uintptr_t) ptr;
5036 cur_node.host_end = cur_node.host_start;
5037 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
5039 if (n)
5041 uintptr_t offset = cur_node.host_start - n->host_start;
5042 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
5045 gomp_mutex_unlock (&devicep->lock);
5047 return ret;
5050 int
5051 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
5053 if (device_num == omp_initial_device
5054 || device_num == gomp_get_num_devices ())
5055 return true;
5057 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5058 if (devicep == NULL)
5059 return false;
5061 /* TODO: Unified shared memory must be handled when available. */
5063 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
5066 int
5067 omp_pause_resource (omp_pause_resource_t kind, int device_num)
5069 (void) kind;
5070 if (device_num == omp_initial_device
5071 || device_num == gomp_get_num_devices ())
5072 return gomp_pause_host ();
5074 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5075 if (devicep == NULL)
5076 return -1;
5078 /* Do nothing for target devices for now. */
5079 return 0;
5082 int
5083 omp_pause_resource_all (omp_pause_resource_t kind)
5085 (void) kind;
5086 if (gomp_pause_host ())
5087 return -1;
5088 /* Do nothing for target devices for now. */
5089 return 0;
5092 ialias (omp_pause_resource)
5093 ialias (omp_pause_resource_all)
5095 #ifdef PLUGIN_SUPPORT
5097 /* This function tries to load a plugin for DEVICE. The name of the
5098 plugin is passed in PLUGIN_NAME.
5099 The handles of the found functions are stored in the corresponding fields
5100 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5102 static bool
5103 gomp_load_plugin_for_device (struct gomp_device_descr *device,
5104 const char *plugin_name)
5106 const char *err = NULL, *last_missing = NULL;
5108 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5109 if (!plugin_handle)
5110 #if OFFLOAD_DEFAULTED
5111 return 0;
5112 #else
5113 goto dl_fail;
5114 #endif
5116 /* Check if all required functions are available in the plugin and store
5117 their handles. None of the symbols can legitimately be NULL,
5118 so we don't need to check dlerror all the time. */
5119 #define DLSYM(f) \
5120 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5121 goto dl_fail
5122 /* Similar, but a missing function is not an error. The macro evaluates
5123 to false if the lookup failed, true otherwise. */
5124 #define DLSYM_OPT(f, n) \
5125 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5126 || (last_missing = #n, 0))
5128 DLSYM (version);
5129 if (device->version_func () != GOMP_VERSION)
5131 err = "plugin version mismatch";
5132 goto fail;
5135 DLSYM (get_name);
5136 DLSYM (get_caps);
5137 DLSYM (get_type);
5138 DLSYM (get_num_devices);
5139 DLSYM (init_device);
5140 DLSYM (fini_device);
5141 DLSYM (load_image);
5142 DLSYM (unload_image);
5143 DLSYM (alloc);
5144 DLSYM (free);
5145 DLSYM (dev2host);
5146 DLSYM (host2dev);
5147 DLSYM_OPT (memcpy2d, memcpy2d);
5148 DLSYM_OPT (memcpy3d, memcpy3d);
5149 device->capabilities = device->get_caps_func ();
5150 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5152 DLSYM (run);
5153 DLSYM_OPT (async_run, async_run);
5154 DLSYM_OPT (can_run, can_run);
5155 DLSYM (dev2dev);
5157 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5159 if (!DLSYM_OPT (openacc.exec, openacc_exec)
5160 || !DLSYM_OPT (openacc.create_thread_data,
5161 openacc_create_thread_data)
5162 || !DLSYM_OPT (openacc.destroy_thread_data,
5163 openacc_destroy_thread_data)
5164 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5165 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5166 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5167 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5168 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5169 || !DLSYM_OPT (openacc.async.queue_callback,
5170 openacc_async_queue_callback)
5171 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5172 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5173 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5174 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5176 /* Require all the OpenACC handlers if we have
5177 GOMP_OFFLOAD_CAP_OPENACC_200. */
5178 err = "plugin missing OpenACC handler function";
5179 goto fail;
5182 unsigned cuda = 0;
5183 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5184 openacc_cuda_get_current_device);
5185 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5186 openacc_cuda_get_current_context);
5187 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5188 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5189 if (cuda && cuda != 4)
5191 /* Make sure all the CUDA functions are there if any of them are. */
5192 err = "plugin missing OpenACC CUDA handler function";
5193 goto fail;
5196 #undef DLSYM
5197 #undef DLSYM_OPT
5199 return 1;
5201 dl_fail:
5202 err = dlerror ();
5203 fail:
5204 gomp_error ("while loading %s: %s", plugin_name, err);
5205 if (last_missing)
5206 gomp_error ("missing function was %s", last_missing);
5207 if (plugin_handle)
5208 dlclose (plugin_handle);
5210 return 0;
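/* Illustrative sketch (not part of libgomp): the flavor of symbols a
   plugin must export for the DLSYM checks above to succeed, shown for a
   hypothetical "libgomp-plugin-foo.so.1".  Prototypes are abbreviated;
   the authoritative list is the DLSYM/DLSYM_OPT sequence above and the
   declarations in libgomp-plugin.h.  */
#if 0
unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; }
const char *GOMP_OFFLOAD_get_name (void) { return "foo"; }
unsigned GOMP_OFFLOAD_get_caps (void) { return GOMP_OFFLOAD_CAP_OPENMP_400; }
/* ... plus get_type, get_num_devices, init_device, fini_device,
   load_image, unload_image, alloc, free, dev2host and host2dev, and,
   once GOMP_OFFLOAD_CAP_OPENMP_400 is advertised, run and dev2dev.  */
#endif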
5213 /* This function finalizes all initialized devices. */
5215 static void
5216 gomp_target_fini (void)
5218 int i;
5219 for (i = 0; i < num_devices; i++)
5221 bool ret = true;
5222 struct gomp_device_descr *devicep = &devices[i];
5223 gomp_mutex_lock (&devicep->lock);
5224 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5225 ret = gomp_fini_device (devicep);
5226 gomp_mutex_unlock (&devicep->lock);
5227 if (!ret)
5228 gomp_fatal ("device finalization failed");
5232 /* This function initializes the runtime for offloading.
5233 It parses the list of offload plugins and tries to load them.
5234 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5235 will be set, and the array DEVICES initialized, containing descriptors for
5236 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
5237 followed by the others. */
5239 static void
5240 gomp_target_init (void)
5242 const char *prefix = "libgomp-plugin-";
5243 const char *suffix = SONAME_SUFFIX (1);
5244 const char *cur, *next;
5245 char *plugin_name;
5246 int i, new_num_devs;
5247 int num_devs = 0, num_devs_openmp;
5248 struct gomp_device_descr *devs = NULL;
5250 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5251 return;
5253 cur = OFFLOAD_PLUGINS;
5254 if (*cur)
5257 struct gomp_device_descr current_device;
5258 size_t prefix_len, suffix_len, cur_len;
5260 next = strchr (cur, ',');
5262 prefix_len = strlen (prefix);
5263 cur_len = next ? next - cur : strlen (cur);
5264 suffix_len = strlen (suffix);
5266 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5267 if (!plugin_name)
5269 num_devs = 0;
5270 break;
5273 memcpy (plugin_name, prefix, prefix_len);
5274 memcpy (plugin_name + prefix_len, cur, cur_len);
5275 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5277 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5279 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5280 new_num_devs = current_device.get_num_devices_func (omp_req);
5281 if (gomp_debug_var > 0 && new_num_devs < 0)
5283 bool found = false;
5284 int type = current_device.get_type_func ();
5285 for (int img = 0; img < num_offload_images; img++)
5286 if (type == offload_images[img].type)
5287 found = true;
5288 if (found)
5290 char buf[sizeof ("unified_address, unified_shared_memory, "
5291 "reverse_offload")];
5292 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5293 char *name = (char *) malloc (cur_len + 1);
5294 memcpy (name, cur, cur_len);
5295 name[cur_len] = '\0';
5296 gomp_debug (1,
5297 "%s devices present but 'omp requires %s' "
5298 "cannot be fulfilled\n", name, buf);
5299 free (name);
5302 else if (new_num_devs >= 1)
5304 /* Augment DEVICES and NUM_DEVICES. */
5306 devs = realloc (devs, (num_devs + new_num_devs)
5307 * sizeof (struct gomp_device_descr));
5308 if (!devs)
5310 num_devs = 0;
5311 free (plugin_name);
5312 break;
5315 current_device.name = current_device.get_name_func ();
5316 /* current_device.capabilities has already been set. */
5317 current_device.type = current_device.get_type_func ();
5318 current_device.mem_map.root = NULL;
5319 current_device.mem_map_rev.root = NULL;
5320 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5321 for (i = 0; i < new_num_devs; i++)
5323 current_device.target_id = i;
5324 devs[num_devs] = current_device;
5325 gomp_mutex_init (&devs[num_devs].lock);
5326 num_devs++;
5331 free (plugin_name);
5332 cur = next + 1;
5334 while (next);
5336 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5337 NUM_DEVICES_OPENMP. */
5338 struct gomp_device_descr *devs_s
5339 = malloc (num_devs * sizeof (struct gomp_device_descr));
5340 if (!devs_s)
5342 num_devs = 0;
5343 free (devs);
5344 devs = NULL;
5346 num_devs_openmp = 0;
5347 for (i = 0; i < num_devs; i++)
5348 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5349 devs_s[num_devs_openmp++] = devs[i];
5350 int num_devs_after_openmp = num_devs_openmp;
5351 for (i = 0; i < num_devs; i++)
5352 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5353 devs_s[num_devs_after_openmp++] = devs[i];
5354 free (devs);
5355 devs = devs_s;
5357 for (i = 0; i < num_devs; i++)
5359 /* The 'devices' array can be moved (by the realloc call) until we have
5360 found all the plugins, so registering with the OpenACC runtime (which
5361 takes a copy of the pointer argument) must be delayed until now. */
5362 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5363 goacc_register (&devs[i]);
5365 if (gomp_global_icv.default_device_var == INT_MIN)
5367 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5368 struct gomp_icv_list *none;
5369 none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
5370 gomp_global_icv.default_device_var = (num_devs_openmp
5371 ? 0 : omp_invalid_device);
5372 none->icvs.default_device_var = gomp_global_icv.default_device_var;
5375 num_devices = num_devs;
5376 num_devices_openmp = num_devs_openmp;
5377 devices = devs;
5378 if (atexit (gomp_target_fini) != 0)
5379 gomp_fatal ("atexit failed");
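/* Illustrative sketch (not part of libgomp): the plugin file name
   composition performed above with memcpy.  An OFFLOAD_PLUGINS entry
   such as "nvptx" combined with SONAME_SUFFIX (1) yields
   "libgomp-plugin-nvptx.so.1" on typical ELF hosts; the entry name is
   an example only.  */
#if 0
static char *
example_plugin_name (const char *cur, size_t cur_len)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  char *name = malloc (strlen (prefix) + cur_len + strlen (suffix) + 1);
  if (name)
    sprintf (name, "%s%.*s%s", prefix, (int) cur_len, cur, suffix);
  return name;
}
#endif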
5382 #else /* PLUGIN_SUPPORT */
5383 /* If dlfcn.h is unavailable we always fall back to host execution.
5384 GOMP_target* routines are just stubs for this case. */
5385 static void
5386 gomp_target_init (void)
5389 #endif /* PLUGIN_SUPPORT */