/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_c
#include "splay-tree.h"

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
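
/* The instantiation above yields a pointer-keyed hash table, used below as a
   set of already-adjusted refcount pointers.  A hypothetical usage sketch
   (names illustrative, not from a call site in this file):

     htab_t set = htab_create (8);
     uintptr_t *p = &some_key->refcount;
     if (!htab_find (set, p))
       {
         uintptr_t **slot = htab_find_slot (&set, p, INSERT);
         *slot = p;   // first time seen: record it
       }
     htab_free (set);  */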

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* OpenMP requires mask.  */
static int omp_requires_mask;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                                 : omp_initial_device))
        return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && num_devices == 0)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but only the host device is available");
      else if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
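
/* For illustration (values hypothetical): with two offload devices,

     resolve_device (0, false)  -> &devices[0]
     resolve_device (1, false)  -> &devices[1]
     resolve_device (2, false)  -> NULL  (== omp_get_num_devices (), i.e.
                                          the host / initial device)

   where a NULL result means host fallback, unless OMP_TARGET_OFFLOAD was
   set to MANDATORY, in which case the miss becomes a gomp_fatal above.  */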

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
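
/* A zero-length key (host_start == host_end) never intersects a [start, end)
   interval in the splay tree, so gomp_map_lookup widens the probe.
   Illustrative sketch: for a mapped region [0x1000, 0x1100) and a
   zero-length key at 0x1000,

     key = {0x1000, 0x1000};   // no match as-is
     key = {0x1000, 0x1001};   // after host_end++: matches the region

   with the key fields restored before returning.  */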

static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
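
/* Illustrative example (offsets hypothetical) of the coalescing rules above,
   adding regions at tgt_start-relative offsets [0, 64), [96, 128) and
   [8192, 8256):

     gomp_coalesce_buf_add (&cbuf, 0, 64);     // new chunk [0, 64)
     gomp_coalesce_buf_add (&cbuf, 96, 32);    // gap 32 < MAX_COALESCE_BUF_GAP,
                                               // chunk grows to [0, 128)
     gomp_coalesce_buf_add (&cbuf, 8192, 64);  // gap too large: new chunk

   leaving chunk_cnt == 2 and use_cnt == 1 (for the last chunk).  */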

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}
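
/* For example, for an OpenMP construct such as

     #pragma omp target map(to: a) map(alloc: b) map(from: c)

   only 'a' needs a host-to-device copy on initial mapping; 'b' and 'c'
   merely get device storage allocated, which is why the FROM/ALLOC kinds
   return false above.  */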

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }

                  /* In an asynchronous context, verify that CBUF isn't used
                     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
                  if (__builtin_expect (aq != NULL, 0))
                    assert (ephemeral);

                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  As we've
             not been able to use CBUF, make a copy of the data into a
             temporary buffer.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
                      "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
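
/* E.g. (hypothetical), when one 'target' construct maps the same array
   section twice, both maps resolve to the same key K; passing the same
   REFCOUNT_SET for the whole construct makes the second call a no-op:

     gomp_increment_refcount (k, &set);   // k->refcount: 1 -> 2
     gomp_increment_refcount (k, &set);   // already in set: unchanged

   per the OpenMP 5.0 rule cited above.  */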

/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set is being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
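
/* For illustration, the out-values above combine as follows within one
   REFCOUNT_SET:

     refcount 2 -> 1 (first handling):  *do_copy = false, *do_remove = false
     refcount 1 -> 0 (first handling):  *do_copy = true,  *do_remove = true
     repeat on a sibling structelem key (already in the set, refcount 0):
                                        *do_copy = true,  *do_remove = false  */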

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
          || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
         address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             during the copyback to host.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf,
                  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
        cur_node.tgt_offset = 0;
      else
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Pointer target of array section wasn't mapped");
        }
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
        = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
         array section.  Now subtract bias to get what we want
         to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, implicit, cbuf,
                              refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                                      &tgt->list[i],
                                      kind & typemask, false, implicit, cbuf,
                                      refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, implicit, cbuf,
                                  refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp,
                     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          if (allow_zero_length_array_sections)
            /* When allowing attachment to zero-length array sections, we
               allow attaching to NULL pointers when the target region is not
               mapped.  */
            data = 0;
          else
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("pointer target not mapped for attach");
            }
        }
      else
        data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}
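
/* Illustrative OpenACC example: for

     #pragma acc enter data attach(s->ptr)

   with 's' already mapped, the device copy of 's->ptr' is overwritten with
   the translated device address of its target, and the attach count for
   that pointer-sized slot goes from 0 to 1; further attaches only bump the
   counter.  */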

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
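
/* Sketch of the encodings handled above: tgt->list[i].offset is either a
   real offset into the key's target block, or one of the sentinels

     OFFSET_INLINED - the value itself was passed (hostaddrs[i] is the value)
     OFFSET_POINTER - resolved later, reads as 0 here
     OFFSET_STRUCT  - derive the address from the following sibling entry

   so gomp_map_val (tgt, hostaddrs, i) yields the device-side value to store
   into the target argument array.  */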

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH
               || ((kind & typemask)
                   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_INLINED;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 null pointer was there and now a nonnull one has been found;
                 that permits transparent re-mapping for Fortran array
                 descriptors which were previously mapped unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, implicit,
                                  NULL, refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_INLINED;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp, false);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, false, cbufp);
                /* Save device address in hostaddr to permit later
                   availability when doing a deep-firstprivate with pointer
                   attach.  */
                hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
                tgt_size += len;

                /* If followed by GOMP_MAP_ATTACH, pointer assign this
                   firstprivate to hostaddrs[i+1], which is assumed to contain
                   a device address.  */
                if (i + 1 < mapnum
                    && (GOMP_MAP_ATTACH
                        == (typemask & get_kind (short_mapkind, kinds, i+1))))
                  {
                    uintptr_t target = (uintptr_t) hostaddrs[i];
                    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
                    /* Per
                       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
                       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
                       this probably needs revision for 'aq' usage.  */
                    assert (!aq);
                    gomp_copy_host2dev (devicep, aq, devptr, &target,
                                        sizeof (void *), false, cbufp);
                    ++i;
                  }
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily
                   test 'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    field_tgt_structelem_first = NULL;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp, refcount_set);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if (i > 0
                    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                        != GOMP_MAP_ALWAYS_POINTER))
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
              case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start
                                            - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      bool zlas
                        = ((kind & typemask)
                           == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp, zlas);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, false, implicit, cbufp,
                                      refcount_set);
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                k->refcount = 0;
                k->dynamic_refcount = 0;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (openmp_p)
                      {
                        k->refcount = REFCOUNT_STRUCTELEM;
                        if (field_tgt_structelem_first == NULL)
                          {
                            /* Set to first structure element of sequence.  */
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
                            field_tgt_structelem_first = k;
                          }
                        else
                          /* Point to refcount of leading element, but do not
                             increment again.  */
                          k->structelem_refcount_ptr
                            = &field_tgt_structelem_first->structelem_refcount;

                        if (i == field_tgt_clear)
                          {
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                /* First increment, from 0 to 1.  gomp_increment_refcount
                   encapsulates the different increment cases, so use this
                   instead of directly setting 1 during initialization.  */
                gomp_increment_refcount (k, refcount_set);

                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind
                                                               & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
                    gomp_map_pointer
                      (tgt, aq, (uintptr_t) *(void **) k->host_start,
                       k->tgt_offset, sizes[i], cbufp,
                       ((kind & typemask)
                        == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            /* For OpenMP, the use of refcount_sets causes
                               errors if we set k->refcount = 1 above but also
                               increment it again here, for decrementing will
                               not properly match, since we decrement only
                               once for each key's refcount.  Therefore avoid
                               this increment for OpenMP constructs.  */
                            if (!openmp_p)
                              gomp_increment_refcount (k, refcount_set);
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t) *(void **)
                                              hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp, false);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                  case GOMP_MAP_ALWAYS_PRESENT_TO:
                  case GOMP_MAP_ALWAYS_PRESENT_FROM:
                  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
                                  "dev: %d)", (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size,
                                  devicep->target_id);
#else
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %lu (0x%lx), dev: %d)",
                                  (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size,
                                  devicep->target_id);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), false, cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), true,
                                        NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              true, cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start,
                            false, NULL);
      if (aq)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
      else
        free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, htab_t *refcount_set,
               enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                sizes, kinds, short_mapkind, refcount_set,
                                pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}

attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
                struct goacc_asyncqueue *aq, size_t mapnum,
                void **hostaddrs, void **devaddrs, size_t *sizes,
                void *kinds, bool short_mapkind,
                enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, NULL,
                                 GOMP_MAP_VARS_OPENACC | pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1878 static void
1879 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1881 splay_tree_remove (sp, k);
1882 if (k->aux)
1884 if (k->aux->link_key)
1885 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1886 if (k->aux->attach_count)
1887 free (k->aux->attach_count);
1888 free (k->aux);
1889 k->aux = NULL;
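/* Remove the mapping K from DEVICEP's memory map. For a structure-element
   mapping, all sibling keys of the containing structure are removed,
   starting from the _FIRST key. Returns true if the underlying
   target_mem_desc was unmapped; with a non-NULL AQ, the unreferencing is
   deferred to a queue callback instead. */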
1893 static inline __attribute__((always_inline)) bool
1894 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1895 struct goacc_asyncqueue *aq)
1897 bool is_tgt_unmapped = false;
1899 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1901 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1902 /* Infer the splay_tree_key of the first structelem key using the
1903 pointer to the first structelem_refcount. */
1904 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1905 - offsetof (struct splay_tree_key_s,
1906 structelem_refcount));
1907 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1909 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1910 with the splay_tree_keys embedded inside. */
1911 splay_tree_node node =
1912 (splay_tree_node) ((char *) k
1913 - offsetof (struct splay_tree_node_s, key));
1914 while (true)
1916 /* Start from the _FIRST key, and continue for all following
1917 sibling keys. */
1918 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1919 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1920 break;
1921 else
1922 k = &(++node)->key;
1925 else
1926 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1928 if (aq)
1929 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1930 (void *) k->tgt);
1931 else
1932 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1933 return is_tgt_unmapped;
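/* Synchronous wrapper around gomp_remove_var_internal; the device lock
   must be held by the caller. */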
1936 attribute_hidden bool
1937 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1939 return gomp_remove_var_internal (devicep, k, NULL);
1942 /* Remove a variable asynchronously. This actually removes the variable
1943 mapping immediately, but retains the linked target_mem_desc until the
1944 asynchronous operation has completed (as it may still refer to target
1945 memory). The device lock must be held before entry, and remains locked on
1946 exit. */
1948 attribute_hidden void
1949 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1950 struct goacc_asyncqueue *aq)
1952 (void) gomp_remove_var_internal (devicep, k, aq);
1955 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1956 variables back from device to host; if it is false, it is assumed that this
1957 has been done already. */
1959 static inline __attribute__((always_inline)) void
1960 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1961 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1963 struct gomp_device_descr *devicep = tgt->device_descr;
1965 if (tgt->list_count == 0)
1967 free (tgt);
1968 return;
1971 gomp_mutex_lock (&devicep->lock);
1972 if (devicep->state == GOMP_DEVICE_FINALIZED)
1974 gomp_mutex_unlock (&devicep->lock);
1975 free (tgt->array);
1976 free (tgt);
1977 return;
1980 size_t i;
1982 /* We must perform detachments before any copies back to the host. */
1983 for (i = 0; i < tgt->list_count; i++)
1985 splay_tree_key k = tgt->list[i].key;
1987 if (k != NULL && tgt->list[i].is_attach)
1988 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1989 + tgt->list[i].offset,
1990 false, NULL);
1993 for (i = 0; i < tgt->list_count; i++)
1995 splay_tree_key k = tgt->list[i].key;
1996 if (k == NULL)
1997 continue;
1999 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2000 counts ('n->refcount', 'n->dynamic_refcount'). */
2001 if (tgt->list[i].is_attach)
2002 continue;
2004 bool do_copy, do_remove;
2005 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2007 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2008 || tgt->list[i].always_copy_from)
2009 gomp_copy_dev2host (devicep, aq,
2010 (void *) (k->host_start + tgt->list[i].offset),
2011 (void *) (k->tgt->tgt_start + k->tgt_offset
2012 + tgt->list[i].offset),
2013 tgt->list[i].length);
2014 if (do_remove)
2016 struct target_mem_desc *k_tgt = k->tgt;
2017 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2018 /* It would be bad if TGT got unmapped while we're still iterating
2019 over its LIST_COUNT, and we also expect to use it in the following
2020 code. */
2021 assert (!is_tgt_unmapped
2022 || k_tgt != tgt);
2026 if (aq)
2027 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2028 (void *) tgt);
2029 else
2030 gomp_unref_tgt ((void *) tgt);
2032 gomp_mutex_unlock (&devicep->lock);
2035 static void
2036 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2037 htab_t *refcount_set)
2039 /* This management of a local refcount_set is for convenience of callers
2040 who do not share a refcount_set over multiple map/unmap uses. */
2041 htab_t local_refcount_set = NULL;
2042 if (refcount_set == NULL)
2044 local_refcount_set = htab_create (tgt->list_count);
2045 refcount_set = &local_refcount_set;
2048 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2050 if (local_refcount_set)
2051 htab_free (local_refcount_set);
2054 attribute_hidden void
2055 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2056 struct goacc_asyncqueue *aq)
2058 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
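/* Perform the data movements of an 'omp target update' construct: for each
   of the MAPNUM host address ranges, copy data to and/or from the device
   according to KINDS, taking care not to overwrite still-attached
   pointers. */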
2061 static void
2062 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2063 size_t *sizes, void *kinds, bool short_mapkind)
2065 size_t i;
2066 struct splay_tree_key_s cur_node;
2067 const int typemask = short_mapkind ? 0xff : 0x7;
2069 if (!devicep)
2070 return;
2072 if (mapnum == 0)
2073 return;
2075 gomp_mutex_lock (&devicep->lock);
2076 if (devicep->state == GOMP_DEVICE_FINALIZED)
2078 gomp_mutex_unlock (&devicep->lock);
2079 return;
2082 for (i = 0; i < mapnum; i++)
2083 if (sizes[i])
2085 cur_node.host_start = (uintptr_t) hostaddrs[i];
2086 cur_node.host_end = cur_node.host_start + sizes[i];
2087 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2088 if (n)
2090 int kind = get_kind (short_mapkind, kinds, i);
2091 if (n->host_start > cur_node.host_start
2092 || n->host_end < cur_node.host_end)
2094 gomp_mutex_unlock (&devicep->lock);
2095 gomp_fatal ("Trying to update [%p..%p) object when "
2096 "only [%p..%p) is mapped",
2097 (void *) cur_node.host_start,
2098 (void *) cur_node.host_end,
2099 (void *) n->host_start,
2100 (void *) n->host_end);
2103 if (n->aux && n->aux->attach_count)
2105 uintptr_t addr = cur_node.host_start;
2106 while (addr < cur_node.host_end)
2108 /* We have to be careful not to overwrite still attached
2109 pointers during host<->device updates. */
2110 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2111 if (n->aux->attach_count[i] == 0)
2113 void *devaddr = (void *) (n->tgt->tgt_start
2114 + n->tgt_offset
2115 + addr - n->host_start);
2116 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2117 gomp_copy_host2dev (devicep, NULL,
2118 devaddr, (void *) addr,
2119 sizeof (void *), false, NULL);
2120 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2121 gomp_copy_dev2host (devicep, NULL,
2122 (void *) addr, devaddr,
2123 sizeof (void *));
2125 addr += sizeof (void *);
2128 else
2130 void *hostaddr = (void *) cur_node.host_start;
2131 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2132 + cur_node.host_start
2133 - n->host_start);
2134 size_t size = cur_node.host_end - cur_node.host_start;
2136 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2137 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2138 false, NULL);
2139 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2140 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2143 else
2145 int kind = get_kind (short_mapkind, kinds, i);
2147 if (GOMP_MAP_PRESENT_P (kind))
2149 /* We already looked up the memory region above and it
2150 was missing. */
2151 gomp_mutex_unlock (&devicep->lock);
2152 #ifdef HAVE_INTTYPES_H
2153 gomp_fatal ("present clause: not present on the device "
2154 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2155 "dev: %d)", (void *) hostaddrs[i],
2156 (uint64_t) sizes[i], (uint64_t) sizes[i],
2157 devicep->target_id);
2158 #else
2159 gomp_fatal ("present clause: not present on the device "
2160 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2161 (void *) hostaddrs[i], (unsigned long) sizes[i],
2162 (unsigned long) sizes[i], devicep->target_id);
2163 #endif
2167 gomp_mutex_unlock (&devicep->lock);
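/* Return the item of gomp_offload_icv_list whose device number is DEV_NUM,
   or NULL if no ICVs have been stored for that device yet. */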
2170 static struct gomp_offload_icv_list *
2171 gomp_get_offload_icv_item (int dev_num)
2173 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2174 while (l != NULL && l->device_num != dev_num)
2175 l = l->next;
2177 return l;
2180 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2181 depending on the device num and the variable hierarchy
2182 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2183 device and thus no item with that device number is contained in
2184 gomp_offload_icv_list, then a new item is created and added to the list. */
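/* For example (illustrative only - the _DEV_42/_DEV/_ALL suffix parsing
   itself happens during initial ICV setup, not here): assuming the
   environment sets OMP_NUM_TEAMS_DEV_42=8, OMP_NUM_TEAMS_DEV=4 and
   OMP_NUM_TEAMS_ALL=2, device 42 would get nteams == 8 and other devices
   would get 4; the _ALL value 2 would only apply where neither more
   specific variant is set, and absent all three the value falls back to
   gomp_default_icv_values. */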
2186 static struct gomp_offload_icvs *
2187 get_gomp_offload_icvs (int dev_num)
2189 struct gomp_icv_list *dev
2190 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2191 struct gomp_icv_list *all
2192 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2193 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2194 struct gomp_offload_icv_list *offload_icvs
2195 = gomp_get_offload_icv_item (dev_num);
2197 if (offload_icvs != NULL)
2198 return &offload_icvs->icvs;
2200 struct gomp_offload_icv_list *new
2201 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2203 new->device_num = dev_num;
2204 new->icvs.device_num = dev_num;
2205 new->next = gomp_offload_icv_list;
2207 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2208 new->icvs.nteams = dev_x->icvs.nteams_var;
2209 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2210 new->icvs.nteams = dev->icvs.nteams_var;
2211 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2212 new->icvs.nteams = all->icvs.nteams_var;
2213 else
2214 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2216 if (dev_x != NULL
2217 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2218 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2219 else if (dev != NULL
2220 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2221 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2222 else if (all != NULL
2223 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2224 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2225 else
2226 new->icvs.teams_thread_limit
2227 = gomp_default_icv_values.teams_thread_limit_var;
2229 if (dev_x != NULL
2230 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2231 new->icvs.default_device = dev_x->icvs.default_device_var;
2232 else if (dev != NULL
2233 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2234 new->icvs.default_device = dev->icvs.default_device_var;
2235 else if (all != NULL
2236 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2237 new->icvs.default_device = all->icvs.default_device_var;
2238 else
2239 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2241 gomp_offload_icv_list = new;
2242 return &new->icvs;
2245 /* Load the image pointed to by TARGET_DATA to the device specified by
2246 DEVICEP, and insert into the splay tree the mapping between addresses from
2247 HOST_TABLE and from the loaded target image. We rely on the host and
2248 device compilers emitting variables and functions in the same order. */
2250 static void
2251 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2252 const void *host_table, const void *target_data,
2253 bool is_register_lock)
2255 void **host_func_table = ((void ***) host_table)[0];
2256 void **host_funcs_end = ((void ***) host_table)[1];
2257 void **host_var_table = ((void ***) host_table)[2];
2258 void **host_vars_end = ((void ***) host_table)[3];
2260 /* The func table contains only addresses, the var table contains addresses
2261 and corresponding sizes. */
2262 int num_funcs = host_funcs_end - host_func_table;
2263 int num_vars = (host_vars_end - host_var_table) / 2;
2265 /* Load image to device and get target addresses for the image. */
2266 struct addr_pair *target_table = NULL;
2267 uint64_t *rev_target_fn_table = NULL;
2268 int i, num_target_entries;
2270 /* With reverse offload, also insert target-to-host addresses. */
2271 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2273 num_target_entries
2274 = devicep->load_image_func (devicep->target_id, version,
2275 target_data, &target_table,
2276 rev_lookup ? &rev_target_fn_table : NULL);
2278 if (num_target_entries != num_funcs + num_vars
2279 /* "+1" due to the additional ICV struct. */
2280 && num_target_entries != num_funcs + num_vars + 1)
2282 gomp_mutex_unlock (&devicep->lock);
2283 if (is_register_lock)
2284 gomp_mutex_unlock (&register_lock);
2285 gomp_fatal ("Cannot map target functions or variables"
2286 " (expected %u, have %u)", num_funcs + num_vars,
2287 num_target_entries);
2290 /* Insert host-target address mapping into splay tree. */
2291 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2292 /* "+1" due to the additional ICV struct. */
2293 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2294 * sizeof (*tgt->array));
2295 if (rev_target_fn_table)
2296 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2297 else
2298 tgt->rev_array = NULL;
2299 tgt->refcount = REFCOUNT_INFINITY;
2300 tgt->tgt_start = 0;
2301 tgt->tgt_end = 0;
2302 tgt->to_free = NULL;
2303 tgt->prev = NULL;
2304 tgt->list_count = 0;
2305 tgt->device_descr = devicep;
2306 splay_tree_node array = tgt->array;
2307 reverse_splay_tree_node rev_array = tgt->rev_array;
2309 for (i = 0; i < num_funcs; i++)
2311 splay_tree_key k = &array->key;
2312 k->host_start = (uintptr_t) host_func_table[i];
2313 k->host_end = k->host_start + 1;
2314 k->tgt = tgt;
2315 k->tgt_offset = target_table[i].start;
2316 k->refcount = REFCOUNT_INFINITY;
2317 k->dynamic_refcount = 0;
2318 k->aux = NULL;
2319 array->left = NULL;
2320 array->right = NULL;
2321 splay_tree_insert (&devicep->mem_map, array);
2322 if (rev_target_fn_table)
2324 reverse_splay_tree_key k2 = &rev_array->key;
2325 k2->dev = rev_target_fn_table[i];
2326 k2->k = k;
2327 rev_array->left = NULL;
2328 rev_array->right = NULL;
2329 if (k2->dev != 0)
2330 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2331 rev_array++;
2333 array++;
2336 /* Most significant bit of the size in host and target tables marks
2337 "omp declare target link" variables. */
2338 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2339 const uintptr_t size_mask = ~link_bit;
2341 for (i = 0; i < num_vars; i++)
2343 struct addr_pair *target_var = &target_table[num_funcs + i];
2344 uintptr_t target_size = target_var->end - target_var->start;
2345 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2347 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2349 gomp_mutex_unlock (&devicep->lock);
2350 if (is_register_lock)
2351 gomp_mutex_unlock (&register_lock);
2352 gomp_fatal ("Cannot map target variables (size mismatch)");
2355 splay_tree_key k = &array->key;
2356 k->host_start = (uintptr_t) host_var_table[i * 2];
2357 k->host_end
2358 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2359 k->tgt = tgt;
2360 k->tgt_offset = target_var->start;
2361 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2362 k->dynamic_refcount = 0;
2363 k->aux = NULL;
2364 array->left = NULL;
2365 array->right = NULL;
2366 splay_tree_insert (&devicep->mem_map, array);
2367 array++;
2370 /* The last entry is for the ICVs variable.
2371 Tolerate the case where the plugin does not return this entry. */
2372 if (num_funcs + num_vars < num_target_entries)
2374 struct addr_pair *var = &target_table[num_funcs + num_vars];
2376 /* Start address will be non-zero for the ICVs variable if
2377 the variable was found in this image. */
2378 if (var->start != 0)
2380 /* The index of the devicep within devices[] is regarded as its
2381 'device number', which is different from the per-device type
2382 devicep->target_id. */
2383 int dev_num = (int) (devicep - &devices[0]);
2384 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2385 size_t var_size = var->end - var->start;
2386 if (var_size != sizeof (struct gomp_offload_icvs))
2388 gomp_mutex_unlock (&devicep->lock);
2389 if (is_register_lock)
2390 gomp_mutex_unlock (&register_lock);
2391 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2392 "format");
2394 /* Copy the ICVs variable to its place in device memory, thereby
2395 actually putting its device number into effect. */
2396 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2397 var_size, false, NULL);
2398 splay_tree_key k = &array->key;
2399 k->host_start = (uintptr_t) icvs;
2400 k->host_end =
2401 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2402 k->tgt = tgt;
2403 k->tgt_offset = var->start;
2404 k->refcount = REFCOUNT_INFINITY;
2405 k->dynamic_refcount = 0;
2406 k->aux = NULL;
2407 array->left = NULL;
2408 array->right = NULL;
2409 splay_tree_insert (&devicep->mem_map, array);
2410 array++;
2414 free (target_table);
2417 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
2418 The device must be locked. */
2420 static void
2421 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2422 unsigned version,
2423 const void *host_table, const void *target_data)
2425 void **host_func_table = ((void ***) host_table)[0];
2426 void **host_funcs_end = ((void ***) host_table)[1];
2427 void **host_var_table = ((void ***) host_table)[2];
2428 void **host_vars_end = ((void ***) host_table)[3];
2430 /* The func table contains only addresses, the var table contains addresses
2431 and corresponding sizes. */
2432 int num_funcs = host_funcs_end - host_func_table;
2433 int num_vars = (host_vars_end - host_var_table) / 2;
2435 struct splay_tree_key_s k;
2436 splay_tree_key node = NULL;
2438 /* Find the mapping at the start of the node array. */
2439 if (num_funcs || num_vars)
2441 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2442 : (uintptr_t) host_var_table[0]);
2443 k.host_end = k.host_start + 1;
2444 node = splay_tree_lookup (&devicep->mem_map, &k);
2447 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2449 gomp_mutex_unlock (&devicep->lock);
2450 gomp_fatal ("image unload fail");
2452 if (devicep->mem_map_rev.root)
2454 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2455 real allocation. */
2456 assert (node && node->tgt && node->tgt->rev_array);
2457 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2458 free (node->tgt->rev_array);
2459 devicep->mem_map_rev.root = NULL;
2462 /* Remove mappings from splay tree. */
2463 int i;
2464 for (i = 0; i < num_funcs; i++)
2466 k.host_start = (uintptr_t) host_func_table[i];
2467 k.host_end = k.host_start + 1;
2468 splay_tree_remove (&devicep->mem_map, &k);
2471 /* Most significant bit of the size in host and target tables marks
2472 "omp declare target link" variables. */
2473 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2474 const uintptr_t size_mask = ~link_bit;
2475 bool is_tgt_unmapped = false;
2477 for (i = 0; i < num_vars; i++)
2479 k.host_start = (uintptr_t) host_var_table[i * 2];
2480 k.host_end
2481 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2483 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2484 splay_tree_remove (&devicep->mem_map, &k);
2485 else
2487 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2488 is_tgt_unmapped = gomp_remove_var (devicep, n);
2492 if (node && !is_tgt_unmapped)
2494 free (node->tgt);
2495 free (node);
2499 static void
2500 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2502 char *end = buf + size, *p = buf;
2503 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2504 p += snprintf (p, end - p, "unified_address");
2505 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2506 p += snprintf (p, end - p, "%sunified_shared_memory",
2507 (p == buf ? "" : ", "));
2508 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2509 p += snprintf (p, end - p, "%sreverse_offload",
2510 (p == buf ? "" : ", "));
2513 /* This function should be called from every offload image when it is
2514 loaded. It gets the descriptor of the host func and var tables
2515 HOST_TABLE, the TYPE of the target, and DATA. */
2517 void
2518 GOMP_offload_register_ver (unsigned version, const void *host_table,
2519 int target_type, const void *data)
2521 int i;
2523 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2524 gomp_fatal ("Library too old for offload (version %u < %u)",
2525 GOMP_VERSION, GOMP_VERSION_LIB (version));
2527 int omp_req;
2528 const void *target_data;
2529 if (GOMP_VERSION_LIB (version) > 1)
2531 omp_req = (int) (size_t) ((void **) data)[0];
2532 target_data = &((void **) data)[1];
2534 else
2536 omp_req = 0;
2537 target_data = data;
2540 gomp_mutex_lock (&register_lock);
2542 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2544 char buf1[sizeof ("unified_address, unified_shared_memory, "
2545 "reverse_offload")];
2546 char buf2[sizeof ("unified_address, unified_shared_memory, "
2547 "reverse_offload")];
2548 gomp_requires_to_name (buf2, sizeof (buf2),
2549 omp_req != GOMP_REQUIRES_TARGET_USED
2550 ? omp_req : omp_requires_mask);
2551 if (omp_req != GOMP_REQUIRES_TARGET_USED
2552 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2554 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2555 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2556 "in multiple compilation units: '%s' vs. '%s'",
2557 buf1, buf2);
2559 else
2560 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2561 "some compilation units", buf2);
2563 omp_requires_mask = omp_req;
2565 /* Load the image to all initialized devices. */
2566 for (i = 0; i < num_devices; i++)
2568 struct gomp_device_descr *devicep = &devices[i];
2569 gomp_mutex_lock (&devicep->lock);
2570 if (devicep->type == target_type
2571 && devicep->state == GOMP_DEVICE_INITIALIZED)
2572 gomp_load_image_to_device (devicep, version,
2573 host_table, target_data, true);
2574 gomp_mutex_unlock (&devicep->lock);
2577 /* Insert the image into the array of pending images. */
2578 offload_images
2579 = gomp_realloc_unlock (offload_images,
2580 (num_offload_images + 1)
2581 * sizeof (struct offload_image_descr));
2582 offload_images[num_offload_images].version = version;
2583 offload_images[num_offload_images].type = target_type;
2584 offload_images[num_offload_images].host_table = host_table;
2585 offload_images[num_offload_images].target_data = target_data;
2587 num_offload_images++;
2588 gomp_mutex_unlock (&register_lock);
2591 /* Legacy entry point. */
2593 void
2594 GOMP_offload_register (const void *host_table, int target_type,
2595 const void *target_data)
2597 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2600 /* This function should be called from every offload image when it is
2601 unloaded. It gets the descriptor of the host func and var tables
2602 HOST_TABLE, the TYPE of the target, and DATA. */
2604 void
2605 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2606 int target_type, const void *data)
2608 int i;
2610 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2611 gomp_fatal ("Library too old for offload (version %u < %u)",
2612 GOMP_VERSION, GOMP_VERSION_LIB (version));
2614 const void *target_data;
2615 if (GOMP_VERSION_LIB (version) > 1)
2616 target_data = &((void **) data)[1];
2617 else
2618 target_data = data;
2620 gomp_mutex_lock (&register_lock);
2622 /* Unload the image from all initialized devices. */
2623 for (i = 0; i < num_devices; i++)
2625 struct gomp_device_descr *devicep = &devices[i];
2626 gomp_mutex_lock (&devicep->lock);
2627 if (devicep->type == target_type
2628 && devicep->state == GOMP_DEVICE_INITIALIZED)
2629 gomp_unload_image_from_device (devicep, version,
2630 host_table, target_data);
2631 gomp_mutex_unlock (&devicep->lock);
2634 /* Remove the image from the array of pending images. */
2635 for (i = 0; i < num_offload_images; i++)
2636 if (offload_images[i].target_data == target_data)
2638 offload_images[i] = offload_images[--num_offload_images];
2639 break;
2642 gomp_mutex_unlock (&register_lock);
2645 /* Legacy entry point. */
2647 void
2648 GOMP_offload_unregister (const void *host_table, int target_type,
2649 const void *target_data)
2651 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2654 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2655 must be locked on entry, and remains locked on return. */
2657 attribute_hidden void
2658 gomp_init_device (struct gomp_device_descr *devicep)
2660 int i;
2661 if (!devicep->init_device_func (devicep->target_id))
2663 gomp_mutex_unlock (&devicep->lock);
2664 gomp_fatal ("device initialization failed");
2667 /* Load to the device all images registered so far. */
2668 for (i = 0; i < num_offload_images; i++)
2670 struct offload_image_descr *image = &offload_images[i];
2671 if (image->type == devicep->type)
2672 gomp_load_image_to_device (devicep, image->version,
2673 image->host_table, image->target_data,
2674 false);
2677 /* Initialize OpenACC asynchronous queues. */
2678 goacc_init_asyncqueues (devicep);
2680 devicep->state = GOMP_DEVICE_INITIALIZED;
2683 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2684 must be locked on entry, and remains locked on return. */
2686 attribute_hidden bool
2687 gomp_fini_device (struct gomp_device_descr *devicep)
2689 bool ret = goacc_fini_asyncqueues (devicep);
2690 ret &= devicep->fini_device_func (devicep->target_id);
2691 devicep->state = GOMP_DEVICE_FINALIZED;
2692 return ret;
2695 attribute_hidden void
2696 gomp_unload_device (struct gomp_device_descr *devicep)
2698 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2700 unsigned i;
2702 /* Unload from the device all images currently registered. */
2703 for (i = 0; i < num_offload_images; i++)
2705 struct offload_image_descr *image = &offload_images[i];
2706 if (image->type == devicep->type)
2707 gomp_unload_image_from_device (devicep, image->version,
2708 image->host_table,
2709 image->target_data);
2714 /* Host fallback for GOMP_target{,_ext} routines. */
2716 static void
2717 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2718 struct gomp_device_descr *devicep, void **args)
2720 struct gomp_thread old_thr, *thr = gomp_thread ();
2722 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2723 && devicep != NULL)
2724 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2725 "be used for offloading");
2727 old_thr = *thr;
2728 memset (thr, '\0', sizeof (*thr));
2729 if (gomp_places_list)
2731 thr->place = old_thr.place;
2732 thr->ts.place_partition_len = gomp_places_list_len;
2734 if (args)
2735 while (*args)
2737 intptr_t id = (intptr_t) *args++, val;
2738 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2739 val = (intptr_t) *args++;
2740 else
2741 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2742 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2743 continue;
2744 id &= GOMP_TARGET_ARG_ID_MASK;
2745 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2746 continue;
2747 val = val > INT_MAX ? INT_MAX : val;
2748 if (val)
2749 gomp_icv (true)->thread_limit_var = val;
2750 break;
2753 fn (hostaddrs);
2754 gomp_free_thread (thr);
2755 *thr = old_thr;
2758 /* Calculate alignment and size requirements of a private copy of data shared
2759 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2761 static inline void
2762 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2763 unsigned short *kinds, size_t *tgt_align,
2764 size_t *tgt_size)
2766 size_t i;
2767 for (i = 0; i < mapnum; i++)
2768 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2770 size_t align = (size_t) 1 << (kinds[i] >> 8);
2771 if (*tgt_align < align)
2772 *tgt_align = align;
2773 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2774 *tgt_size += sizes[i];
2778 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2780 static inline void
2781 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2782 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2783 size_t tgt_size)
2785 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2786 if (al)
2787 tgt += tgt_align - al;
2788 tgt_size = 0;
2789 size_t i;
2790 for (i = 0; i < mapnum; i++)
2791 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2793 size_t align = (size_t) 1 << (kinds[i] >> 8);
2794 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2795 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2796 hostaddrs[i] = tgt + tgt_size;
2797 tgt_size = tgt_size + sizes[i];
2798 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2800 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2801 ++i;
2806 /* Helper function of GOMP_target{,_ext} routines. */
2808 static void *
2809 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2810 void (*host_fn) (void *))
2812 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2813 return (void *) host_fn;
2814 else
2816 gomp_mutex_lock (&devicep->lock);
2817 if (devicep->state == GOMP_DEVICE_FINALIZED)
2819 gomp_mutex_unlock (&devicep->lock);
2820 return NULL;
2823 struct splay_tree_key_s k;
2824 k.host_start = (uintptr_t) host_fn;
2825 k.host_end = k.host_start + 1;
2826 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2827 gomp_mutex_unlock (&devicep->lock);
2828 if (tgt_fn == NULL)
2829 return NULL;
2831 return (void *) tgt_fn->tgt_offset;
2835 /* Called when encountering a target directive. If DEVICE
2836 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
2837 GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
2838 hw device), use the host fallback. FN is the address of the host code,
2839 UNUSED is part of the current ABI, but we're not actually using it.
2840 HOSTADDRS, SIZES and KINDS are arrays with MAPNUM entries, holding the
2841 addresses of the host objects, the sizes of the host objects (resp., for
2842 pointer kinds, the pointer bias, with the size assumed to be
2843 sizeof (void *)) and the kinds. */
2845 void
2846 GOMP_target (int device, void (*fn) (void *), const void *unused,
2847 size_t mapnum, void **hostaddrs, size_t *sizes,
2848 unsigned char *kinds)
2850 struct gomp_device_descr *devicep = resolve_device (device, true);
2852 void *fn_addr;
2853 if (devicep == NULL
2854 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2855 /* All shared memory devices should use the GOMP_target_ext function. */
2856 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2857 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2858 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2860 htab_t refcount_set = htab_create (mapnum);
2861 struct target_mem_desc *tgt_vars
2862 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2863 &refcount_set, GOMP_MAP_VARS_TARGET);
2864 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2865 NULL);
2866 htab_clear (refcount_set);
2867 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2868 htab_free (refcount_set);
2871 static inline unsigned int
2872 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2874 /* If we cannot run asynchronously, simply ignore nowait. */
2875 if (devicep != NULL && devicep->async_run_func == NULL)
2876 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2878 return flags;
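/* Copy the device-side instance of the device-specific ICVs for DEVICE
   back into the host-side struct, if such a mapping exists on the
   device. */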
2881 static void
2882 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2884 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2885 if (item == NULL)
2886 return;
2888 void *host_ptr = &item->icvs;
2889 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2890 if (dev_ptr != NULL)
2891 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2892 sizeof (struct gomp_offload_icvs));
2895 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2896 and several arguments have been added:
2897 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2898 DEPEND is an array of dependencies, see GOMP_task for details.
2900 ARGS is a pointer to an array consisting of a variable number of both
2901 device-independent and device-specific arguments, each taking one or two
2902 elements: the first specifies for which device it is intended, the type,
2903 and optionally also the value. If the value is not present in the first
2904 element, the whole second element is the actual value. The last element
2905 of the array is a single NULL. Among the device-independent arguments
2906 are for example NUM_TEAMS and THREAD_LIMIT.
2908 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2909 that value, or 1 if the teams construct is not present, or 0 if the
2910 teams construct does not have a num_teams clause and so the choice is
2911 implementation defined, and -1 if it can't be determined on the host
2912 what value GOMP_teams will have on the device.
2913 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2914 body with that value, or 0 if the teams construct does not have a
2915 thread_limit clause or the teams construct is not present, or -1 if it
2916 can't be determined on the host what value GOMP_teams will have on the device. */
2918 void
2919 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2920 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2921 unsigned int flags, void **depend, void **args)
2923 struct gomp_device_descr *devicep = resolve_device (device, true);
2924 size_t tgt_align = 0, tgt_size = 0;
2925 bool fpc_done = false;
2927 /* Obtain the original TEAMS and THREADS values from ARGS. */
2928 intptr_t orig_teams = 1, orig_threads = 0;
2929 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2930 void **tmpargs = args;
2931 while (*tmpargs)
2933 intptr_t id = (intptr_t) *tmpargs++, val;
2934 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2936 val = (intptr_t) *tmpargs++;
2937 len = 2;
2939 else
2941 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2942 len = 1;
2944 num_args += len;
2945 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2946 continue;
2947 val = val > INT_MAX ? INT_MAX : val;
2948 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2950 orig_teams = val;
2951 teams_len = len;
2953 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2955 orig_threads = val;
2956 threads_len = len;
2960 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2961 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2962 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2963 value could not be determined. No change.
2964 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2965 Set device-specific value.
2966 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2967 No change. */
2968 if (orig_teams == -2)
2969 new_teams = 1;
2970 else if (orig_teams == 0)
2972 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2973 if (item != NULL)
2974 new_teams = item->icvs.nteams;
2976 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2977 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2978 e.g. a THREAD_LIMIT clause. */
2979 if (orig_teams > -2 && orig_threads == 0)
2981 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2982 if (item != NULL)
2983 new_threads = item->icvs.teams_thread_limit;
2986 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2987 updated. */
2988 void **new_args = args;
2989 if (orig_teams != new_teams || orig_threads != new_threads)
2991 size_t tms_len = (orig_teams == new_teams
2992 ? teams_len
2993 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
2994 ? 1 : 2));
2995 size_t ths_len = (orig_threads == new_threads
2996 ? threads_len
2997 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
2998 ? 1 : 2));
2999 /* One additional item after the last arg must be NULL. */
3000 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3001 + ths_len + 1;
3002 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3004 tmpargs = args;
3005 void **tmp_new_args = new_args;
3006 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3007 too if they have not been changed, and skipped otherwise. */
3008 while (*tmpargs)
3010 intptr_t id = (intptr_t) *tmpargs;
3011 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3012 && orig_teams != new_teams)
3013 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3014 && orig_threads != new_threads))
3016 tmpargs++;
3017 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3018 tmpargs++;
3020 else
3022 *tmp_new_args++ = *tmpargs++;
3023 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3024 *tmp_new_args++ = *tmpargs++;
3028 /* Add the new TEAMS arg to the new args list if it has been changed. */
3029 if (orig_teams != new_teams)
3031 intptr_t new_val = new_teams;
3032 if (tms_len == 1)
3034 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3035 | GOMP_TARGET_ARG_NUM_TEAMS;
3036 *tmp_new_args++ = (void *) new_val;
3038 else
3040 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3041 | GOMP_TARGET_ARG_NUM_TEAMS);
3042 *tmp_new_args++ = (void *) new_val;
3046 /* Add the new THREADS arg to the new args list if it has been changed. */
3047 if (orig_threads != new_threads)
3049 intptr_t new_val = new_threads;
3050 if (ths_len == 1)
3052 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3053 | GOMP_TARGET_ARG_THREAD_LIMIT;
3054 *tmp_new_args++ = (void *) new_val;
3056 else
3058 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3059 | GOMP_TARGET_ARG_THREAD_LIMIT);
3060 *tmp_new_args++ = (void *) new_val;
3064 *tmp_new_args = NULL;
3067 flags = clear_unsupported_flags (devicep, flags);
3069 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3071 struct gomp_thread *thr = gomp_thread ();
3072 /* Create a team if we don't have any around, as nowait
3073 target tasks make sense to run asynchronously even when
3074 outside of any parallel. */
3075 if (__builtin_expect (thr->ts.team == NULL, 0))
3077 struct gomp_team *team = gomp_new_team (1);
3078 struct gomp_task *task = thr->task;
3079 struct gomp_task **implicit_task = &task;
3080 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3081 team->prev_ts = thr->ts;
3082 thr->ts.team = team;
3083 thr->ts.team_id = 0;
3084 thr->ts.work_share = &team->work_shares[0];
3085 thr->ts.last_work_share = NULL;
3086 #ifdef HAVE_SYNC_BUILTINS
3087 thr->ts.single_count = 0;
3088 #endif
3089 thr->ts.static_trip = 0;
3090 thr->task = &team->implicit_task[0];
3091 gomp_init_task (thr->task, NULL, icv);
3092 while (*implicit_task
3093 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3094 implicit_task = &(*implicit_task)->parent;
3095 if (*implicit_task)
3097 thr->task = *implicit_task;
3098 gomp_end_task ();
3099 free (*implicit_task);
3100 thr->task = &team->implicit_task[0];
3102 else
3103 pthread_setspecific (gomp_thread_destructor, thr);
3104 if (implicit_task != &task)
3106 *implicit_task = thr->task;
3107 thr->task = task;
3110 if (thr->ts.team
3111 && !thr->task->final_task)
3113 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3114 sizes, kinds, flags, depend, new_args,
3115 GOMP_TARGET_TASK_BEFORE_MAP);
3116 return;
3120 /* If there are depend clauses, but nowait is not present
3121 (or we are in a final task), block the parent task until the
3122 dependencies are resolved and then just continue with the rest
3123 of the function as if it were a merged task. */
3124 if (depend != NULL)
3126 struct gomp_thread *thr = gomp_thread ();
3127 if (thr->task && thr->task->depend_hash)
3129 /* If we might need to wait, copy firstprivate now. */
3130 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3131 &tgt_align, &tgt_size);
3132 if (tgt_align)
3134 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3135 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3136 tgt_align, tgt_size);
3138 fpc_done = true;
3139 gomp_task_maybe_wait_for_dependencies (depend);
3143 void *fn_addr;
3144 if (devicep == NULL
3145 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3146 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3147 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3149 if (!fpc_done)
3151 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3152 &tgt_align, &tgt_size);
3153 if (tgt_align)
3155 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3156 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3157 tgt_align, tgt_size);
3160 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3161 return;
3164 struct target_mem_desc *tgt_vars;
3165 htab_t refcount_set = NULL;
3167 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3169 if (!fpc_done)
3171 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3172 &tgt_align, &tgt_size);
3173 if (tgt_align)
3175 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3176 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3177 tgt_align, tgt_size);
3180 tgt_vars = NULL;
3182 else
3184 refcount_set = htab_create (mapnum);
3185 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3186 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3188 devicep->run_func (devicep->target_id, fn_addr,
3189 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3190 new_args);
3191 if (tgt_vars)
3193 htab_clear (refcount_set);
3194 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3196 if (refcount_set)
3197 htab_free (refcount_set);
3199 /* Copy back ICVs from device to host.
3200 HOST_PTR is expected to exist since it was added in
3201 gomp_load_image_to_device if not already available. */
3202 gomp_copy_back_icvs (devicep, device);
3207 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3208 keeping track of all variable handling, assuming that reverse offload
3209 occurs only very rarely. The downside is that the reverse search is slow. */
3211 struct gomp_splay_tree_rev_lookup_data {
3212 uintptr_t tgt_start;
3213 uintptr_t tgt_end;
3214 splay_tree_key key;
3217 static int
3218 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3220 struct gomp_splay_tree_rev_lookup_data *data;
3221 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3222 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3224 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3225 return 0;
3227 size_t j;
3228 for (j = 0; j < key->tgt->list_count; j++)
3229 if (key->tgt->list[j].key == key)
3230 break;
3231 assert (j < key->tgt->list_count);
3232 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3234 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3235 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3237 data->key = key;
3238 return 1;
3240 return 0;
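/* Find a mapping in MEM_MAP that overlaps the device range [TGT_START,
   TGT_END). For an empty range, retry with the range extended by one byte
   at the end; if that finds nothing and ZERO_LEN is false, retry once more
   with the range extended by one byte at the start instead. */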
3243 static inline splay_tree_key
3244 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3245 bool zero_len)
3247 struct gomp_splay_tree_rev_lookup_data data;
3248 data.key = NULL;
3249 data.tgt_start = tgt_start;
3250 data.tgt_end = tgt_end;
3252 if (tgt_start != tgt_end)
3254 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3255 return data.key;
3258 data.tgt_end++;
3259 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3260 if (data.key != NULL || zero_len)
3261 return data.key;
3262 data.tgt_end--;
3264 data.tgt_start--;
3265 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3266 return data.key;
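/* Bookkeeping for reverse-offload copies: the original device address,
   whether corresponding data was already present on the host, and whether
   a temporary host buffer was obtained via gomp_aligned_alloc (and thus
   must be freed with gomp_aligned_free). */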
3269 struct cpy_data
3271 uint64_t devaddr;
3272 bool present, aligned;
3276 /* Search the just-mapped reverse-offload data; return the index if found,
3277 otherwise a value >= N. */
3279 static inline int
3280 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3281 unsigned short *kinds, uint64_t *sizes, size_t n,
3282 uint64_t tgt_start, uint64_t tgt_end)
3284 const bool short_mapkind = true;
3285 const int typemask = short_mapkind ? 0xff : 0x7;
3286 size_t i;
3287 for (i = 0; i < n; i++)
3289 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3290 == GOMP_MAP_STRUCT);
3291 uint64_t dev_end;
3292 if (!is_struct)
3293 dev_end = d[i].devaddr + sizes[i];
3294 else
3296 if (i + sizes[i] < n)
3297 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3298 else
3299 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3301 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3302 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3303 break;
3304 if (is_struct)
3305 i += sizes[i];
3307 return i;
3310 static inline int
3311 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3312 unsigned short *kinds, uint64_t *sizes,
3313 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3314 bool zero_len)
3316 size_t i;
3317 if (tgt_start != tgt_end)
3318 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3319 tgt_start, tgt_end);
3320 tgt_end++;
3321 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3322 tgt_start, tgt_end);
3323 if (i < n || zero_len)
3324 return i;
3325 tgt_end--;
3327 tgt_start--;
3328 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3329 tgt_start, tgt_end);
3332 /* Handle reverse offload. This is called by the device plugins for a
3333 reverse offload; it is not called if the outer target runs on the host.
3334 The mapping is simplified: device-affecting constructs (except for target
3335 with device(ancestor:1)) must not be encountered; in particular, no
3336 target (enter/exit) data. */
3338 void
3339 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3340 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3341 struct goacc_asyncqueue *aq)
3343 /* Return early if there is no offload code. */
3344 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3345 return;
3346 /* Currently, this fails because of calculate_firstprivate_requirements
3347 below; it could be fixed, but additional code would need to be updated to
3348 handle 32-bit hosts - thus, it is not worthwhile. */
3349 if (sizeof (void *) != sizeof (uint64_t))
3350 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3352 struct cpy_data *cdata = NULL;
3353 uint64_t *devaddrs;
3354 uint64_t *sizes;
3355 unsigned short *kinds;
3356 const bool short_mapkind = true;
3357 const int typemask = short_mapkind ? 0xff : 0x7;
3358 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3360 reverse_splay_tree_key n;
3361 struct reverse_splay_tree_key_s k;
3362 k.dev = fn_ptr;
3364 gomp_mutex_lock (&devicep->lock);
3365 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3366 gomp_mutex_unlock (&devicep->lock);
3368 if (n == NULL)
3369 gomp_fatal ("Cannot find reverse-offload function");
3370 void (*host_fn)() = (void (*)()) n->k->host_start;
3372 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3374 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3375 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3376 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3378 else
3380 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3381 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3382 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3383 gomp_copy_dev2host (devicep, aq, devaddrs,
3384 (const void *) (uintptr_t) devaddrs_ptr,
3385 mapnum * sizeof (uint64_t));
3386 gomp_copy_dev2host (devicep, aq, sizes,
3387 (const void *) (uintptr_t) sizes_ptr,
3388 mapnum * sizeof (uint64_t));
3389 gomp_copy_dev2host (devicep, aq, kinds,
3390 (const void *) (uintptr_t) kinds_ptr,
3391 mapnum * sizeof (unsigned short));
3392 if (aq && !devicep->openacc.async.synchronize_func (aq))
3393 exit (EXIT_FAILURE);
3396 size_t tgt_align = 0, tgt_size = 0;
3398 /* If actually executed on 32-bit systems, the casts lead to wrong code;
3399 but 32-bit with offloading is not supported; see the top of this function. */
3400 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3401 (void *) (uintptr_t) kinds,
3402 &tgt_align, &tgt_size);
3404 if (tgt_align)
3406 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3407 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3408 if (al)
3409 tgt += tgt_align - al;
3410 tgt_size = 0;
3411 for (uint64_t i = 0; i < mapnum; i++)
3412 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3413 && devaddrs[i] != 0)
3415 size_t align = (size_t) 1 << (kinds[i] >> 8);
3416 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3417 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3418 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3419 (size_t) sizes[i]);
3420 else
3422 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3423 (void *) (uintptr_t) devaddrs[i],
3424 (size_t) sizes[i]);
3425 if (aq && !devicep->openacc.async.synchronize_func (aq))
3426 exit (EXIT_FAILURE);
3428 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3429 tgt_size = tgt_size + sizes[i];
3430 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3431 && i + 1 < mapnum
3432 && ((get_kind (short_mapkind, kinds, i) & typemask)
3433 == GOMP_MAP_ATTACH))
3435 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3436 = (uint64_t) devaddrs[i];
3437 ++i;
3442 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3444 size_t j, struct_cpy = 0;
3445 splay_tree_key n2;
3446 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3447 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3448 gomp_mutex_lock (&devicep->lock);
3449 for (uint64_t i = 0; i < mapnum; i++)
3451 if (devaddrs[i] == 0)
3452 continue;
3453 n = NULL;
3454 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3455 switch (kind)
3457 case GOMP_MAP_FIRSTPRIVATE:
3458 case GOMP_MAP_FIRSTPRIVATE_INT:
3459 continue;
3461 case GOMP_MAP_DELETE:
3462 case GOMP_MAP_RELEASE:
3463 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3464 /* Assume it is present; look it up - but ignore unless the
3465 present clause is there. */
3466 case GOMP_MAP_ALLOC:
3467 case GOMP_MAP_FROM:
3468 case GOMP_MAP_FORCE_ALLOC:
3469 case GOMP_MAP_FORCE_FROM:
3470 case GOMP_MAP_ALWAYS_FROM:
3471 case GOMP_MAP_TO:
3472 case GOMP_MAP_TOFROM:
3473 case GOMP_MAP_FORCE_TO:
3474 case GOMP_MAP_FORCE_TOFROM:
3475 case GOMP_MAP_ALWAYS_TO:
3476 case GOMP_MAP_ALWAYS_TOFROM:
3477 case GOMP_MAP_FORCE_PRESENT:
3478 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3479 case GOMP_MAP_ALWAYS_PRESENT_TO:
3480 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3481 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3482 cdata[i].devaddr = devaddrs[i];
3483 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3484 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3485 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3486 devaddrs[i],
3487 devaddrs[i] + sizes[i], zero_len);
3488 if (j < i)
3490 n2 = NULL;
3491 cdata[i].present = true;
3492 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3494 else
3496 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3497 devaddrs[i],
3498 devaddrs[i] + sizes[i], zero_len);
3499 cdata[i].present = n2 != NULL;
3501 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3503 gomp_mutex_unlock (&devicep->lock);
3504 #ifdef HAVE_INTTYPES_H
3505 gomp_fatal ("present clause: no corresponding data on "
3506 "parent device at %p with size %"PRIu64,
3507 (void *) (uintptr_t) devaddrs[i],
3508 (uint64_t) sizes[i]);
3509 #else
3510 gomp_fatal ("present clause: no corresponding data on "
3511 "parent device at %p with size %lu",
3512 (void *) (uintptr_t) devaddrs[i],
3513 (unsigned long) sizes[i]);
3514 #endif
3515 break;
3517 else if (!cdata[i].present
3518 && kind != GOMP_MAP_DELETE
3519 && kind != GOMP_MAP_RELEASE
3520 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3522 cdata[i].aligned = true;
3523 size_t align = (size_t) 1 << (kinds[i] >> 8);
3524 devaddrs[i]
3525 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3526 sizes[i]);
3528 else if (n2 != NULL)
3529 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3530 - (n2->tgt->tgt_start + n2->tgt_offset));
3531 if (((!cdata[i].present || struct_cpy)
3532 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3533 || kind == GOMP_MAP_FORCE_TO
3534 || kind == GOMP_MAP_FORCE_TOFROM
3535 || GOMP_MAP_ALWAYS_TO_P (kind))
3537 gomp_copy_dev2host (devicep, aq,
3538 (void *) (uintptr_t) devaddrs[i],
3539 (void *) (uintptr_t) cdata[i].devaddr,
3540 sizes[i]);
3541 if (aq && !devicep->openacc.async.synchronize_func (aq))
3543 gomp_mutex_unlock (&devicep->lock);
3544 exit (EXIT_FAILURE);
3547 if (struct_cpy)
3548 struct_cpy--;
3549 break;
3550 case GOMP_MAP_ATTACH:
3551 case GOMP_MAP_POINTER:
3552 case GOMP_MAP_ALWAYS_POINTER:
3553 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3554 devaddrs[i] + sizes[i],
3555 devaddrs[i] + sizes[i]
3556 + sizeof (void*), false);
3557 cdata[i].present = n2 != NULL;
3558 cdata[i].devaddr = devaddrs[i];
3559 if (n2)
3560 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3561 - (n2->tgt->tgt_start + n2->tgt_offset));
3562 else
3564 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3565 devaddrs[i] + sizes[i],
3566 devaddrs[i] + sizes[i]
3567 + sizeof (void*), false);
3568 if (j < i)
3570 cdata[i].present = true;
3571 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3572 - cdata[j].devaddr);
3575 if (!cdata[i].present)
3576 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3577 /* Assume that when present, the pointer is already correct. */
3578 if (!n2)
3579 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3580 = devaddrs[i-1];
3581 break;
3582 case GOMP_MAP_TO_PSET:
3583 /* Assume that when present, the pointers are fine and no 'to:'
3584 is required. */
3585 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3586 devaddrs[i], devaddrs[i] + sizes[i],
3587 false);
3588 cdata[i].present = n2 != NULL;
3589 cdata[i].devaddr = devaddrs[i];
3590 if (n2)
3591 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3592 - (n2->tgt->tgt_start + n2->tgt_offset));
3593 else
3595 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3596 devaddrs[i],
3597 devaddrs[i] + sizes[i], false);
3598 if (j < i)
3600 cdata[i].present = true;
3601 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3602 - cdata[j].devaddr);
3605 if (!cdata[i].present)
3607 cdata[i].aligned = true;
3608 size_t align = (size_t) 1 << (kinds[i] >> 8);
3609 devaddrs[i]
3610 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3611 sizes[i]);
3612 gomp_copy_dev2host (devicep, aq,
3613 (void *) (uintptr_t) devaddrs[i],
3614 (void *) (uintptr_t) cdata[i].devaddr,
3615 sizes[i]);
3616 if (aq && !devicep->openacc.async.synchronize_func (aq))
3618 gomp_mutex_unlock (&devicep->lock);
3619 exit (EXIT_FAILURE);
3622 for (j = i + 1; j < mapnum; j++)
3624 kind = get_kind (short_mapkind, kinds, j) & typemask;
3625 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3626 && !GOMP_MAP_POINTER_P (kind))
3627 break;
3628 if (devaddrs[j] < devaddrs[i])
3629 break;
3630 if (cdata[i].present)
3631 continue;
3632 if (devaddrs[j] == 0)
3634 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3635 continue;
3637 int k;
3638 n2 = NULL;
3639 /* Dereference devaddrs[j] to get the device addr. */
3640 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3641 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3642 + sizes[j]);
3643 cdata[j].present = true;
3644 cdata[j].devaddr = devaddrs[j];
3645 if (devaddrs[j] == 0)
3646 continue;
3647 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3648 devaddrs[j],
3649 devaddrs[j] + sizeof (void*),
3650 false);
3651 if (k < j)
3652 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3653 - cdata[k].devaddr);
3654 else
3656 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3657 devaddrs[j],
3658 devaddrs[j] + sizeof (void*),
3659 false);
3660 if (n2 == NULL)
3662 gomp_mutex_unlock (&devicep->lock);
3663 gomp_fatal ("Pointer target wasn't mapped");
3665 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3666 - (n2->tgt->tgt_start + n2->tgt_offset));
3668 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3669 = (void *) (uintptr_t) devaddrs[j];
3671 i = j - 1;
3672 break;
3673 case GOMP_MAP_STRUCT:
3674 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3675 devaddrs[i + sizes[i]]
3676 + sizes[i + sizes[i]], false);
3677 cdata[i].present = n2 != NULL;
3678 cdata[i].devaddr = devaddrs[i];
3679 struct_cpy = cdata[i].present ? 0 : sizes[i];
3680 if (!n2)
3682 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3683 - devaddrs[i+1]
3684 + sizes[i + sizes[i]]);
3685 size_t align = (size_t) 1 << (kinds[i] >> 8);
3686 cdata[i].aligned = true;
3687 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3688 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3690 else
3691 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3692 - (n2->tgt->tgt_start + n2->tgt_offset));
3693 break;
3694 default:
3695 gomp_mutex_unlock (&devicep->lock);
3696 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3699 gomp_mutex_unlock (&devicep->lock);
3702 host_fn (devaddrs);
3704 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3706 uint64_t struct_cpy = 0;
3707 bool clean_struct = false;
3708 for (uint64_t i = 0; i < mapnum; i++)
3710 if (cdata[i].devaddr == 0)
3711 continue;
3712 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3713 bool copy = !cdata[i].present || struct_cpy;
3714 switch (kind)
3716 case GOMP_MAP_FORCE_FROM:
3717 case GOMP_MAP_FORCE_TOFROM:
3718 case GOMP_MAP_ALWAYS_FROM:
3719 case GOMP_MAP_ALWAYS_TOFROM:
3720 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3721 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3722 copy = true;
3723 /* FALLTHRU */
3724 case GOMP_MAP_FROM:
3725 case GOMP_MAP_TOFROM:
3726 if (copy)
3728 gomp_copy_host2dev (devicep, aq,
3729 (void *) (uintptr_t) cdata[i].devaddr,
3730 (void *) (uintptr_t) devaddrs[i],
3731 sizes[i], false, NULL);
3732 if (aq && !devicep->openacc.async.synchronize_func (aq))
3733 exit (EXIT_FAILURE);
3735 default:
3736 break;
3738 if (struct_cpy)
3740 struct_cpy--;
3741 continue;
3743 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3745 clean_struct = true;
3746 struct_cpy = sizes[i];
3748 else if (!cdata[i].present && cdata[i].aligned)
3749 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3750 else if (!cdata[i].present)
3751 free ((void *) (uintptr_t) devaddrs[i]);
3753 if (clean_struct)
3754 for (uint64_t i = 0; i < mapnum; i++)
3755 if (!cdata[i].present
3756 && ((get_kind (short_mapkind, kinds, i) & typemask)
3757 == GOMP_MAP_STRUCT))
3759 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3760 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3763 free (devaddrs);
3764 free (sizes);
3765 free (kinds);
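/* Editorial note: the copy-back and cleanup above complete gomp_target_rev,
   libgomp's handler for reverse offload.  A minimal sketch of user code that
   reaches this path (function and variable names are illustrative only):

     #pragma omp requires reverse_offload

     void host_fn (int *p);   // runs back on the host via gomp_target_rev

     void f (int *p)
     {
       #pragma omp target map(tofrom: p[0:1])
       {
         #pragma omp target device(ancestor: 1) map(always, tofrom: p[0:1])
         host_fn (p);
       }
     }
*/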
3769 /* Host fallback for GOMP_target_data{,_ext} routines. */
3771 static void
3772 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3774 struct gomp_task_icv *icv = gomp_icv (false);
3776 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3777 && devicep != NULL)
3778 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3779 "be used for offloading");
3781 if (icv->target_data)
3783 /* Even when doing a host fallback, if there are any active
3784 #pragma omp target data constructs, we need to remember the
3785 new #pragma omp target data; otherwise GOMP_target_end_data
3786 would get out of sync. */
3787 struct target_mem_desc *tgt
3788 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3789 NULL, GOMP_MAP_VARS_DATA);
3790 tgt->prev = icv->target_data;
3791 icv->target_data = tgt;
3795 void
3796 GOMP_target_data (int device, const void *unused, size_t mapnum,
3797 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3799 struct gomp_device_descr *devicep = resolve_device (device, true);
3801 if (devicep == NULL
3802 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3803 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3804 return gomp_target_data_fallback (devicep);
3806 struct target_mem_desc *tgt
3807 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3808 NULL, GOMP_MAP_VARS_DATA);
3809 struct gomp_task_icv *icv = gomp_icv (true);
3810 tgt->prev = icv->target_data;
3811 icv->target_data = tgt;
3814 void
3815 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3816 size_t *sizes, unsigned short *kinds)
3818 struct gomp_device_descr *devicep = resolve_device (device, true);
3820 if (devicep == NULL
3821 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3822 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3823 return gomp_target_data_fallback (devicep);
3825 struct target_mem_desc *tgt
3826 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3827 NULL, GOMP_MAP_VARS_DATA);
3828 struct gomp_task_icv *icv = gomp_icv (true);
3829 tgt->prev = icv->target_data;
3830 icv->target_data = tgt;
3833 void
3834 GOMP_target_end_data (void)
3836 struct gomp_task_icv *icv = gomp_icv (false);
3837 if (icv->target_data)
3839 struct target_mem_desc *tgt = icv->target_data;
3840 icv->target_data = tgt->prev;
3841 gomp_unmap_vars (tgt, true, NULL);
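/* Editorial note: a minimal sketch of the structured construct served by the
   entry points above; the compiler emits GOMP_target_data_ext on entry to
   the region and GOMP_target_end_data on exit (array name illustrative):

     double a[1024];
     #pragma omp target data map(tofrom: a[0:1024])   // -> GOMP_target_data_ext
     {
       // device constructs here reuse the mapping installed above
     }                                                // -> GOMP_target_end_data
*/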
3845 void
3846 GOMP_target_update (int device, const void *unused, size_t mapnum,
3847 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3849 struct gomp_device_descr *devicep = resolve_device (device, true);
3851 if (devicep == NULL
3852 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3853 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3854 return;
3856 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3859 void
3860 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3861 size_t *sizes, unsigned short *kinds,
3862 unsigned int flags, void **depend)
3864 struct gomp_device_descr *devicep = resolve_device (device, true);
3866 /* If there are depend clauses, but nowait is not present,
3867 block the parent task until the dependencies are resolved
3868 and then just continue with the rest of the function as if it
3869 were a merged task.  Until we are able to schedule tasks during
3870 variable mapping or unmapping, ignore nowait if depend clauses
3871 are not present. */
3872 if (depend != NULL)
3874 struct gomp_thread *thr = gomp_thread ();
3875 if (thr->task && thr->task->depend_hash)
3877 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3878 && thr->ts.team
3879 && !thr->task->final_task)
3881 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3882 mapnum, hostaddrs, sizes, kinds,
3883 flags | GOMP_TARGET_FLAG_UPDATE,
3884 depend, NULL, GOMP_TARGET_TASK_DATA))
3885 return;
3887 else
3889 struct gomp_team *team = thr->ts.team;
3890 /* If parallel or taskgroup has been cancelled, don't start new
3891 tasks. */
3892 if (__builtin_expect (gomp_cancel_var, 0) && team)
3894 if (gomp_team_barrier_cancelled (&team->barrier))
3895 return;
3896 if (thr->task->taskgroup)
3898 if (thr->task->taskgroup->cancelled)
3899 return;
3900 if (thr->task->taskgroup->workshare
3901 && thr->task->taskgroup->prev
3902 && thr->task->taskgroup->prev->cancelled)
3903 return;
3907 gomp_task_maybe_wait_for_dependencies (depend);
3912 if (devicep == NULL
3913 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3914 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3915 return;
3917 struct gomp_thread *thr = gomp_thread ();
3918 struct gomp_team *team = thr->ts.team;
3919 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3920 if (__builtin_expect (gomp_cancel_var, 0) && team)
3922 if (gomp_team_barrier_cancelled (&team->barrier))
3923 return;
3924 if (thr->task->taskgroup)
3926 if (thr->task->taskgroup->cancelled)
3927 return;
3928 if (thr->task->taskgroup->workshare
3929 && thr->task->taskgroup->prev
3930 && thr->task->taskgroup->prev->cancelled)
3931 return;
3935 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
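/* Editorial note: a minimal sketch of the motion clauses that lower to
   GOMP_target_update_ext / gomp_update (names illustrative):

     #pragma omp target update to(a[0:n])     // host -> device copy
     #pragma omp target update from(b[0:n])   // device -> host copy
     #pragma omp target update to(a[0:n]) depend(in: x) nowait
     // with depend + nowait, the update may run as a deferred target task
*/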
3938 static void
3939 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3940 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3941 htab_t *refcount_set)
3943 const int typemask = 0xff;
3944 size_t i;
3945 gomp_mutex_lock (&devicep->lock);
3946 if (devicep->state == GOMP_DEVICE_FINALIZED)
3948 gomp_mutex_unlock (&devicep->lock);
3949 return;
3952 for (i = 0; i < mapnum; i++)
3953 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3955 struct splay_tree_key_s cur_node;
3956 cur_node.host_start = (uintptr_t) hostaddrs[i];
3957 cur_node.host_end = cur_node.host_start + sizeof (void *);
3958 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3960 if (n)
3961 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3962 false, NULL);
3965 int nrmvars = 0;
3966 splay_tree_key remove_vars[mapnum];
3968 for (i = 0; i < mapnum; i++)
3970 struct splay_tree_key_s cur_node;
3971 unsigned char kind = kinds[i] & typemask;
3972 switch (kind)
3974 case GOMP_MAP_FROM:
3975 case GOMP_MAP_ALWAYS_FROM:
3976 case GOMP_MAP_DELETE:
3977 case GOMP_MAP_RELEASE:
3978 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3979 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3980 cur_node.host_start = (uintptr_t) hostaddrs[i];
3981 cur_node.host_end = cur_node.host_start + sizes[i];
3982 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3983 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
3984 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
3985 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3986 if (!k)
3987 continue;
3989 bool delete_p = (kind == GOMP_MAP_DELETE
3990 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
3991 bool do_copy, do_remove;
3992 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
3993 &do_remove);
3995 if ((kind == GOMP_MAP_FROM && do_copy)
3996 || kind == GOMP_MAP_ALWAYS_FROM)
3998 if (k->aux && k->aux->attach_count)
4000 /* We have to be careful not to overwrite still-attached
4001 pointers during the copyback to host. */
4002 uintptr_t addr = k->host_start;
4003 while (addr < k->host_end)
4005 size_t i = (addr - k->host_start) / sizeof (void *);
4006 if (k->aux->attach_count[i] == 0)
4007 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4008 (void *) (k->tgt->tgt_start
4009 + k->tgt_offset
4010 + addr - k->host_start),
4011 sizeof (void *));
4012 addr += sizeof (void *);
4015 else
4016 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4017 (void *) (k->tgt->tgt_start + k->tgt_offset
4018 + cur_node.host_start
4019 - k->host_start),
4020 cur_node.host_end - cur_node.host_start);
4023 /* Lists of structure elements are removed all at once, which may
4024 cause immediate deallocation of the target_mem_desc and hence
4025 errors if sibling elements still remain to be copied back.
4026 While we're at it, it also seems more disciplined to simply
4027 queue all removals together for processing below.
4029 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4030 not have this problem, since it maintains an additional
4031 tgt->refcount = 1 reference to the target_mem_desc to start with. */
4033 if (do_remove)
4034 remove_vars[nrmvars++] = k;
4035 break;
4037 case GOMP_MAP_DETACH:
4038 break;
4039 default:
4040 gomp_mutex_unlock (&devicep->lock);
4041 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4042 kind);
4046 for (int i = 0; i < nrmvars; i++)
4047 gomp_remove_var (devicep, remove_vars[i]);
4049 gomp_mutex_unlock (&devicep->lock);
4052 void
4053 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4054 size_t *sizes, unsigned short *kinds,
4055 unsigned int flags, void **depend)
4057 struct gomp_device_descr *devicep = resolve_device (device, true);
4059 /* If there are depend clauses, but nowait is not present,
4060 block the parent task until the dependencies are resolved
4061 and then just continue with the rest of the function as if it
4062 were a merged task.  Until we are able to schedule tasks during
4063 variable mapping or unmapping, ignore nowait if depend clauses
4064 are not present. */
4065 if (depend != NULL)
4067 struct gomp_thread *thr = gomp_thread ();
4068 if (thr->task && thr->task->depend_hash)
4070 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4071 && thr->ts.team
4072 && !thr->task->final_task)
4074 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4075 mapnum, hostaddrs, sizes, kinds,
4076 flags, depend, NULL,
4077 GOMP_TARGET_TASK_DATA))
4078 return;
4080 else
4082 struct gomp_team *team = thr->ts.team;
4083 /* If parallel or taskgroup has been cancelled, don't start new
4084 tasks. */
4085 if (__builtin_expect (gomp_cancel_var, 0) && team)
4087 if (gomp_team_barrier_cancelled (&team->barrier))
4088 return;
4089 if (thr->task->taskgroup)
4091 if (thr->task->taskgroup->cancelled)
4092 return;
4093 if (thr->task->taskgroup->workshare
4094 && thr->task->taskgroup->prev
4095 && thr->task->taskgroup->prev->cancelled)
4096 return;
4100 gomp_task_maybe_wait_for_dependencies (depend);
4105 if (devicep == NULL
4106 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4107 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4108 return;
4110 struct gomp_thread *thr = gomp_thread ();
4111 struct gomp_team *team = thr->ts.team;
4112 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4113 if (__builtin_expect (gomp_cancel_var, 0) && team)
4115 if (gomp_team_barrier_cancelled (&team->barrier))
4116 return;
4117 if (thr->task->taskgroup)
4119 if (thr->task->taskgroup->cancelled)
4120 return;
4121 if (thr->task->taskgroup->workshare
4122 && thr->task->taskgroup->prev
4123 && thr->task->taskgroup->prev->cancelled)
4124 return;
4128 htab_t refcount_set = htab_create (mapnum);
4130 /* The variables are mapped separately such that they can be released
4131 independently. */
4132 size_t i, j;
4133 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4134 for (i = 0; i < mapnum; i++)
4135 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4137 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4138 &kinds[i], true, &refcount_set,
4139 GOMP_MAP_VARS_ENTER_DATA);
4140 i += sizes[i];
4142 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4144 for (j = i + 1; j < mapnum; j++)
4145 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4146 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4147 break;
4148 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4149 &kinds[i], true, &refcount_set,
4150 GOMP_MAP_VARS_ENTER_DATA);
4151 i += j - i - 1;
4153 else if (i + 1 < mapnum
4154 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4155 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4156 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4158 /* An attach operation must be processed together with the mapped
4159 base-pointer list item. */
4160 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4161 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4162 i += 1;
4164 else
4165 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4166 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4167 else
4168 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4169 htab_free (refcount_set);
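/* Editorial note: a minimal sketch of the unstructured mapping directives
   handled above; 'enter data' takes the mapping branch, while 'exit data'
   sets GOMP_TARGET_FLAG_EXIT_DATA and ends up in gomp_exit_data:

     #pragma omp target enter data map(to: a[0:n])
     // ... any number of target regions using 'a' ...
     #pragma omp target exit data map(from: a[0:n])
*/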
4172 bool
4173 gomp_target_task_fn (void *data)
4175 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4176 struct gomp_device_descr *devicep = ttask->devicep;
4178 if (ttask->fn != NULL)
4180 void *fn_addr;
4181 if (devicep == NULL
4182 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4183 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4184 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4186 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4187 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4188 ttask->args);
4189 return false;
4192 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4194 if (ttask->tgt)
4195 gomp_unmap_vars (ttask->tgt, true, NULL);
4196 return false;
4199 void *actual_arguments;
4200 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4202 ttask->tgt = NULL;
4203 actual_arguments = ttask->hostaddrs;
4205 else
4207 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4208 NULL, ttask->sizes, ttask->kinds, true,
4209 NULL, GOMP_MAP_VARS_TARGET);
4210 actual_arguments = (void *) ttask->tgt->tgt_start;
4212 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4214 assert (devicep->async_run_func);
4215 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4216 ttask->args, (void *) ttask);
4217 return true;
4219 else if (devicep == NULL
4220 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4221 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4222 return false;
4224 size_t i;
4225 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4226 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4227 ttask->kinds, true);
4228 else
4230 htab_t refcount_set = htab_create (ttask->mapnum);
4231 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4232 for (i = 0; i < ttask->mapnum; i++)
4233 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4235 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4236 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4237 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4238 i += ttask->sizes[i];
4240 else
4241 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4242 &ttask->kinds[i], true, &refcount_set,
4243 GOMP_MAP_VARS_ENTER_DATA);
4244 else
4245 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4246 ttask->kinds, &refcount_set);
4247 htab_free (refcount_set);
4249 return false;
4252 void
4253 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4255 if (thread_limit)
4257 struct gomp_task_icv *icv = gomp_icv (true);
4258 icv->thread_limit_var
4259 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4261 (void) num_teams;
4264 bool
4265 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4266 unsigned int thread_limit, bool first)
4268 struct gomp_thread *thr = gomp_thread ();
4269 if (first)
4271 if (thread_limit)
4273 struct gomp_task_icv *icv = gomp_icv (true);
4274 icv->thread_limit_var
4275 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4277 (void) num_teams_high;
4278 if (num_teams_low == 0)
4279 num_teams_low = 1;
4280 thr->num_teams = num_teams_low - 1;
4281 thr->team_num = 0;
4283 else if (thr->team_num == thr->num_teams)
4284 return false;
4285 else
4286 ++thr->team_num;
4287 return true;
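/* Editorial note: GOMP_teams4 drives the host-fallback iteration over team
   numbers: the first call (FIRST == true) records num_teams_low - 1 and
   resets team_num; later calls return false once all teams have run.  A
   plausible lowering, shown only as an assumption about generated code:

     if (GOMP_teams4 (4, 4, 0, true))
       do
         {
           // teams region body; runs once per team number 0..3 here
         }
       while (GOMP_teams4 (0, 0, 0, false));
*/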
4290 void *
4291 omp_target_alloc (size_t size, int device_num)
4293 if (device_num == omp_initial_device
4294 || device_num == gomp_get_num_devices ())
4295 return malloc (size);
4297 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4298 if (devicep == NULL)
4299 return NULL;
4301 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4302 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4303 return malloc (size);
4305 gomp_mutex_lock (&devicep->lock);
4306 void *ret = devicep->alloc_func (devicep->target_id, size);
4307 gomp_mutex_unlock (&devicep->lock);
4308 return ret;
4311 void
4312 omp_target_free (void *device_ptr, int device_num)
4314 if (device_num == omp_initial_device
4315 || device_num == gomp_get_num_devices ())
4317 free (device_ptr);
4318 return;
4321 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4322 if (devicep == NULL || device_ptr == NULL)
4323 return;
4325 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4326 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4328 free (device_ptr);
4329 return;
4332 gomp_mutex_lock (&devicep->lock);
4333 gomp_free_device_memory (devicep, device_ptr);
4334 gomp_mutex_unlock (&devicep->lock);
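/* Editorial example: a minimal sketch of the allocation API above
   (device number and size are illustrative):

     #include <omp.h>

     int dev = omp_get_default_device ();
     void *p = omp_target_alloc (1024, dev);
     if (p != NULL)
       {
         // ... use p in target regions via is_device_ptr(p) ...
         omp_target_free (p, dev);
       }
*/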
4337 int
4338 omp_target_is_present (const void *ptr, int device_num)
4340 if (device_num == omp_initial_device
4341 || device_num == gomp_get_num_devices ())
4342 return 1;
4344 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4345 if (devicep == NULL)
4346 return 0;
4348 if (ptr == NULL)
4349 return 1;
4351 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4352 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4353 return 1;
4355 gomp_mutex_lock (&devicep->lock);
4356 struct splay_tree_s *mem_map = &devicep->mem_map;
4357 struct splay_tree_key_s cur_node;
4359 cur_node.host_start = (uintptr_t) ptr;
4360 cur_node.host_end = cur_node.host_start;
4361 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4362 int ret = n != NULL;
4363 gomp_mutex_unlock (&devicep->lock);
4364 return ret;
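/* Editorial example: a sketch of the presence test above; for the host
   device (and shared-memory devices) the result is always 1:

     int a[256];
     #pragma omp target enter data map(to: a)
     int present = omp_target_is_present (a, omp_get_default_device ());
     // present != 0 here, since 'a' is mapped
*/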
4367 static int
4368 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4369 struct gomp_device_descr **dst_devicep,
4370 struct gomp_device_descr **src_devicep)
4372 if (dst_device_num != gomp_get_num_devices ()
4373 /* gomp_get_num_devices above must be called unconditionally. */
4374 && dst_device_num != omp_initial_device)
4376 *dst_devicep = resolve_device (dst_device_num, false);
4377 if (*dst_devicep == NULL)
4378 return EINVAL;
4380 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4381 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4382 *dst_devicep = NULL;
4385 if (src_device_num != num_devices_openmp
4386 && src_device_num != omp_initial_device)
4388 *src_devicep = resolve_device (src_device_num, false);
4389 if (*src_devicep == NULL)
4390 return EINVAL;
4392 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4393 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4394 *src_devicep = NULL;
4397 return 0;
4400 static int
4401 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4402 size_t dst_offset, size_t src_offset,
4403 struct gomp_device_descr *dst_devicep,
4404 struct gomp_device_descr *src_devicep)
4406 bool ret;
4407 if (src_devicep == NULL && dst_devicep == NULL)
4409 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4410 return 0;
4412 if (src_devicep == NULL)
4414 gomp_mutex_lock (&dst_devicep->lock);
4415 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4416 (char *) dst + dst_offset,
4417 (char *) src + src_offset, length);
4418 gomp_mutex_unlock (&dst_devicep->lock);
4419 return (ret ? 0 : EINVAL);
4421 if (dst_devicep == NULL)
4423 gomp_mutex_lock (&src_devicep->lock);
4424 ret = src_devicep->dev2host_func (src_devicep->target_id,
4425 (char *) dst + dst_offset,
4426 (char *) src + src_offset, length);
4427 gomp_mutex_unlock (&src_devicep->lock);
4428 return (ret ? 0 : EINVAL);
4430 if (src_devicep == dst_devicep)
4432 gomp_mutex_lock (&src_devicep->lock);
4433 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4434 (char *) dst + dst_offset,
4435 (char *) src + src_offset, length);
4436 gomp_mutex_unlock (&src_devicep->lock);
4437 return (ret ? 0 : EINVAL);
4439 return EINVAL;
4442 int
4443 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4444 size_t src_offset, int dst_device_num, int src_device_num)
4446 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4447 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4448 &dst_devicep, &src_devicep);
4450 if (ret)
4451 return ret;
4453 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4454 dst_devicep, src_devicep);
4456 return ret;
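/* Editorial example: a sketch of a host-to-device copy through the API
   above (buffer names are illustrative; omp_get_initial_device () denotes
   the host):

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double src[64];
     void *dp = omp_target_alloc (sizeof src, dev);
     // dst, src, length, dst_offset, src_offset, dst_device, src_device
     int err = omp_target_memcpy (dp, src, sizeof src, 0, 0, dev, host);
*/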
4459 typedef struct
4461 void *dst;
4462 const void *src;
4463 size_t length;
4464 size_t dst_offset;
4465 size_t src_offset;
4466 struct gomp_device_descr *dst_devicep;
4467 struct gomp_device_descr *src_devicep;
4468 } omp_target_memcpy_data;
4470 static void
4471 omp_target_memcpy_async_helper (void *args)
4473 omp_target_memcpy_data *a = args;
4474 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4475 a->src_offset, a->dst_devicep, a->src_devicep))
4476 gomp_fatal ("omp_target_memcpy failed");
4479 int
4480 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4481 size_t dst_offset, size_t src_offset,
4482 int dst_device_num, int src_device_num,
4483 int depobj_count, omp_depend_t *depobj_list)
4485 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4486 unsigned int flags = 0;
4487 void *depend[depobj_count + 5];
4488 int i;
4489 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4490 &dst_devicep, &src_devicep);
4492 omp_target_memcpy_data s = {
4493 .dst = dst,
4494 .src = src,
4495 .length = length,
4496 .dst_offset = dst_offset,
4497 .src_offset = src_offset,
4498 .dst_devicep = dst_devicep,
4499 .src_devicep = src_devicep
4502 if (check)
4503 return check;
4505 if (depobj_count > 0 && depobj_list != NULL)
4507 flags |= GOMP_TASK_FLAG_DEPEND;
4508 depend[0] = 0;
4509 depend[1] = (void *) (uintptr_t) depobj_count;
4510 depend[2] = depend[3] = depend[4] = 0;
4511 for (i = 0; i < depobj_count; ++i)
4512 depend[i + 5] = &depobj_list[i];
4515 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4516 __alignof__ (s), true, flags, depend, 0, NULL);
4518 return 0;
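/* Editorial example: a sketch of an asynchronous copy ordered by a depend
   object, assuming dp, buf, len, dev and host are set up as in the previous
   sketch; the helper task created via GOMP_task carries the dependences:

     omp_depend_t dep;
     #pragma omp depobj(dep) depend(inout: buf)
     int err = omp_target_memcpy_async (dp, buf, len, 0, 0, dev, host,
                                        1, &dep);
     #pragma omp taskwait   // wait for the copy task to finish
*/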
4521 static int
4522 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4523 int num_dims, const size_t *volume,
4524 const size_t *dst_offsets,
4525 const size_t *src_offsets,
4526 const size_t *dst_dimensions,
4527 const size_t *src_dimensions,
4528 struct gomp_device_descr *dst_devicep,
4529 struct gomp_device_descr *src_devicep,
4530 size_t *tmp_size, void **tmp)
4532 size_t dst_slice = element_size;
4533 size_t src_slice = element_size;
4534 size_t j, dst_off, src_off, length;
4535 int i, ret;
4537 if (num_dims == 1)
4539 if (__builtin_mul_overflow (element_size, volume[0], &length)
4540 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4541 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4542 return EINVAL;
4543 if (dst_devicep == NULL && src_devicep == NULL)
4545 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4546 length);
4547 ret = 1;
4549 else if (src_devicep == NULL)
4550 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4551 (char *) dst + dst_off,
4552 (const char *) src + src_off,
4553 length);
4554 else if (dst_devicep == NULL)
4555 ret = src_devicep->dev2host_func (src_devicep->target_id,
4556 (char *) dst + dst_off,
4557 (const char *) src + src_off,
4558 length);
4559 else if (src_devicep == dst_devicep)
4560 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4561 (char *) dst + dst_off,
4562 (const char *) src + src_off,
4563 length);
4564 else
4566 if (*tmp_size == 0)
4568 *tmp_size = length;
4569 *tmp = malloc (length);
4570 if (*tmp == NULL)
4571 return ENOMEM;
4573 else if (*tmp_size < length)
4575 *tmp_size = length;
4576 free (*tmp);
4577 *tmp = malloc (length);
4578 if (*tmp == NULL)
4579 return ENOMEM;
4581 ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4582 (const char *) src + src_off,
4583 length);
4584 if (ret == 1)
4585 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4586 (char *) dst + dst_off, *tmp,
4587 length);
4589 return ret ? 0 : EINVAL;
4592 /* host->device, device->host and intra-device copies. */
4593 if (num_dims == 2
4594 && ((src_devicep
4595 && src_devicep == dst_devicep
4596 && src_devicep->memcpy2d_func)
4597 || (!src_devicep != !dst_devicep
4598 && ((src_devicep && src_devicep->memcpy2d_func)
4599 || (dst_devicep && dst_devicep->memcpy2d_func)))))
4601 size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4602 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4603 int src_id = src_devicep ? src_devicep->target_id : -1;
4604 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4606 if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4607 || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4608 || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4609 || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4610 || __builtin_mul_overflow (src_offsets[1], element_size,
4611 &src_off_sz1))
4612 return EINVAL;
4613 ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4614 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4615 src, src_off_sz1, src_offsets[0], src_sz1);
4616 if (ret != -1)
4617 return ret ? 0 : EINVAL;
4619 else if (num_dims == 3
4620 && ((src_devicep
4621 && src_devicep == dst_devicep
4622 && src_devicep->memcpy3d_func)
4623 || (!src_devicep != !dst_devicep
4624 && ((src_devicep && src_devicep->memcpy3d_func)
4625 || (dst_devicep && dst_devicep->memcpy3d_func)))))
4627 size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4628 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4629 int src_id = src_devicep ? src_devicep->target_id : -1;
4630 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4632 if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4633 || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4634 || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4635 || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4636 || __builtin_mul_overflow (src_offsets[2], element_size,
4637 &src_off_sz2))
4638 return EINVAL;
4639 ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4640 dst, dst_off_sz2, dst_offsets[1],
4641 dst_offsets[0], dst_sz2, dst_dimensions[1],
4642 src, src_off_sz2, src_offsets[1],
4643 src_offsets[0], src_sz2, src_dimensions[1]);
4644 if (ret != -1)
4645 return ret ? 0 : EINVAL;
4648 for (i = 1; i < num_dims; i++)
4649 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4650 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4651 return EINVAL;
4652 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4653 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4654 return EINVAL;
4655 for (j = 0; j < volume[0]; j++)
4657 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4658 (const char *) src + src_off,
4659 element_size, num_dims - 1,
4660 volume + 1, dst_offsets + 1,
4661 src_offsets + 1, dst_dimensions + 1,
4662 src_dimensions + 1, dst_devicep,
4663 src_devicep, tmp_size, tmp);
4664 if (ret)
4665 return ret;
4666 dst_off += dst_slice;
4667 src_off += src_slice;
4669 return 0;
4672 static int
4673 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4674 int src_device_num,
4675 struct gomp_device_descr **dst_devicep,
4676 struct gomp_device_descr **src_devicep)
4678 if (!dst && !src)
4679 return INT_MAX;
4681 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4682 dst_devicep, src_devicep);
4683 if (ret)
4684 return ret;
4686 return 0;
4689 static int
4690 omp_target_memcpy_rect_copy (void *dst, const void *src,
4691 size_t element_size, int num_dims,
4692 const size_t *volume, const size_t *dst_offsets,
4693 const size_t *src_offsets,
4694 const size_t *dst_dimensions,
4695 const size_t *src_dimensions,
4696 struct gomp_device_descr *dst_devicep,
4697 struct gomp_device_descr *src_devicep)
4699 size_t tmp_size = 0;
4700 void *tmp = NULL;
4701 bool lock_src;
4702 bool lock_dst;
4704 lock_src = src_devicep != NULL;
4705 lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
4706 if (lock_src)
4707 gomp_mutex_lock (&src_devicep->lock);
4708 if (lock_dst)
4709 gomp_mutex_lock (&dst_devicep->lock);
4710 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4711 volume, dst_offsets, src_offsets,
4712 dst_dimensions, src_dimensions,
4713 dst_devicep, src_devicep,
4714 &tmp_size, &tmp);
4715 if (lock_src)
4716 gomp_mutex_unlock (&src_devicep->lock);
4717 if (lock_dst)
4718 gomp_mutex_unlock (&dst_devicep->lock);
4719 if (tmp)
4720 free (tmp);
4722 return ret;
4725 int
4726 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4727 int num_dims, const size_t *volume,
4728 const size_t *dst_offsets,
4729 const size_t *src_offsets,
4730 const size_t *dst_dimensions,
4731 const size_t *src_dimensions,
4732 int dst_device_num, int src_device_num)
4734 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4736 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4737 src_device_num, &dst_devicep,
4738 &src_devicep);
4740 if (check)
4741 return check;
4743 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4744 volume, dst_offsets, src_offsets,
4745 dst_dimensions, src_dimensions,
4746 dst_devicep, src_devicep);
4748 return ret;
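/* Editorial example: a sketch of copying the interior 2x3 block of a 4x5
   row-major matrix from host to device with the API above (dp and hsrc are
   assumed to be a device buffer and a host matrix of matching shape):

     size_t volume[2]  = { 2, 3 };   // rows, cols to copy
     size_t dst_off[2] = { 1, 1 };   // start at row 1, col 1
     size_t src_off[2] = { 1, 1 };
     size_t dst_dim[2] = { 4, 5 };   // full matrix extents
     size_t src_dim[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (dp, hsrc, sizeof (double), 2, volume,
                                       dst_off, src_off, dst_dim, src_dim,
                                       dev, omp_get_initial_device ());
*/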
4751 typedef struct
4753 void *dst;
4754 const void *src;
4755 size_t element_size;
4756 const size_t *volume;
4757 const size_t *dst_offsets;
4758 const size_t *src_offsets;
4759 const size_t *dst_dimensions;
4760 const size_t *src_dimensions;
4761 struct gomp_device_descr *dst_devicep;
4762 struct gomp_device_descr *src_devicep;
4763 int num_dims;
4764 } omp_target_memcpy_rect_data;
4766 static void
4767 omp_target_memcpy_rect_async_helper (void *args)
4769 omp_target_memcpy_rect_data *a = args;
4770 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4771 a->num_dims, a->volume, a->dst_offsets,
4772 a->src_offsets, a->dst_dimensions,
4773 a->src_dimensions, a->dst_devicep,
4774 a->src_devicep);
4775 if (ret)
4776 gomp_fatal ("omp_target_memcpy_rect failed");
4779 int
4780 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4781 int num_dims, const size_t *volume,
4782 const size_t *dst_offsets,
4783 const size_t *src_offsets,
4784 const size_t *dst_dimensions,
4785 const size_t *src_dimensions,
4786 int dst_device_num, int src_device_num,
4787 int depobj_count, omp_depend_t *depobj_list)
4789 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4790 unsigned flags = 0;
4791 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4792 src_device_num, &dst_devicep,
4793 &src_devicep);
4794 void *depend[depobj_count + 5];
4795 int i;
4797 omp_target_memcpy_rect_data s = {
4798 .dst = dst,
4799 .src = src,
4800 .element_size = element_size,
4801 .num_dims = num_dims,
4802 .volume = volume,
4803 .dst_offsets = dst_offsets,
4804 .src_offsets = src_offsets,
4805 .dst_dimensions = dst_dimensions,
4806 .src_dimensions = src_dimensions,
4807 .dst_devicep = dst_devicep,
4808 .src_devicep = src_devicep
4811 if (check)
4812 return check;
4814 if (depobj_count > 0 && depobj_list != NULL)
4816 flags |= GOMP_TASK_FLAG_DEPEND;
4817 depend[0] = 0;
4818 depend[1] = (void *) (uintptr_t) depobj_count;
4819 depend[2] = depend[3] = depend[4] = 0;
4820 for (i = 0; i < depobj_count; ++i)
4821 depend[i + 5] = &depobj_list[i];
4824 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4825 __alignof__ (s), true, flags, depend, 0, NULL);
4827 return 0;
4830 int
4831 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4832 size_t size, size_t device_offset, int device_num)
4834 if (device_num == omp_initial_device
4835 || device_num == gomp_get_num_devices ())
4836 return EINVAL;
4838 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4839 if (devicep == NULL)
4840 return EINVAL;
4842 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4843 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4844 return EINVAL;
4846 gomp_mutex_lock (&devicep->lock);
4848 struct splay_tree_s *mem_map = &devicep->mem_map;
4849 struct splay_tree_key_s cur_node;
4850 int ret = EINVAL;
4852 cur_node.host_start = (uintptr_t) host_ptr;
4853 cur_node.host_end = cur_node.host_start + size;
4854 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4855 if (n)
4857 if (n->tgt->tgt_start + n->tgt_offset
4858 == (uintptr_t) device_ptr + device_offset
4859 && n->host_start <= cur_node.host_start
4860 && n->host_end >= cur_node.host_end)
4861 ret = 0;
4863 else
4865 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4866 tgt->array = gomp_malloc (sizeof (*tgt->array));
4867 tgt->refcount = 1;
4868 tgt->tgt_start = 0;
4869 tgt->tgt_end = 0;
4870 tgt->to_free = NULL;
4871 tgt->prev = NULL;
4872 tgt->list_count = 0;
4873 tgt->device_descr = devicep;
4874 splay_tree_node array = tgt->array;
4875 splay_tree_key k = &array->key;
4876 k->host_start = cur_node.host_start;
4877 k->host_end = cur_node.host_end;
4878 k->tgt = tgt;
4879 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4880 k->refcount = REFCOUNT_INFINITY;
4881 k->dynamic_refcount = 0;
4882 k->aux = NULL;
4883 array->left = NULL;
4884 array->right = NULL;
4885 splay_tree_insert (&devicep->mem_map, array);
4886 ret = 0;
4888 gomp_mutex_unlock (&devicep->lock);
4889 return ret;
4892 int
4893 omp_target_disassociate_ptr (const void *ptr, int device_num)
4895 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4896 if (devicep == NULL)
4897 return EINVAL;
4899 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4900 return EINVAL;
4902 gomp_mutex_lock (&devicep->lock);
4904 struct splay_tree_s *mem_map = &devicep->mem_map;
4905 struct splay_tree_key_s cur_node;
4906 int ret = EINVAL;
4908 cur_node.host_start = (uintptr_t) ptr;
4909 cur_node.host_end = cur_node.host_start;
4910 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4911 if (n
4912 && n->host_start == cur_node.host_start
4913 && n->refcount == REFCOUNT_INFINITY
4914 && n->tgt->tgt_start == 0
4915 && n->tgt->to_free == NULL
4916 && n->tgt->refcount == 1
4917 && n->tgt->list_count == 0)
4919 splay_tree_remove (&devicep->mem_map, n);
4920 gomp_unmap_tgt (n->tgt);
4921 ret = 0;
4924 gomp_mutex_unlock (&devicep->lock);
4925 return ret;
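/* Editorial example: a sketch pairing a host buffer with device memory that
   was allocated separately, so that later map clauses treat it as present
   (hbuf, n and dev are assumptions of the sketch):

     void *dp = omp_target_alloc (n * sizeof (double), dev);
     omp_target_associate_ptr (hbuf, dp, n * sizeof (double), 0, dev);
     // ... target regions mapping hbuf now reuse dp ...
     omp_target_disassociate_ptr (hbuf, dev);
     omp_target_free (dp, dev);
*/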
4928 void *
4929 omp_get_mapped_ptr (const void *ptr, int device_num)
4931 if (device_num == omp_initial_device
4932 || device_num == omp_get_initial_device ())
4933 return (void *) ptr;
4935 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4936 if (devicep == NULL)
4937 return NULL;
4939 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4940 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4941 return (void *) ptr;
4943 gomp_mutex_lock (&devicep->lock);
4945 struct splay_tree_s *mem_map = &devicep->mem_map;
4946 struct splay_tree_key_s cur_node;
4947 void *ret = NULL;
4949 cur_node.host_start = (uintptr_t) ptr;
4950 cur_node.host_end = cur_node.host_start;
4951 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4953 if (n)
4955 uintptr_t offset = cur_node.host_start - n->host_start;
4956 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
4959 gomp_mutex_unlock (&devicep->lock);
4961 return ret;
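/* Editorial example: a sketch of the lookup above; it yields the device
   address corresponding to a mapped host address, or NULL:

     #pragma omp target enter data map(to: a[0:n])
     void *dp = omp_get_mapped_ptr (a, omp_get_default_device ());
*/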
4964 int
4965 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
4967 if (device_num == omp_initial_device
4968 || device_num == gomp_get_num_devices ())
4969 return true;
4971 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4972 if (devicep == NULL)
4973 return false;
4975 /* TODO: Unified shared memory must be handled when available. */
4977 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
4980 int
4981 omp_pause_resource (omp_pause_resource_t kind, int device_num)
4983 (void) kind;
4984 if (device_num == omp_initial_device
4985 || device_num == gomp_get_num_devices ())
4986 return gomp_pause_host ();
4988 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4989 if (devicep == NULL)
4990 return -1;
4992 /* Do nothing for target devices for now. */
4993 return 0;
4996 int
4997 omp_pause_resource_all (omp_pause_resource_t kind)
4999 (void) kind;
5000 if (gomp_pause_host ())
5001 return -1;
5002 /* Do nothing for target devices for now. */
5003 return 0;
5006 ialias (omp_pause_resource)
5007 ialias (omp_pause_resource_all)
5009 #ifdef PLUGIN_SUPPORT
5011 /* This function tries to load a plugin for DEVICE. The name of the
5012 plugin is passed in PLUGIN_NAME.
5013 The handles of the found functions are stored in the corresponding fields
5014 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5016 static bool
5017 gomp_load_plugin_for_device (struct gomp_device_descr *device,
5018 const char *plugin_name)
5020 const char *err = NULL, *last_missing = NULL;
5022 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5023 if (!plugin_handle)
5024 #if OFFLOAD_DEFAULTED
5025 return 0;
5026 #else
5027 goto dl_fail;
5028 #endif
5030 /* Check if all required functions are available in the plugin and store
5031 their handles. None of the symbols can legitimately be NULL,
5032 so we don't need to check dlerror all the time. */
5033 #define DLSYM(f) \
5034 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5035 goto dl_fail
5036 /* Similar, but a missing function is not an error. The expression
5037 evaluates to false if the lookup failed, true otherwise. */
5038 #define DLSYM_OPT(f, n) \
5039 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5040 || (last_missing = #n, 0))
5042 DLSYM (version);
5043 if (device->version_func () != GOMP_VERSION)
5045 err = "plugin version mismatch";
5046 goto fail;
5049 DLSYM (get_name);
5050 DLSYM (get_caps);
5051 DLSYM (get_type);
5052 DLSYM (get_num_devices);
5053 DLSYM (init_device);
5054 DLSYM (fini_device);
5055 DLSYM (load_image);
5056 DLSYM (unload_image);
5057 DLSYM (alloc);
5058 DLSYM (free);
5059 DLSYM (dev2host);
5060 DLSYM (host2dev);
5061 DLSYM_OPT (memcpy2d, memcpy2d);
5062 DLSYM_OPT (memcpy3d, memcpy3d);
5063 device->capabilities = device->get_caps_func ();
5064 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5066 DLSYM (run);
5067 DLSYM_OPT (async_run, async_run);
5068 DLSYM_OPT (can_run, can_run);
5069 DLSYM (dev2dev);
5071 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5073 if (!DLSYM_OPT (openacc.exec, openacc_exec)
5074 || !DLSYM_OPT (openacc.create_thread_data,
5075 openacc_create_thread_data)
5076 || !DLSYM_OPT (openacc.destroy_thread_data,
5077 openacc_destroy_thread_data)
5078 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5079 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5080 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5081 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5082 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5083 || !DLSYM_OPT (openacc.async.queue_callback,
5084 openacc_async_queue_callback)
5085 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5086 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5087 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5088 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5090 /* Require all the OpenACC handlers if we have
5091 GOMP_OFFLOAD_CAP_OPENACC_200. */
5092 err = "plugin missing OpenACC handler function";
5093 goto fail;
5096 unsigned cuda = 0;
5097 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5098 openacc_cuda_get_current_device);
5099 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5100 openacc_cuda_get_current_context);
5101 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5102 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5103 if (cuda && cuda != 4)
5105 /* Make sure all the CUDA functions are there if any of them are. */
5106 err = "plugin missing OpenACC CUDA handler function";
5107 goto fail;
5110 #undef DLSYM
5111 #undef DLSYM_OPT
5113 return 1;
5115 dl_fail:
5116 err = dlerror ();
5117 fail:
5118 gomp_error ("while loading %s: %s", plugin_name, err);
5119 if (last_missing)
5120 gomp_error ("missing function was %s", last_missing);
5121 if (plugin_handle)
5122 dlclose (plugin_handle);
5124 return 0;
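/* Editorial note: given the DLSYM/DLSYM_OPT definitions above, a call such
   as DLSYM (alloc) expands to:

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   and DLSYM_OPT (can_run, can_run) evaluates to true only if the optional
   "GOMP_OFFLOAD_can_run" symbol was found, recording the name in
   last_missing otherwise.  */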
5127 /* This function finalizes all initialized devices. */
5129 static void
5130 gomp_target_fini (void)
5132 int i;
5133 for (i = 0; i < num_devices; i++)
5135 bool ret = true;
5136 struct gomp_device_descr *devicep = &devices[i];
5137 gomp_mutex_lock (&devicep->lock);
5138 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5139 ret = gomp_fini_device (devicep);
5140 gomp_mutex_unlock (&devicep->lock);
5141 if (!ret)
5142 gomp_fatal ("device finalization failed");
5146 /* This function initializes the runtime for offloading.
5147 It parses the list of offload plugins and tries to load them.
5148 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5149 will be set, and the array DEVICES initialized, containing descriptors for
5150 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
5151 followed by the others. */
5153 static void
5154 gomp_target_init (void)
5156 const char *prefix = "libgomp-plugin-";
5157 const char *suffix = SONAME_SUFFIX (1);
5158 const char *cur, *next;
5159 char *plugin_name;
5160 int i, new_num_devs;
5161 int num_devs = 0, num_devs_openmp;
5162 struct gomp_device_descr *devs = NULL;
5164 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5165 return;
5167 cur = OFFLOAD_PLUGINS;
5168 if (*cur)
5171 struct gomp_device_descr current_device;
5172 size_t prefix_len, suffix_len, cur_len;
5174 next = strchr (cur, ',');
5176 prefix_len = strlen (prefix);
5177 cur_len = next ? next - cur : strlen (cur);
5178 suffix_len = strlen (suffix);
5180 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5181 if (!plugin_name)
5183 num_devs = 0;
5184 break;
5187 memcpy (plugin_name, prefix, prefix_len);
5188 memcpy (plugin_name + prefix_len, cur, cur_len);
5189 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5191 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5193 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5194 new_num_devs = current_device.get_num_devices_func (omp_req);
5195 if (gomp_debug_var > 0 && new_num_devs < 0)
5197 bool found = false;
5198 int type = current_device.get_type_func ();
5199 for (int img = 0; img < num_offload_images; img++)
5200 if (type == offload_images[img].type)
5201 found = true;
5202 if (found)
5204 char buf[sizeof ("unified_address, unified_shared_memory, "
5205 "reverse_offload")];
5206 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5207 char *name = (char *) malloc (cur_len + 1);
5208 memcpy (name, cur, cur_len);
5209 name[cur_len] = '\0';
5210 gomp_debug (1,
5211 "%s devices present but 'omp requires %s' "
5212 "cannot be fulfilled\n", name, buf);
5213 free (name);
5216 else if (new_num_devs >= 1)
5218 /* Augment DEVICES and NUM_DEVICES. */
5220 devs = realloc (devs, (num_devs + new_num_devs)
5221 * sizeof (struct gomp_device_descr));
5222 if (!devs)
5224 num_devs = 0;
5225 free (plugin_name);
5226 break;
5229 current_device.name = current_device.get_name_func ();
5230 /* current_device.capabilities has already been set. */
5231 current_device.type = current_device.get_type_func ();
5232 current_device.mem_map.root = NULL;
5233 current_device.mem_map_rev.root = NULL;
5234 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5235 for (i = 0; i < new_num_devs; i++)
5237 current_device.target_id = i;
5238 devs[num_devs] = current_device;
5239 gomp_mutex_init (&devs[num_devs].lock);
5240 num_devs++;
5245 free (plugin_name);
5246 cur = next + 1;
5248 while (next);
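/* Editorial note (assumption about the usual Linux configuration): with
   OFFLOAD_PLUGINS containing "nvptx" and SONAME_SUFFIX (1) expanding to
   ".so.1", the loop above builds plugin_name as "libgomp-plugin-nvptx.so.1"
   before handing it to gomp_load_plugin_for_device.  */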
5250 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5251 NUM_DEVICES_OPENMP. */
5252 struct gomp_device_descr *devs_s
5253 = malloc (num_devs * sizeof (struct gomp_device_descr));
5254 if (!devs_s)
5256 num_devs = 0;
5257 free (devs);
5258 devs = NULL;
5260 num_devs_openmp = 0;
5261 for (i = 0; i < num_devs; i++)
5262 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5263 devs_s[num_devs_openmp++] = devs[i];
5264 int num_devs_after_openmp = num_devs_openmp;
5265 for (i = 0; i < num_devs; i++)
5266 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5267 devs_s[num_devs_after_openmp++] = devs[i];
5268 free (devs);
5269 devs = devs_s;
5271 for (i = 0; i < num_devs; i++)
5273 /* The 'devices' array can be moved (by the realloc call) until we have
5274 found all the plugins, so registering with the OpenACC runtime (which
5275 takes a copy of the pointer argument) must be delayed until now. */
5276 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5277 goacc_register (&devs[i]);
5279 if (gomp_global_icv.default_device_var == INT_MIN)
5281 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5282 struct gomp_icv_list *none;
5283 none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
5284 gomp_global_icv.default_device_var = (num_devs_openmp
5285 ? 0 : omp_invalid_device);
5286 none->icvs.default_device_var = gomp_global_icv.default_device_var;
5289 num_devices = num_devs;
5290 num_devices_openmp = num_devs_openmp;
5291 devices = devs;
5292 if (atexit (gomp_target_fini) != 0)
5293 gomp_fatal ("atexit failed");
5296 #else /* PLUGIN_SUPPORT */
5297 /* If dlfcn.h is unavailable we always fall back to host execution.
5298 The GOMP_target* routines are just stubs in this case. */
5299 static void
5300 gomp_target_init (void)
5303 #endif /* PLUGIN_SUPPORT */