/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_static
#define splay_tree_c
#include "splay-tree.h"

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* OpenMP requires mask.  */
static int omp_requires_mask;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                                 : omp_initial_device))
        return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && num_devices == 0)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but only the host device is available");
      else if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

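/* For example (hypothetical setup): with two offload devices and
   OMP_DEFAULT_DEVICE=1, 'resolve_device (GOMP_DEVICE_ICV, true)' remaps the
   ICV value to device 1 and returns &devices[1], initializing that device on
   first use.  An id equal to omp_initial_device (or GOMP_DEVICE_HOST_FALLBACK
   when remapped) yields NULL, i.e. host execution; other unavailable ids
   likewise return NULL, or are fatal under OMP_TARGET_OFFLOAD=mandatory.  */
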
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

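/* E.g. for a zero-size key with host_start == host_end == A (a zero-length
   array section), the lookups above try, in order: [A, A+1) (A lies inside a
   mapped region), [A-1, A) (A is one past the end of a mapped region), and
   finally the empty key [A, A) itself.  */
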
static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

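/* Worked example (hypothetical offsets, not from a real mapping):

     gomp_coalesce_buf_add (&cbuf, 0, 64);     -- chunk 0 = [0, 64), use_cnt 1
     gomp_coalesce_buf_add (&cbuf, 128, 128);  -- gap of 64 bytes is below
                                                  MAX_COALESCE_BUF_GAP, so
                                                  chunk 0 grows to [0, 256),
                                                  use_cnt 2
     gomp_coalesce_buf_add (&cbuf, 8192, 16);  -- gap too large, chunk 1 starts

   A region whose start lies below the current chunk's end (offsets must be
   added in increasing order) sets chunk_cnt to -1, disabling coalescing.  */
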
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }

                  /* In an asynchronous context, verify that CBUF isn't used
                     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
                  if (__builtin_expect (aq != NULL, 0))
                    assert (ephemeral);

                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  As we've
             not been able to use CBUF, make a copy of the data into a
             temporary buffer.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
                      "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}

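/* For example, if a single construct maps the same region through two
   clauses, both end up with the same refcount_ptr; the second call finds it
   already recorded in *REFCOUNT_SET and returns early, so the region's
   reference count grows by at most 1 per construct, as OpenMP 5.0
   requires.  */
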
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}

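/* For example, with a reference count of 1 shared by two sibling
   structure-element maps decremented against the same REFCOUNT_SET: the
   first call drops the count to 0 and reports *DO_COPY = *DO_REMOVE = true;
   the second call finds the refcount already seen, so the count stays 0 and
   it reports *DO_COPY = true (that field must still be copied back) but
   *DO_REMOVE = false (the key is removed only once).  */
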
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
          && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If it does occur, the
         address/length adjustment below is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             during the copyback to host.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf,
                  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
        cur_node.tgt_offset = cur_node.host_start;
      else
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Pointer target of array section wasn't mapped");
        }
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
        = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
         array section.  Now subtract bias to get what we want
         to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}

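/* Illustration of the bias handling above: for a host pointer value P and a
   section starting BIAS bytes past it, the lookup key is P + BIAS (the
   section start); once its device address D is found, D - BIAS is written to
   the device pointer, so that the device-side pointer plus BIAS again yields
   the device copy of the section.  */
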
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, implicit, cbuf,
                              refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                                      &tgt->list[i], kind & typemask, false,
                                      implicit, cbuf, refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, implicit, cbuf,
                                  refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp,
                     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          /* As a special case, allow attaching NULL host pointers.  This
             allows e.g. unassociated Fortran pointers to be mapped
             properly.  */
          data = 0;

          gomp_debug (1,
                      "%s: attaching NULL host pointer, target %p "
                      "(struct base %p)\n", __FUNCTION__, (void *) devptr,
                      (void *) (n->tgt->tgt_start + n->tgt_offset));

          gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                              sizeof (void *), true, cbufp);

          return;
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          if (allow_zero_length_array_sections)
            /* When allowing attachment to zero-length array sections, we
               copy the host pointer when the target region is not mapped.  */
            data = target;
          else
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("pointer target not mapped for attach");
            }
        }
      else
        data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}

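/* Worked example (hypothetical addresses, 64-bit host): a struct mapped at
   host [0x1000..0x1040) with device base 0xd000 contains a pointer field at
   host address 0x1010 whose value is 0x2000, and [0x2000..0x2100) is mapped
   at device 0xe000.  On the first attach, idx = (0x1010 - 0x1000) / 8 = 2,
   devptr = 0xd010, and the pointer word at device address 0xd010 is set to
   0xe000; subsequent attaches to the same field merely increment
   attach_count[2].  */
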
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

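/* For OFFSET_STRUCT, hostaddrs[i] is the struct base and hostaddrs[i + 1]
   the first mapped field; e.g. (hypothetically) a field mapped at device
   address 0xd020 that lies 0x20 bytes into the struct yields
   0xd020 + (0x1000 - 0x1020) = 0xd000 as the struct's device address.  */
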
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT
               || (kind & typemask) == GOMP_MAP_STRUCT_UNORD)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask)
                      && sizes[i] != 0)
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH
               || ((kind & typemask)
                   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_INLINED;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 null pointer was there and now a non-null one has been
                 found; that permits transparent re-mapping for Fortran
                 array descriptors which were previously mapped
                 unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, implicit,
                                  NULL, refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_INLINED;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                                            kinds, j))
                                                 & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp, false);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, false, cbufp);
                /* Save device address in hostaddr to permit later
                   availability when doing a deep-firstprivate with pointer
                   attach.  */
                hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
                tgt_size += len;

                /* If followed by GOMP_MAP_ATTACH, pointer assign this
                   firstprivate to hostaddrs[i+1], which is assumed to
                   contain a device address.  */
                if (i + 1 < mapnum
                    && (GOMP_MAP_ATTACH
                        == (typemask & get_kind (short_mapkind, kinds, i+1))))
                  {
                    uintptr_t target = (uintptr_t) hostaddrs[i];
                    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
                    /* Per
                       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
                       "OpenMP: Handle descriptors in target's firstprivate
                       [PR104949]" this probably needs revision for 'aq'
                       usage.  */
                    assert (!aq);
                    gomp_copy_host2dev (devicep, aq, devptr, &target,
                                        sizeof (void *), false, cbufp);
                    ++i;
                  }
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily
                   test 'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT_UNORD:
                if (sizes[i] > 1)
                  {
                    void *first = hostaddrs[i + 1];
                    for (size_t j = i + 1; j < i + sizes[i]; j++)
                      if (hostaddrs[j + 1] != first)
                        {
                          gomp_mutex_unlock (&devicep->lock);
                          gomp_fatal ("Mapped array elements must be the "
                                      "same (%p vs %p)", first,
                                      hostaddrs[j + 1]);
                        }
                  }
                /* Fallthrough.  */
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                if (sizes[last] == 0)
                  cur_node.host_end++;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (sizes[last] == 0)
                  cur_node.host_end--;
                if (n == NULL && cur_node.host_start == cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("Struct pointer member not mapped (%p)",
                                (void*) hostaddrs[first]);
                  }
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    field_tgt_structelem_first = NULL;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp,
                                            refcount_set);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if (i > 0
                    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                        != GOMP_MAP_ALWAYS_POINTER))
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
              case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start
                                            - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      bool zlas
                        = ((kind & typemask)
                           == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp, zlas);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              {
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    /* For this condition to be true, there must be a
                       duplicate struct element mapping.  This can happen
                       with GOMP_MAP_STRUCT_UNORD mappings, for example.  */
                    tgt->list[i].key = n;
                    if (openmp_p)
                      {
                        assert ((n->refcount & REFCOUNT_STRUCTELEM) != 0);
                        assert (field_tgt_structelem_first != NULL);

                        if (i == field_tgt_clear)
                          {
                            n->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                    gomp_increment_refcount (n, refcount_set);
                    tgt->list[i].copy_from
                      = GOMP_MAP_COPY_FROM_P (kind & typemask);
                    tgt->list[i].always_copy_from
                      = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                    tgt->list[i].is_attach = false;
                    tgt->list[i].offset = 0;
                    tgt->list[i].length = k->host_end - k->host_start;
                  }
                else
                  gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                          kind & typemask, false, implicit,
                                          cbufp, refcount_set);
              }
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                k->refcount = 0;
                k->dynamic_refcount = 0;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (openmp_p)
                      {
                        k->refcount = REFCOUNT_STRUCTELEM;
                        if (field_tgt_structelem_first == NULL)
                          {
                            /* Set to first structure element of sequence.  */
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
                            field_tgt_structelem_first = k;
                          }
                        else
                          /* Point to refcount of leading element, but do not
                             increment again.  */
                          k->structelem_refcount_ptr
                            = &field_tgt_structelem_first->structelem_refcount;

                        if (i == field_tgt_clear)
                          {
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                /* First increment, from 0 to 1.  gomp_increment_refcount
                   encapsulates the different increment cases, so use this
                   instead of directly setting 1 during initialization.  */
                gomp_increment_refcount (k, refcount_set);

                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
                    gomp_map_pointer
                      (tgt, aq, (uintptr_t) *(void **) k->host_start,
                       k->tgt_offset, sizes[i], cbufp,
                       ((kind & typemask)
                        == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j]
                                     + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            /* For OpenMP, the use of refcount_sets causes
                               errors if we set k->refcount = 1 above but
                               also increment it again here, for decrementing
                               will not properly match, since we decrement
                               only once for each key's refcount.  Therefore
                               avoid this increment for OpenMP constructs.  */
                            if (!openmp_p)
                              gomp_increment_refcount (k, refcount_set);
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t)
                                              *(void **) hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp, false);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                  case GOMP_MAP_ALWAYS_PRESENT_TO:
                  case GOMP_MAP_ALWAYS_PRESENT_FROM:
                  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
                                  "dev: %d)", (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size,
                                  devicep->target_id);
#else
                      gomp_fatal ("present clause: not present on the device "
                                  "(addr: %p, size: %lu (0x%lx), dev: %d)",
                                  (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size,
                                  devicep->target_id);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), false, cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of
                       the mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's
                       not data allocated by the current call to this
                       function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), true,
                                        NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              true, cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start,
                            false, NULL);
      if (aq)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
      else
        free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

1875 static struct target_mem_desc *
1876 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1877 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1878 bool short_mapkind, htab_t *refcount_set,
1879 enum gomp_map_vars_kind pragma_kind)
1881 /* This management of a local refcount_set is for convenience of callers
1882 who do not share a refcount_set over multiple map/unmap uses. */
1883 htab_t local_refcount_set = NULL;
1884 if (refcount_set == NULL)
1886 local_refcount_set = htab_create (mapnum);
1887 refcount_set = &local_refcount_set;
1890 struct target_mem_desc *tgt;
1891 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1892 sizes, kinds, short_mapkind, refcount_set,
1893 pragma_kind);
1894 if (local_refcount_set)
1895 htab_free (local_refcount_set);
1897 return tgt;
1900 attribute_hidden struct target_mem_desc *
1901 goacc_map_vars (struct gomp_device_descr *devicep,
1902 struct goacc_asyncqueue *aq, size_t mapnum,
1903 void **hostaddrs, void **devaddrs, size_t *sizes,
1904 void *kinds, bool short_mapkind,
1905 enum gomp_map_vars_kind pragma_kind)
1907 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1908 sizes, kinds, short_mapkind, NULL,
1909 GOMP_MAP_VARS_OPENACC | pragma_kind);
1912 static void
1913 gomp_unmap_tgt (struct target_mem_desc *tgt)
1915 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1916 if (tgt->tgt_end)
1917 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1919 free (tgt->array);
1920 free (tgt);
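/* Decrement TGT's reference count and, once it drops to zero, release it
   via gomp_unmap_tgt; returns true if TGT was actually unmapped.  */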
1923 static bool
1924 gomp_unref_tgt (void *ptr)
1926 bool is_tgt_unmapped = false;
1928 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1930 if (tgt->refcount > 1)
1931 tgt->refcount--;
1932 else
1934 gomp_unmap_tgt (tgt);
1935 is_tgt_unmapped = true;
1938 return is_tgt_unmapped;
1941 static void
1942 gomp_unref_tgt_void (void *ptr)
1944 (void) gomp_unref_tgt (ptr);
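/* Remove K from the splay tree SP.  If K shadowed an "omp declare target
   link" key, reinstate that key in the tree, and free K's auxiliary
   data.  */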
1947 static void
1948 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1950 splay_tree_remove (sp, k);
1951 if (k->aux)
1953 if (k->aux->link_key)
1954 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1955 if (k->aux->attach_count)
1956 free (k->aux->attach_count);
1957 free (k->aux);
1958 k->aux = NULL;
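/* Remove the mapping K from DEVICEP's memory map -- for structure-element
   mappings, all sibling element keys of the same structure -- and drop the
   reference on the associated target_mem_desc, asynchronously if AQ is
   non-NULL.  Returns true if the target_mem_desc was unmapped.  */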
1962 static inline __attribute__((always_inline)) bool
1963 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1964 struct goacc_asyncqueue *aq)
1966 bool is_tgt_unmapped = false;
1968 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1970 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1971 /* Infer the splay_tree_key of the first structelem key using the
1972 pointer to the first structelem_refcount. */
1973 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1974 - offsetof (struct splay_tree_key_s,
1975 structelem_refcount));
1976 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1978 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1979 with the splay_tree_keys embedded inside. */
1980 splay_tree_node node =
1981 (splay_tree_node) ((char *) k
1982 - offsetof (struct splay_tree_node_s, key));
1983 while (true)
1985 /* Start from the _FIRST key, and continue for all following
1986 sibling keys. */
1987 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1988 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1989 break;
1990 else
1991 k = &(++node)->key;
1994 else
1995 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1997 if (aq)
1998 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1999 (void *) k->tgt);
2000 else
2001 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2002 return is_tgt_unmapped;
2005 attribute_hidden bool
2006 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2008 return gomp_remove_var_internal (devicep, k, NULL);
2011 /* Remove a variable asynchronously. This actually removes the variable
2012 mapping immediately, but retains the linked target_mem_desc until the
2013 asynchronous operation has completed (as it may still refer to target
2014 memory). The device lock must be held before entry, and remains locked on
2015 exit. */
2017 attribute_hidden void
2018 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2019 struct goacc_asyncqueue *aq)
2021 (void) gomp_remove_var_internal (devicep, k, aq);
2024 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2025 variables back from device to host; if it is false, it is assumed that this
2026 has been done already. */
2028 static inline __attribute__((always_inline)) void
2029 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2030 htab_t *refcount_set, struct goacc_asyncqueue *aq)
2032 struct gomp_device_descr *devicep = tgt->device_descr;
2034 if (tgt->list_count == 0)
2036 free (tgt);
2037 return;
2040 gomp_mutex_lock (&devicep->lock);
2041 if (devicep->state == GOMP_DEVICE_FINALIZED)
2043 gomp_mutex_unlock (&devicep->lock);
2044 free (tgt->array);
2045 free (tgt);
2046 return;
2049 size_t i;
2051 /* We must perform detachments before any copies back to the host. */
2052 for (i = 0; i < tgt->list_count; i++)
2054 splay_tree_key k = tgt->list[i].key;
2056 if (k != NULL && tgt->list[i].is_attach)
2057 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2058 + tgt->list[i].offset,
2059 false, NULL);
2062 for (i = 0; i < tgt->list_count; i++)
2064 splay_tree_key k = tgt->list[i].key;
2065 if (k == NULL)
2066 continue;
2068 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2069 counts ('n->refcount', 'n->dynamic_refcount'). */
2070 if (tgt->list[i].is_attach)
2071 continue;
2073 bool do_copy, do_remove;
2074 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2076 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2077 || tgt->list[i].always_copy_from)
2078 gomp_copy_dev2host (devicep, aq,
2079 (void *) (k->host_start + tgt->list[i].offset),
2080 (void *) (k->tgt->tgt_start + k->tgt_offset
2081 + tgt->list[i].offset),
2082 tgt->list[i].length);
2083 if (do_remove)
2085 struct target_mem_desc *k_tgt = k->tgt;
2086 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2087 /* It would be bad if TGT got unmapped while we're still iterating
2088 over its LIST_COUNT, and we also expect to use it in the following
2089 code. */
2090 assert (!is_tgt_unmapped
2091 || k_tgt != tgt);
2095 if (aq)
2096 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2097 (void *) tgt);
2098 else
2099 gomp_unref_tgt ((void *) tgt);
2101 gomp_mutex_unlock (&devicep->lock);
2104 static void
2105 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2106 htab_t *refcount_set)
2108 /* This management of a local refcount_set is for convenience of callers
2109 who do not share a refcount_set over multiple map/unmap uses. */
2110 htab_t local_refcount_set = NULL;
2111 if (refcount_set == NULL)
2113 local_refcount_set = htab_create (tgt->list_count);
2114 refcount_set = &local_refcount_set;
2117 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2119 if (local_refcount_set)
2120 htab_free (local_refcount_set);
2123 attribute_hidden void
2124 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2125 struct goacc_asyncqueue *aq)
2127 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
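/* Handle the OpenMP 'target update' construct: for each of the MAPNUM
   address ranges in HOSTADDRS/SIZES that is mapped in DEVICEP's memory
   map, copy data to and/or from the device as requested by KINDS.  */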
2130 static void
2131 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2132 size_t *sizes, void *kinds, bool short_mapkind)
2134 size_t i;
2135 struct splay_tree_key_s cur_node;
2136 const int typemask = short_mapkind ? 0xff : 0x7;
2138 if (!devicep)
2139 return;
2141 if (mapnum == 0)
2142 return;
2144 gomp_mutex_lock (&devicep->lock);
2145 if (devicep->state == GOMP_DEVICE_FINALIZED)
2147 gomp_mutex_unlock (&devicep->lock);
2148 return;
2151 for (i = 0; i < mapnum; i++)
2152 if (sizes[i])
2154 cur_node.host_start = (uintptr_t) hostaddrs[i];
2155 cur_node.host_end = cur_node.host_start + sizes[i];
2156 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2157 if (n)
2159 int kind = get_kind (short_mapkind, kinds, i);
2160 if (n->host_start > cur_node.host_start
2161 || n->host_end < cur_node.host_end)
2163 gomp_mutex_unlock (&devicep->lock);
2164 gomp_fatal ("Trying to update [%p..%p) object when "
2165 "only [%p..%p) is mapped",
2166 (void *) cur_node.host_start,
2167 (void *) cur_node.host_end,
2168 (void *) n->host_start,
2169 (void *) n->host_end);
2172 if (n->aux && n->aux->attach_count)
2174 uintptr_t addr = cur_node.host_start;
2175 while (addr < cur_node.host_end)
2177 /* We have to be careful not to overwrite still attached
2178 pointers during host<->device updates. */
2179 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2180 if (n->aux->attach_count[i] == 0)
2182 void *devaddr = (void *) (n->tgt->tgt_start
2183 + n->tgt_offset
2184 + addr - n->host_start);
2185 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2186 gomp_copy_host2dev (devicep, NULL,
2187 devaddr, (void *) addr,
2188 sizeof (void *), false, NULL);
2189 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2190 gomp_copy_dev2host (devicep, NULL,
2191 (void *) addr, devaddr,
2192 sizeof (void *));
2194 addr += sizeof (void *);
2197 else
2199 void *hostaddr = (void *) cur_node.host_start;
2200 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2201 + cur_node.host_start
2202 - n->host_start);
2203 size_t size = cur_node.host_end - cur_node.host_start;
2205 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2206 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2207 false, NULL);
2208 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2209 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2212 else
2214 int kind = get_kind (short_mapkind, kinds, i);
2216 if (GOMP_MAP_PRESENT_P (kind))
2218 /* We already looked up the memory region above and it
2219 was missing. */
2220 gomp_mutex_unlock (&devicep->lock);
2221 #ifdef HAVE_INTTYPES_H
2222 gomp_fatal ("present clause: not present on the device "
2223 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2224 "dev: %d)", (void *) hostaddrs[i],
2225 (uint64_t) sizes[i], (uint64_t) sizes[i],
2226 devicep->target_id);
2227 #else
2228 gomp_fatal ("present clause: not present on the device "
2229 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2230 (void *) hostaddrs[i], (unsigned long) sizes[i],
2231 (unsigned long) sizes[i], devicep->target_id);
2232 #endif
2236 gomp_mutex_unlock (&devicep->lock);
2239 static struct gomp_offload_icv_list *
2240 gomp_get_offload_icv_item (int dev_num)
2242 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2243 while (l != NULL && l->device_num != dev_num)
2244 l = l->next;
2246 return l;
2249 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2250 depending on the device num and the variable hierarchy
2251 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2252 device and thus no item with that device number is contained in
2253 gomp_offload_icv_list, then a new item is created and added to the list. */
2255 static struct gomp_offload_icvs *
2256 get_gomp_offload_icvs (int dev_num)
2258 struct gomp_icv_list *dev
2259 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2260 struct gomp_icv_list *all
2261 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2262 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2263 struct gomp_offload_icv_list *offload_icvs
2264 = gomp_get_offload_icv_item (dev_num);
2266 if (offload_icvs != NULL)
2267 return &offload_icvs->icvs;
2269 struct gomp_offload_icv_list *new
2270 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2272 new->device_num = dev_num;
2273 new->icvs.device_num = dev_num;
2274 new->next = gomp_offload_icv_list;
2276 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2277 new->icvs.nteams = dev_x->icvs.nteams_var;
2278 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2279 new->icvs.nteams = dev->icvs.nteams_var;
2280 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2281 new->icvs.nteams = all->icvs.nteams_var;
2282 else
2283 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2285 if (dev_x != NULL
2286 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2287 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2288 else if (dev != NULL
2289 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2290 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2291 else if (all != NULL
2292 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2293 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2294 else
2295 new->icvs.teams_thread_limit
2296 = gomp_default_icv_values.teams_thread_limit_var;
2298 if (dev_x != NULL
2299 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2300 new->icvs.default_device = dev_x->icvs.default_device_var;
2301 else if (dev != NULL
2302 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2303 new->icvs.default_device = dev->icvs.default_device_var;
2304 else if (all != NULL
2305 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2306 new->icvs.default_device = all->icvs.default_device_var;
2307 else
2308 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2310 gomp_offload_icv_list = new;
2311 return &new->icvs;
2314 /* Load the image pointed to by TARGET_DATA to the device specified by
2315 DEVICEP, and insert into the splay tree the mapping between addresses
2316 from HOST_TABLE and from the loaded target image. We rely on the host
2317 and device compilers emitting variables and functions in the same order. */
2319 static void
2320 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2321 const void *host_table, const void *target_data,
2322 bool is_register_lock)
2324 void **host_func_table = ((void ***) host_table)[0];
2325 void **host_funcs_end = ((void ***) host_table)[1];
2326 void **host_var_table = ((void ***) host_table)[2];
2327 void **host_vars_end = ((void ***) host_table)[3];
2328 void **host_ind_func_table = NULL;
2329 void **host_ind_funcs_end = NULL;
2331 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2333 host_ind_func_table = ((void ***) host_table)[4];
2334 host_ind_funcs_end = ((void ***) host_table)[5];
2337 /* The func and ind_func tables contain only addresses; the var table
2338 contains addresses and corresponding sizes. */
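/* Illustratively, HOST_TABLE is laid out as { funcs_start, funcs_end,
   vars_start, vars_end [, ind_funcs_start, ind_funcs_end] }, with the var
   table holding address/size pairs.  */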
2339 int num_funcs = host_funcs_end - host_func_table;
2340 int num_vars = (host_vars_end - host_var_table) / 2;
2341 int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
2343 /* Load image to device and get target addresses for the image. */
2344 struct addr_pair *target_table = NULL;
2345 uint64_t *rev_target_fn_table = NULL;
2346 int i, num_target_entries;
2348 /* With reverse offload, insert also target-host addresses. */
2349 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2351 num_target_entries
2352 = devicep->load_image_func (devicep->target_id, version,
2353 target_data, &target_table,
2354 rev_lookup ? &rev_target_fn_table : NULL,
2355 num_ind_funcs
2356 ? (uint64_t *) host_ind_func_table : NULL);
2358 if (num_target_entries != num_funcs + num_vars
2359 /* "+1" due to the additional ICV struct. */
2360 && num_target_entries != num_funcs + num_vars + 1)
2362 gomp_mutex_unlock (&devicep->lock);
2363 if (is_register_lock)
2364 gomp_mutex_unlock (&register_lock);
2365 gomp_fatal ("Cannot map target functions or variables"
2366 " (expected %u, have %u)", num_funcs + num_vars,
2367 num_target_entries);
2370 /* Insert host-target address mapping into splay tree. */
2371 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2372 /* "+1" due to the additional ICV struct. */
2373 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2374 * sizeof (*tgt->array));
2375 if (rev_target_fn_table)
2376 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2377 else
2378 tgt->rev_array = NULL;
2379 tgt->refcount = REFCOUNT_INFINITY;
2380 tgt->tgt_start = 0;
2381 tgt->tgt_end = 0;
2382 tgt->to_free = NULL;
2383 tgt->prev = NULL;
2384 tgt->list_count = 0;
2385 tgt->device_descr = devicep;
2386 splay_tree_node array = tgt->array;
2387 reverse_splay_tree_node rev_array = tgt->rev_array;
2389 for (i = 0; i < num_funcs; i++)
2391 splay_tree_key k = &array->key;
2392 k->host_start = (uintptr_t) host_func_table[i];
2393 k->host_end = k->host_start + 1;
2394 k->tgt = tgt;
2395 k->tgt_offset = target_table[i].start;
2396 k->refcount = REFCOUNT_INFINITY;
2397 k->dynamic_refcount = 0;
2398 k->aux = NULL;
2399 array->left = NULL;
2400 array->right = NULL;
2401 splay_tree_insert (&devicep->mem_map, array);
2402 if (rev_target_fn_table)
2404 reverse_splay_tree_key k2 = &rev_array->key;
2405 k2->dev = rev_target_fn_table[i];
2406 k2->k = k;
2407 rev_array->left = NULL;
2408 rev_array->right = NULL;
2409 if (k2->dev != 0)
2410 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2411 rev_array++;
2413 array++;
2416 /* Most significant bit of the size in host and target tables marks
2417 "omp declare target link" variables. */
2418 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2419 const uintptr_t size_mask = ~link_bit;
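/* E.g. on a 64-bit host, link_bit is 0x8000000000000000 and size_mask is
   0x7fffffffffffffff, so a host table size entry of (link_bit | 24)
   denotes a 24-byte "declare target link" variable.  */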
2421 for (i = 0; i < num_vars; i++)
2423 struct addr_pair *target_var = &target_table[num_funcs + i];
2424 uintptr_t target_size = target_var->end - target_var->start;
2425 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2427 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2429 gomp_mutex_unlock (&devicep->lock);
2430 if (is_register_lock)
2431 gomp_mutex_unlock (&register_lock);
2432 gomp_fatal ("Cannot map target variables (size mismatch)");
2435 splay_tree_key k = &array->key;
2436 k->host_start = (uintptr_t) host_var_table[i * 2];
2437 k->host_end
2438 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2439 k->tgt = tgt;
2440 k->tgt_offset = target_var->start;
2441 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2442 k->dynamic_refcount = 0;
2443 k->aux = NULL;
2444 array->left = NULL;
2445 array->right = NULL;
2446 splay_tree_insert (&devicep->mem_map, array);
2447 array++;
2450 /* The last entry is for the ICVs variable.
2451 Tolerate the case where the plugin does not return this entry. */
2452 if (num_funcs + num_vars < num_target_entries)
2454 struct addr_pair *var = &target_table[num_funcs + num_vars];
2456 /* Start address will be non-zero for the ICVs variable if
2457 the variable was found in this image. */
2458 if (var->start != 0)
2460 /* The index of the devicep within devices[] is regarded as its
2461 'device number', which is different from the per-device type
2462 devicep->target_id. */
2463 int dev_num = (int) (devicep - &devices[0]);
2464 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2465 size_t var_size = var->end - var->start;
2466 if (var_size != sizeof (struct gomp_offload_icvs))
2468 gomp_mutex_unlock (&devicep->lock);
2469 if (is_register_lock)
2470 gomp_mutex_unlock (&register_lock);
2471 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2472 "format");
2474 /* Copy the ICVs variable to its place in device memory, thereby
2475 putting its device number into effect. */
2476 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2477 var_size, false, NULL);
2478 splay_tree_key k = &array->key;
2479 k->host_start = (uintptr_t) icvs;
2480 k->host_end
2481 = k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2482 k->tgt = tgt;
2483 k->tgt_offset = var->start;
2484 k->refcount = REFCOUNT_INFINITY;
2485 k->dynamic_refcount = 0;
2486 k->aux = NULL;
2487 array->left = NULL;
2488 array->right = NULL;
2489 splay_tree_insert (&devicep->mem_map, array);
2490 array++;
2494 free (target_table);
2497 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
2498 The device must be locked. */
2500 static void
2501 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2502 unsigned version,
2503 const void *host_table, const void *target_data)
2505 void **host_func_table = ((void ***) host_table)[0];
2506 void **host_funcs_end = ((void ***) host_table)[1];
2507 void **host_var_table = ((void ***) host_table)[2];
2508 void **host_vars_end = ((void ***) host_table)[3];
2510 /* The func table contains only addresses; the var table contains addresses
2511 and corresponding sizes. */
2512 int num_funcs = host_funcs_end - host_func_table;
2513 int num_vars = (host_vars_end - host_var_table) / 2;
2515 struct splay_tree_key_s k;
2516 splay_tree_key node = NULL;
2518 /* Find the mapping at the start of the node array. */
2519 if (num_funcs || num_vars)
2521 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2522 : (uintptr_t) host_var_table[0]);
2523 k.host_end = k.host_start + 1;
2524 node = splay_tree_lookup (&devicep->mem_map, &k);
2527 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2529 gomp_mutex_unlock (&devicep->lock);
2530 gomp_fatal ("image unload fail");
2532 if (devicep->mem_map_rev.root)
2534 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2535 real allocation. */
2536 assert (node && node->tgt && node->tgt->rev_array);
2537 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2538 free (node->tgt->rev_array);
2539 devicep->mem_map_rev.root = NULL;
2542 /* Remove mappings from splay tree. */
2543 int i;
2544 for (i = 0; i < num_funcs; i++)
2546 k.host_start = (uintptr_t) host_func_table[i];
2547 k.host_end = k.host_start + 1;
2548 splay_tree_remove (&devicep->mem_map, &k);
2551 /* Most significant bit of the size in host and target tables marks
2552 "omp declare target link" variables. */
2553 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2554 const uintptr_t size_mask = ~link_bit;
2555 bool is_tgt_unmapped = false;
2557 for (i = 0; i < num_vars; i++)
2559 k.host_start = (uintptr_t) host_var_table[i * 2];
2560 k.host_end
2561 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2563 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2564 splay_tree_remove (&devicep->mem_map, &k);
2565 else
2567 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2568 is_tgt_unmapped = gomp_remove_var (devicep, n);
2572 if (node && !is_tgt_unmapped)
2574 free (node->tgt);
2575 free (node);
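/* Format the names of the OpenMP 'requires' clauses set in REQUIRES_MASK
   as a comma-separated list into BUF of SIZE bytes.  */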
2579 static void
2580 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2582 char *end = buf + size, *p = buf;
2583 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2584 p += snprintf (p, end - p, "unified_address");
2585 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2586 p += snprintf (p, end - p, "%sunified_shared_memory",
2587 (p == buf ? "" : ", "));
2588 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2589 p += snprintf (p, end - p, "%sreverse_offload",
2590 (p == buf ? "" : ", "));
2593 /* This function should be called from every offload image when it is
2594 loaded. It takes the descriptor of the host func and var tables
2595 HOST_TABLE, the TYPE of the target, and DATA. */
2597 void
2598 GOMP_offload_register_ver (unsigned version, const void *host_table,
2599 int target_type, const void *data)
2601 int i;
2603 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2604 gomp_fatal ("Library too old for offload (version %u < %u)",
2605 GOMP_VERSION, GOMP_VERSION_LIB (version));
2607 int omp_req;
2608 const void *target_data;
2609 if (GOMP_VERSION_LIB (version) > 1)
2611 omp_req = (int) (size_t) ((void **) data)[0];
2612 target_data = &((void **) data)[1];
2614 else
2616 omp_req = 0;
2617 target_data = data;
2620 gomp_mutex_lock (&register_lock);
2622 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2624 char buf1[sizeof ("unified_address, unified_shared_memory, "
2625 "reverse_offload")];
2626 char buf2[sizeof ("unified_address, unified_shared_memory, "
2627 "reverse_offload")];
2628 gomp_requires_to_name (buf2, sizeof (buf2),
2629 omp_req != GOMP_REQUIRES_TARGET_USED
2630 ? omp_req : omp_requires_mask);
2631 if (omp_req != GOMP_REQUIRES_TARGET_USED
2632 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2634 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2635 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2636 "in multiple compilation units: '%s' vs. '%s'",
2637 buf1, buf2);
2639 else
2640 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2641 "some compilation units", buf2);
2643 omp_requires_mask = omp_req;
2645 /* Load image to all initialized devices. */
2646 for (i = 0; i < num_devices; i++)
2648 struct gomp_device_descr *devicep = &devices[i];
2649 gomp_mutex_lock (&devicep->lock);
2650 if (devicep->type == target_type
2651 && devicep->state == GOMP_DEVICE_INITIALIZED)
2652 gomp_load_image_to_device (devicep, version,
2653 host_table, target_data, true);
2654 gomp_mutex_unlock (&devicep->lock);
2657 /* Insert image to array of pending images. */
2658 offload_images
2659 = gomp_realloc_unlock (offload_images,
2660 (num_offload_images + 1)
2661 * sizeof (struct offload_image_descr));
2662 offload_images[num_offload_images].version = version;
2663 offload_images[num_offload_images].type = target_type;
2664 offload_images[num_offload_images].host_table = host_table;
2665 offload_images[num_offload_images].target_data = target_data;
2667 num_offload_images++;
2668 gomp_mutex_unlock (&register_lock);
2671 /* Legacy entry point. */
2673 void
2674 GOMP_offload_register (const void *host_table, int target_type,
2675 const void *target_data)
2677 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2680 /* This function should be called from every offload image when it is
2681 unloaded. It takes the descriptor of the host func and var tables
2682 HOST_TABLE, the TYPE of the target, and DATA. */
2684 void
2685 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2686 int target_type, const void *data)
2688 int i;
2690 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2691 gomp_fatal ("Library too old for offload (version %u < %u)",
2692 GOMP_VERSION, GOMP_VERSION_LIB (version));
2694 const void *target_data;
2695 if (GOMP_VERSION_LIB (version) > 1)
2696 target_data = &((void **) data)[1];
2697 else
2698 target_data = data;
2700 gomp_mutex_lock (&register_lock);
2702 /* Unload image from all initialized devices. */
2703 for (i = 0; i < num_devices; i++)
2705 struct gomp_device_descr *devicep = &devices[i];
2706 gomp_mutex_lock (&devicep->lock);
2707 if (devicep->type == target_type
2708 && devicep->state == GOMP_DEVICE_INITIALIZED)
2709 gomp_unload_image_from_device (devicep, version,
2710 host_table, target_data);
2711 gomp_mutex_unlock (&devicep->lock);
2714 /* Remove image from array of pending images. */
2715 for (i = 0; i < num_offload_images; i++)
2716 if (offload_images[i].target_data == target_data)
2718 offload_images[i] = offload_images[--num_offload_images];
2719 break;
2722 gomp_mutex_unlock (&register_lock);
2725 /* Legacy entry point. */
2727 void
2728 GOMP_offload_unregister (const void *host_table, int target_type,
2729 const void *target_data)
2731 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2734 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2735 must be locked on entry, and remains locked on return. */
2737 attribute_hidden void
2738 gomp_init_device (struct gomp_device_descr *devicep)
2740 int i;
2741 if (!devicep->init_device_func (devicep->target_id))
2743 gomp_mutex_unlock (&devicep->lock);
2744 gomp_fatal ("device initialization failed");
2747 /* Load to device all images registered by the moment. */
2748 for (i = 0; i < num_offload_images; i++)
2750 struct offload_image_descr *image = &offload_images[i];
2751 if (image->type == devicep->type)
2752 gomp_load_image_to_device (devicep, image->version,
2753 image->host_table, image->target_data,
2754 false);
2757 /* Initialize OpenACC asynchronous queues. */
2758 goacc_init_asyncqueues (devicep);
2760 devicep->state = GOMP_DEVICE_INITIALIZED;
2763 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2764 must be locked on entry, and remains locked on return. */
2766 attribute_hidden bool
2767 gomp_fini_device (struct gomp_device_descr *devicep)
2769 bool ret = goacc_fini_asyncqueues (devicep);
2770 ret &= devicep->fini_device_func (devicep->target_id);
2771 devicep->state = GOMP_DEVICE_FINALIZED;
2772 return ret;
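/* Unload from DEVICEP all images registered so far; a no-op unless the
   device is in the initialized state.  DEVICEP must be locked on entry,
   and remains locked on return.  */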
2775 attribute_hidden void
2776 gomp_unload_device (struct gomp_device_descr *devicep)
2778 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2780 unsigned i;
2782 /* Unload from device all images registered at the moment. */
2783 for (i = 0; i < num_offload_images; i++)
2785 struct offload_image_descr *image = &offload_images[i];
2786 if (image->type == devicep->type)
2787 gomp_unload_image_from_device (devicep, image->version,
2788 image->host_table,
2789 image->target_data);
2794 /* Host fallback for GOMP_target{,_ext} routines. */
2796 static void
2797 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2798 struct gomp_device_descr *devicep, void **args)
2800 struct gomp_thread old_thr, *thr = gomp_thread ();
2802 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2803 && devicep != NULL)
2804 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2805 "be used for offloading");
2807 old_thr = *thr;
2808 memset (thr, '\0', sizeof (*thr));
2809 if (gomp_places_list)
2811 thr->place = old_thr.place;
2812 thr->ts.place_partition_len = gomp_places_list_len;
2814 if (args)
2815 while (*args)
2817 intptr_t id = (intptr_t) *args++, val;
2818 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2819 val = (intptr_t) *args++;
2820 else
2821 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2822 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2823 continue;
2824 id &= GOMP_TARGET_ARG_ID_MASK;
2825 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2826 continue;
2827 val = val > INT_MAX ? INT_MAX : val;
2828 if (val)
2829 gomp_icv (true)->thread_limit_var = val;
2830 break;
2833 fn (hostaddrs);
2834 gomp_free_thread (thr);
2835 *thr = old_thr;
2838 /* Calculate alignment and size requirements of a private copy of data shared
2839 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
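/* For instance, an 8-byte-aligned 13-byte object first rounds the running
   *TGT_SIZE up to a multiple of 8 and then adds 13 to it; the alignment
   is encoded in the high byte of each short map kind, as
   1 << (kinds[i] >> 8).  */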
2841 static inline void
2842 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2843 unsigned short *kinds, size_t *tgt_align,
2844 size_t *tgt_size)
2846 size_t i;
2847 for (i = 0; i < mapnum; i++)
2848 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2850 size_t align = (size_t) 1 << (kinds[i] >> 8);
2851 if (*tgt_align < align)
2852 *tgt_align = align;
2853 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2854 *tgt_size += sizes[i];
2860 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2860 static inline void
2861 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2862 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2863 size_t tgt_size)
2865 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2866 if (al)
2867 tgt += tgt_align - al;
2868 tgt_size = 0;
2869 size_t i;
2870 for (i = 0; i < mapnum; i++)
2871 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2873 size_t align = (size_t) 1 << (kinds[i] >> 8);
2874 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2875 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2876 hostaddrs[i] = tgt + tgt_size;
2877 tgt_size = tgt_size + sizes[i];
2878 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2880 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2881 ++i;
2886 /* Helper function of GOMP_target{,_ext} routines. */
2888 static void *
2889 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2890 void (*host_fn) (void *))
2892 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2893 return (void *) host_fn;
2894 else
2896 gomp_mutex_lock (&devicep->lock);
2897 if (devicep->state == GOMP_DEVICE_FINALIZED)
2899 gomp_mutex_unlock (&devicep->lock);
2900 return NULL;
2903 struct splay_tree_key_s k;
2904 k.host_start = (uintptr_t) host_fn;
2905 k.host_end = k.host_start + 1;
2906 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2907 gomp_mutex_unlock (&devicep->lock);
2908 if (tgt_fn == NULL)
2909 return NULL;
2911 return (void *) tgt_fn->tgt_offset;
2915 /* Called when encountering a target directive. If DEVICE
2916 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2917 GOMP_DEVICE_HOST_FALLBACK (or any value
2918 larger than last available hw device), use host fallback.
2919 FN is address of host code, UNUSED is part of the current ABI, but
2920 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2921 with MAPNUM entries, with addresses of the host objects,
2922 sizes of the host objects (resp., for the pointer kind, the pointer bias,
2923 with the size assumed to be sizeof (void *)) and kinds. */
2925 void
2926 GOMP_target (int device, void (*fn) (void *), const void *unused,
2927 size_t mapnum, void **hostaddrs, size_t *sizes,
2928 unsigned char *kinds)
2930 struct gomp_device_descr *devicep = resolve_device (device, true);
2932 void *fn_addr;
2933 if (devicep == NULL
2934 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2935 /* All shared memory devices should use the GOMP_target_ext function. */
2936 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2937 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2938 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2940 htab_t refcount_set = htab_create (mapnum);
2941 struct target_mem_desc *tgt_vars
2942 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2943 &refcount_set, GOMP_MAP_VARS_TARGET);
2944 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2945 NULL);
2946 htab_clear (refcount_set);
2947 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2948 htab_free (refcount_set);
2951 static inline unsigned int
2952 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2954 /* If we cannot run asynchronously, simply ignore nowait. */
2955 if (devicep != NULL && devicep->async_run_func == NULL)
2956 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2958 return flags;
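/* Copy the ICV block for DEVICE back from device memory into the
   host-side gomp_offload_icv_list entry, if mapped, so that ICV changes
   made on the device become visible to the host.  */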
2961 static void
2962 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2964 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2965 if (item == NULL)
2966 return;
2968 void *host_ptr = &item->icvs;
2969 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2970 if (dev_ptr != NULL)
2971 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2972 sizeof (struct gomp_offload_icvs));
2975 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2976 and several arguments have been added:
2977 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2978 DEPEND is array of dependencies, see GOMP_task for details.
2980 ARGS is a pointer to an array consisting of a variable number of both
2981 device-independent and device-specific arguments; each argument takes one
2982 or two elements, where the first specifies for which device it is
2983 intended, the type, and optionally also the value. If the value is not
2984 present in the first element, the whole second element is the actual
2985 value. The last element of the array is a single NULL. Among the
2986 device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.
2988 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2989 that value, or 1 if teams construct is not present, or 0, if
2990 teams construct does not have num_teams clause and so the choice is
2991 implementation defined, and -1 if it can't be determined on the host
2992 what value will GOMP_teams have on the device.
2993 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2994 body with that value, or 0, if teams construct does not have thread_limit
2995 clause or the teams construct is not present, or -1 if it can't be
2996 determined on the host what value GOMP_teams will have on the device. */
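/* For example (an illustrative encoding following the scheme above), a
   THREAD_LIMIT of 4 that fits into the value bits can be passed as the
   single element
     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
	       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_THREAD_LIMIT)
   whereas a value too large for that representation takes two elements:
     (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
	       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_THREAD_LIMIT),
     (void *) (intptr_t) value.  */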
2998 void
2999 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
3000 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3001 unsigned int flags, void **depend, void **args)
3003 struct gomp_device_descr *devicep = resolve_device (device, true);
3004 size_t tgt_align = 0, tgt_size = 0;
3005 bool fpc_done = false;
3007 /* Obtain the original TEAMS and THREADS values from ARGS. */
3008 intptr_t orig_teams = 1, orig_threads = 0;
3009 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
3010 void **tmpargs = args;
3011 while (*tmpargs)
3013 intptr_t id = (intptr_t) *tmpargs++, val;
3014 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3016 val = (intptr_t) *tmpargs++;
3017 len = 2;
3019 else
3021 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
3022 len = 1;
3024 num_args += len;
3025 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
3026 continue;
3027 val = val > INT_MAX ? INT_MAX : val;
3028 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
3030 orig_teams = val;
3031 teams_len = len;
3033 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
3035 orig_threads = val;
3036 threads_len = len;
3040 intptr_t new_teams = orig_teams, new_threads = orig_threads;
3041 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3042 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3043 value could not be determined. No change.
3044 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3045 Set device-specific value.
3046 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3047 No change. */
3048 if (orig_teams == -2)
3049 new_teams = 1;
3050 else if (orig_teams == 0)
3052 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3053 if (item != NULL)
3054 new_teams = item->icvs.nteams;
3056 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3057 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3058 e.g. a THREAD_LIMIT clause. */
3059 if (orig_teams > -2 && orig_threads == 0)
3061 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3062 if (item != NULL)
3063 new_threads = item->icvs.teams_thread_limit;
3066 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3067 updated. */
3068 void **new_args = args;
3069 if (orig_teams != new_teams || orig_threads != new_threads)
3071 size_t tms_len = (orig_teams == new_teams
3072 ? teams_len
3073 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3074 ? 1 : 2));
3075 size_t ths_len = (orig_threads == new_threads
3076 ? threads_len
3077 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3078 ? 1 : 2));
3079 /* One additional item after the last arg must be NULL. */
3080 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3081 + ths_len + 1;
3082 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3084 tmpargs = args;
3085 void **tmp_new_args = new_args;
3086 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3087 too if they have not been changed and skipped otherwise. */
3088 while (*tmpargs)
3090 intptr_t id = (intptr_t) *tmpargs;
3091 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3092 && orig_teams != new_teams)
3093 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3094 && orig_threads != new_threads))
3096 tmpargs++;
3097 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3098 tmpargs++;
3100 else
3102 *tmp_new_args++ = *tmpargs++;
3103 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3104 *tmp_new_args++ = *tmpargs++;
3108 /* Add the new TEAMS arg to the new args list if it has been changed. */
3109 if (orig_teams != new_teams)
3111 intptr_t new_val = new_teams;
3112 if (tms_len == 1)
3114 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3115 | GOMP_TARGET_ARG_NUM_TEAMS;
3116 *tmp_new_args++ = (void *) new_val;
3118 else
3120 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3121 | GOMP_TARGET_ARG_NUM_TEAMS);
3122 *tmp_new_args++ = (void *) new_val;
3126 /* Add the new THREADS arg to the new args list if it has been changed. */
3127 if (orig_threads != new_threads)
3129 intptr_t new_val = new_threads;
3130 if (ths_len == 1)
3132 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3133 | GOMP_TARGET_ARG_THREAD_LIMIT;
3134 *tmp_new_args++ = (void *) new_val;
3136 else
3138 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3139 | GOMP_TARGET_ARG_THREAD_LIMIT);
3140 *tmp_new_args++ = (void *) new_val;
3144 *tmp_new_args = NULL;
3147 flags = clear_unsupported_flags (devicep, flags);
3149 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3151 struct gomp_thread *thr = gomp_thread ();
3152 /* Create a team if we don't have any around, as nowait
3153 target tasks make sense to run asynchronously even when
3154 outside of any parallel. */
3155 if (__builtin_expect (thr->ts.team == NULL, 0))
3157 struct gomp_team *team = gomp_new_team (1);
3158 struct gomp_task *task = thr->task;
3159 struct gomp_task **implicit_task = &task;
3160 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3161 team->prev_ts = thr->ts;
3162 thr->ts.team = team;
3163 thr->ts.team_id = 0;
3164 thr->ts.work_share = &team->work_shares[0];
3165 thr->ts.last_work_share = NULL;
3166 #ifdef HAVE_SYNC_BUILTINS
3167 thr->ts.single_count = 0;
3168 #endif
3169 thr->ts.static_trip = 0;
3170 thr->task = &team->implicit_task[0];
3171 gomp_init_task (thr->task, NULL, icv);
3172 while (*implicit_task
3173 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3174 implicit_task = &(*implicit_task)->parent;
3175 if (*implicit_task)
3177 thr->task = *implicit_task;
3178 gomp_end_task ();
3179 free (*implicit_task);
3180 thr->task = &team->implicit_task[0];
3182 else
3183 pthread_setspecific (gomp_thread_destructor, thr);
3184 if (implicit_task != &task)
3186 *implicit_task = thr->task;
3187 thr->task = task;
3190 if (thr->ts.team
3191 && !thr->task->final_task)
3193 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3194 sizes, kinds, flags, depend, new_args,
3195 GOMP_TARGET_TASK_BEFORE_MAP);
3196 return;
3200 /* If there are depend clauses, but nowait is not present
3201 (or we are in a final task), block the parent task until the
3202 dependencies are resolved and then just continue with the rest
3203 of the function as if it is a merged task. */
3204 if (depend != NULL)
3206 struct gomp_thread *thr = gomp_thread ();
3207 if (thr->task && thr->task->depend_hash)
3209 /* If we might need to wait, copy firstprivate now. */
3210 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3211 &tgt_align, &tgt_size);
3212 if (tgt_align)
3214 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3215 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3216 tgt_align, tgt_size);
3218 fpc_done = true;
3219 gomp_task_maybe_wait_for_dependencies (depend);
3223 void *fn_addr;
3224 if (devicep == NULL
3225 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3226 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3227 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3229 if (!fpc_done)
3231 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3232 &tgt_align, &tgt_size);
3233 if (tgt_align)
3235 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3236 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3237 tgt_align, tgt_size);
3240 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3241 return;
3244 struct target_mem_desc *tgt_vars;
3245 htab_t refcount_set = NULL;
3247 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3249 if (!fpc_done)
3251 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3252 &tgt_align, &tgt_size);
3253 if (tgt_align)
3255 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3256 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3257 tgt_align, tgt_size);
3260 tgt_vars = NULL;
3262 else
3264 refcount_set = htab_create (mapnum);
3265 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3266 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3268 devicep->run_func (devicep->target_id, fn_addr,
3269 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3270 new_args);
3271 if (tgt_vars)
3273 htab_clear (refcount_set);
3274 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3276 if (refcount_set)
3277 htab_free (refcount_set);
3279 /* Copy back ICVs from device to host.
3280 HOST_PTR is expected to exist since it was added in
3281 gomp_load_image_to_device if not already available. */
3282 gomp_copy_back_icvs (devicep, device);
3287 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3288 keeping track of all variable handling, assuming that reverse offload
3289 occurs only very rarely. The downside is that the reverse search is slow. */
3291 struct gomp_splay_tree_rev_lookup_data {
3292 uintptr_t tgt_start;
3293 uintptr_t tgt_end;
3294 splay_tree_key key;
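/* Callback for splay_tree_foreach_lazy: record in DATA the first key whose
   device address range matches or overlaps [tgt_start, tgt_end) and return
   1 to stop the walk.  */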
3297 static int
3298 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3300 struct gomp_splay_tree_rev_lookup_data *data;
3301 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3302 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3304 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3305 return 0;
3307 size_t j;
3308 for (j = 0; j < key->tgt->list_count; j++)
3309 if (key->tgt->list[j].key == key)
3310 break;
3311 assert (j < key->tgt->list_count);
3312 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3314 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3315 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3317 data->key = key;
3318 return 1;
3320 return 0;
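/* Find the host-side mapping covering the device address range [TGT_START,
   TGT_END).  For zero-length lookups (TGT_START == TGT_END), retry with
   the range extended by one byte past the end and, unless ZERO_LEN, also
   by one byte before the start.  */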
3323 static inline splay_tree_key
3324 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3325 bool zero_len)
3327 struct gomp_splay_tree_rev_lookup_data data;
3328 data.key = NULL;
3329 data.tgt_start = tgt_start;
3330 data.tgt_end = tgt_end;
3332 if (tgt_start != tgt_end)
3334 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3335 return data.key;
3338 data.tgt_end++;
3339 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3340 if (data.key != NULL || zero_len)
3341 return data.key;
3342 data.tgt_end--;
3344 data.tgt_start--;
3345 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3346 return data.key;
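/* Per-mapping bookkeeping for reverse offload: the original device
   address, whether a host mapping already existed, and whether the
   temporary host copy was allocated with gomp_aligned_alloc.  */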
3349 struct cpy_data
3351 uint64_t devaddr;
3352 bool present, aligned;
3356 /* Search the just-mapped reverse-offload data; returns the index if
3357 found, otherwise a value >= N. */
3359 static inline int
3360 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3361 unsigned short *kinds, uint64_t *sizes, size_t n,
3362 uint64_t tgt_start, uint64_t tgt_end)
3364 const bool short_mapkind = true;
3365 const int typemask = short_mapkind ? 0xff : 0x7;
3366 size_t i;
3367 for (i = 0; i < n; i++)
3369 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3370 == GOMP_MAP_STRUCT);
3371 uint64_t dev_end;
3372 if (!is_struct)
3373 dev_end = d[i].devaddr + sizes[i];
3374 else
3376 if (i + sizes[i] < n)
3377 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3378 else
3379 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3381 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3382 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3383 break;
3384 if (is_struct)
3385 i += sizes[i];
3387 return i;
3390 static inline int
3391 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3392 unsigned short *kinds, uint64_t *sizes,
3393 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3394 bool zero_len)
3396 size_t i;
3397 if (tgt_start != tgt_end)
3398 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3399 tgt_start, tgt_end);
3400 tgt_end++;
3401 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3402 tgt_start, tgt_end);
3403 if (i < n || zero_len)
3404 return i;
3405 tgt_end--;
3407 tgt_start--;
3408 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3409 tgt_start, tgt_end);
3412 /* Handle reverse offload. This is called by the device plugins for a
3413 reverse offload; it is not called if the outer target runs on the host.
3414 The mapping is simplified: device-affecting constructs (except for
3415 target with device(ancestor:1)) must not be encountered; in particular,
3416 no target (enter/exit) data. */
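/* In rough outline (a sketch of the code below): look up the host
   function for FN_PTR via mem_map_rev; unless the device shares memory,
   copy the DEVADDRS/SIZES/KINDS arrays to the host and materialize host
   copies of the mapped data; run HOST_FN; then copy results back and free
   the temporaries.  */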
3418 void
3419 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3420 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3421 struct goacc_asyncqueue *aq)
3423 /* Return early if there is no offload code. */
3424 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3425 return;
3426 /* Currently, this fails because of calculate_firstprivate_requirements
3427 below; it could be fixed, but additional code would need to be updated
3428 to handle 32-bit hosts - thus, it is not worthwhile. */
3429 if (sizeof (void *) != sizeof (uint64_t))
3430 gomp_fatal ("Reverse offload on 32-bit hosts is not supported.");
3432 struct cpy_data *cdata = NULL;
3433 uint64_t *devaddrs;
3434 uint64_t *sizes;
3435 unsigned short *kinds;
3436 const bool short_mapkind = true;
3437 const int typemask = short_mapkind ? 0xff : 0x7;
3438 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3440 reverse_splay_tree_key n;
3441 struct reverse_splay_tree_key_s k;
3442 k.dev = fn_ptr;
3444 gomp_mutex_lock (&devicep->lock);
3445 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3446 gomp_mutex_unlock (&devicep->lock);
3448 if (n == NULL)
3449 gomp_fatal ("Cannot find reverse-offload function");
3450 void (*host_fn)() = (void (*)()) n->k->host_start;
3452 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3454 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3455 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3456 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3458 else
3460 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3461 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3462 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3463 gomp_copy_dev2host (devicep, aq, devaddrs,
3464 (const void *) (uintptr_t) devaddrs_ptr,
3465 mapnum * sizeof (uint64_t));
3466 gomp_copy_dev2host (devicep, aq, sizes,
3467 (const void *) (uintptr_t) sizes_ptr,
3468 mapnum * sizeof (uint64_t));
3469 gomp_copy_dev2host (devicep, aq, kinds,
3470 (const void *) (uintptr_t) kinds_ptr,
3471 mapnum * sizeof (unsigned short));
3472 if (aq && !devicep->openacc.async.synchronize_func (aq))
3473 exit (EXIT_FAILURE);
3476 size_t tgt_align = 0, tgt_size = 0;
3478 /* If actually executed on 32-bit systems, the casts lead to wrong code;
3479 but 32-bit hosts with offloading are not supported; see the top of this function. */
3480 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3481 (void *) (uintptr_t) kinds,
3482 &tgt_align, &tgt_size);
3484 if (tgt_align)
3486 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3487 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3488 if (al)
3489 tgt += tgt_align - al;
3490 tgt_size = 0;
3491 for (uint64_t i = 0; i < mapnum; i++)
3492 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3493 && devaddrs[i] != 0)
3495 size_t align = (size_t) 1 << (kinds[i] >> 8);
3496 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3497 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3498 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3499 (size_t) sizes[i]);
3500 else
3502 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3503 (void *) (uintptr_t) devaddrs[i],
3504 (size_t) sizes[i]);
3505 if (aq && !devicep->openacc.async.synchronize_func (aq))
3506 exit (EXIT_FAILURE);
3508 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3509 tgt_size = tgt_size + sizes[i];
3510 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3511 && i + 1 < mapnum
3512 && ((get_kind (short_mapkind, kinds, i) & typemask)
3513 == GOMP_MAP_ATTACH))
3515 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3516 = (uint64_t) devaddrs[i];
3517 ++i;
3522 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3524 size_t j, struct_cpy = 0;
3525 splay_tree_key n2;
3526 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3527 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3528 gomp_mutex_lock (&devicep->lock);
3529 for (uint64_t i = 0; i < mapnum; i++)
3531 if (devaddrs[i] == 0)
3532 continue;
3533 n = NULL;
3534 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3535 switch (kind)
3537 case GOMP_MAP_FIRSTPRIVATE:
3538 case GOMP_MAP_FIRSTPRIVATE_INT:
3539 continue;
3541 case GOMP_MAP_DELETE:
3542 case GOMP_MAP_RELEASE:
3543 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3544 /* Assume it is present; look it up - but ignore unless the
3545 present clause is there. */
3546 case GOMP_MAP_ALLOC:
3547 case GOMP_MAP_FROM:
3548 case GOMP_MAP_FORCE_ALLOC:
3549 case GOMP_MAP_FORCE_FROM:
3550 case GOMP_MAP_ALWAYS_FROM:
3551 case GOMP_MAP_TO:
3552 case GOMP_MAP_TOFROM:
3553 case GOMP_MAP_FORCE_TO:
3554 case GOMP_MAP_FORCE_TOFROM:
3555 case GOMP_MAP_ALWAYS_TO:
3556 case GOMP_MAP_ALWAYS_TOFROM:
3557 case GOMP_MAP_FORCE_PRESENT:
3558 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3559 case GOMP_MAP_ALWAYS_PRESENT_TO:
3560 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3561 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3562 cdata[i].devaddr = devaddrs[i];
3563 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3564 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3565 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3566 devaddrs[i],
3567 devaddrs[i] + sizes[i], zero_len);
3568 if (j < i)
3570 n2 = NULL;
3571 cdata[i].present = true;
3572 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3574 else
3576 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3577 devaddrs[i],
3578 devaddrs[i] + sizes[i], zero_len);
3579 cdata[i].present = n2 != NULL;
3581 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3583 gomp_mutex_unlock (&devicep->lock);
3584 #ifdef HAVE_INTTYPES_H
3585 gomp_fatal ("present clause: no corresponding data on "
3586 "parent device at %p with size %"PRIu64,
3587 (void *) (uintptr_t) devaddrs[i],
3588 (uint64_t) sizes[i]);
3589 #else
3590 gomp_fatal ("present clause: no corresponding data on "
3591 "parent device at %p with size %lu",
3592 (void *) (uintptr_t) devaddrs[i],
3593 (unsigned long) sizes[i]);
3594 #endif
3595 break;
3597 else if (!cdata[i].present
3598 && kind != GOMP_MAP_DELETE
3599 && kind != GOMP_MAP_RELEASE
3600 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3602 cdata[i].aligned = true;
3603 size_t align = (size_t) 1 << (kinds[i] >> 8);
3604 devaddrs[i]
3605 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3606 sizes[i]);
3608 else if (n2 != NULL)
3609 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3610 - (n2->tgt->tgt_start + n2->tgt_offset));
3611 if (((!cdata[i].present || struct_cpy)
3612 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3613 || kind == GOMP_MAP_FORCE_TO
3614 || kind == GOMP_MAP_FORCE_TOFROM
3615 || GOMP_MAP_ALWAYS_TO_P (kind))
3617 gomp_copy_dev2host (devicep, aq,
3618 (void *) (uintptr_t) devaddrs[i],
3619 (void *) (uintptr_t) cdata[i].devaddr,
3620 sizes[i]);
3621 if (aq && !devicep->openacc.async.synchronize_func (aq))
3623 gomp_mutex_unlock (&devicep->lock);
3624 exit (EXIT_FAILURE);
3627 if (struct_cpy)
3628 struct_cpy--;
3629 break;
3630 case GOMP_MAP_ATTACH:
3631 case GOMP_MAP_POINTER:
3632 case GOMP_MAP_ALWAYS_POINTER:
3633 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3634 devaddrs[i] + sizes[i],
3635 devaddrs[i] + sizes[i]
3636 + sizeof (void*), false);
3637 cdata[i].present = n2 != NULL;
3638 cdata[i].devaddr = devaddrs[i];
3639 if (n2)
3640 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3641 - (n2->tgt->tgt_start + n2->tgt_offset));
3642 else
3644 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3645 devaddrs[i] + sizes[i],
3646 devaddrs[i] + sizes[i]
3647 + sizeof (void*), false);
3648 if (j < i)
3650 cdata[i].present = true;
3651 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3652 - cdata[j].devaddr);
3655 if (!cdata[i].present)
3656 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3657 /* Assume that when present, the pointer is already correct. */
3658 if (!n2)
3659 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3660 = devaddrs[i-1];
3661 break;
3662 case GOMP_MAP_TO_PSET:
3663 /* Assume that when present, the pointers are fine and no 'to:'
3664 is required. */
3665 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3666 devaddrs[i], devaddrs[i] + sizes[i],
3667 false);
3668 cdata[i].present = n2 != NULL;
3669 cdata[i].devaddr = devaddrs[i];
3670 if (n2)
3671 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3672 - (n2->tgt->tgt_start + n2->tgt_offset));
3673 else
3675 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3676 devaddrs[i],
3677 devaddrs[i] + sizes[i], false);
3678 if (j < i)
3680 cdata[i].present = true;
3681 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3682 - cdata[j].devaddr);
3685 if (!cdata[i].present)
3687 cdata[i].aligned = true;
3688 size_t align = (size_t) 1 << (kinds[i] >> 8);
3689 devaddrs[i]
3690 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3691 sizes[i]);
3692 gomp_copy_dev2host (devicep, aq,
3693 (void *) (uintptr_t) devaddrs[i],
3694 (void *) (uintptr_t) cdata[i].devaddr,
3695 sizes[i]);
3696 if (aq && !devicep->openacc.async.synchronize_func (aq))
3698 gomp_mutex_unlock (&devicep->lock);
3699 exit (EXIT_FAILURE);
3702 for (j = i + 1; j < mapnum; j++)
3704 kind = get_kind (short_mapkind, kinds, j) & typemask;
3705 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3706 && !GOMP_MAP_POINTER_P (kind))
3707 break;
3708 if (devaddrs[j] < devaddrs[i])
3709 break;
3710 if (cdata[i].present)
3711 continue;
3712 if (devaddrs[j] == 0)
3714 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3715 continue;
3717 int k;
3718 n2 = NULL;
3719 /* Dereference devaddrs[j] to get the device addr. */
3720 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3721 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3722 + sizes[j]);
3723 cdata[j].present = true;
3724 cdata[j].devaddr = devaddrs[j];
3725 if (devaddrs[j] == 0)
3726 continue;
3727 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3728 devaddrs[j],
3729 devaddrs[j] + sizeof (void*),
3730 false);
3731 if (k < j)
3732 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3733 - cdata[k].devaddr);
3734 else
3736 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3737 devaddrs[j],
3738 devaddrs[j] + sizeof (void*),
3739 false);
3740 if (n2 == NULL)
3742 gomp_mutex_unlock (&devicep->lock);
3743 gomp_fatal ("Pointer target wasn't mapped");
3745 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3746 - (n2->tgt->tgt_start + n2->tgt_offset));
3748 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3749 = (void *) (uintptr_t) devaddrs[j];
3751 i = j - 1;
3752 break;
3753 case GOMP_MAP_STRUCT:
3754 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3755 devaddrs[i + sizes[i]]
3756 + sizes[i + sizes[i]], false);
3757 cdata[i].present = n2 != NULL;
3758 cdata[i].devaddr = devaddrs[i];
3759 struct_cpy = cdata[i].present ? 0 : sizes[i];
3760 if (!n2)
3762 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3763 - devaddrs[i+1]
3764 + sizes[i + sizes[i]]);
3765 size_t align = (size_t) 1 << (kinds[i] >> 8);
3766 cdata[i].aligned = true;
3767 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3768 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3770 else
3771 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3772 - (n2->tgt->tgt_start + n2->tgt_offset));
3773 break;
3774 default:
3775 gomp_mutex_unlock (&devicep->lock);
3776 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3779 gomp_mutex_unlock (&devicep->lock);
3782 host_fn (devaddrs);
3784 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3786 uint64_t struct_cpy = 0;
3787 bool clean_struct = false;
3788 for (uint64_t i = 0; i < mapnum; i++)
3790 if (cdata[i].devaddr == 0)
3791 continue;
3792 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3793 bool copy = !cdata[i].present || struct_cpy;
3794 switch (kind)
3796 case GOMP_MAP_FORCE_FROM:
3797 case GOMP_MAP_FORCE_TOFROM:
3798 case GOMP_MAP_ALWAYS_FROM:
3799 case GOMP_MAP_ALWAYS_TOFROM:
3800 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3801 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3802 copy = true;
3803 /* FALLTHRU */
3804 case GOMP_MAP_FROM:
3805 case GOMP_MAP_TOFROM:
3806 if (copy)
3808 gomp_copy_host2dev (devicep, aq,
3809 (void *) (uintptr_t) cdata[i].devaddr,
3810 (void *) (uintptr_t) devaddrs[i],
3811 sizes[i], false, NULL);
3812 if (aq && !devicep->openacc.async.synchronize_func (aq))
3813 exit (EXIT_FAILURE);
3815 default:
3816 break;
3818 if (struct_cpy)
3820 struct_cpy--;
3821 continue;
3823 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3825 clean_struct = true;
3826 struct_cpy = sizes[i];
3828 else if (!cdata[i].present && cdata[i].aligned)
3829 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3830 else if (!cdata[i].present)
3831 free ((void *) (uintptr_t) devaddrs[i]);
3833 if (clean_struct)
3834 for (uint64_t i = 0; i < mapnum; i++)
3835 if (!cdata[i].present
3836 && ((get_kind (short_mapkind, kinds, i) & typemask)
3837 == GOMP_MAP_STRUCT))
3839 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3840 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3843 free (devaddrs);
3844 free (sizes);
3845 free (kinds);
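/* Illustrative note (simplified): the code above implements the host side
   of reverse offload, i.e. a construct such as

     #pragma omp target device (ancestor: 1) map (tofrom: x)

   encountered inside a target region.  Device addresses are translated
   back to host addresses where mapped (or temporary host copies are
   allocated and filled), host_fn is then run on the host, and
   'from'-style maps are copied back to the device before the temporaries
   are freed.  */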
3849 /* Host fallback for GOMP_target_data{,_ext} routines. */
3851 static void
3852 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3854 struct gomp_task_icv *icv = gomp_icv (false);
3856 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3857 && devicep != NULL)
3858 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3859 "be used for offloading");
3861 if (icv->target_data)
3863 /* Even when doing a host fallback, if there are any active
3864 #pragma omp target data constructs, we need to remember the
3865 new #pragma omp target data; otherwise GOMP_target_end_data
3866 would get out of sync. */
3867 struct target_mem_desc *tgt
3868 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3869 NULL, GOMP_MAP_VARS_DATA);
3870 tgt->prev = icv->target_data;
3871 icv->target_data = tgt;
3875 void
3876 GOMP_target_data (int device, const void *unused, size_t mapnum,
3877 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3879 struct gomp_device_descr *devicep = resolve_device (device, true);
3881 if (devicep == NULL
3882 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3883 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3884 return gomp_target_data_fallback (devicep);
3886 struct target_mem_desc *tgt
3887 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3888 NULL, GOMP_MAP_VARS_DATA);
3889 struct gomp_task_icv *icv = gomp_icv (true);
3890 tgt->prev = icv->target_data;
3891 icv->target_data = tgt;
3894 void
3895 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3896 size_t *sizes, unsigned short *kinds)
3898 struct gomp_device_descr *devicep = resolve_device (device, true);
3900 if (devicep == NULL
3901 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3902 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3903 return gomp_target_data_fallback (devicep);
3905 struct target_mem_desc *tgt
3906 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3907 NULL, GOMP_MAP_VARS_DATA);
3908 struct gomp_task_icv *icv = gomp_icv (true);
3909 tgt->prev = icv->target_data;
3910 icv->target_data = tgt;
3913 void
3914 GOMP_target_end_data (void)
3916 struct gomp_task_icv *icv = gomp_icv (false);
3917 if (icv->target_data)
3919 struct target_mem_desc *tgt = icv->target_data;
3920 icv->target_data = tgt->prev;
3921 gomp_unmap_vars (tgt, true, NULL);
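/* Illustrative sketch (compiler-generated encodings simplified): a user
   construct such as

     #pragma omp target data map (tofrom: a[0:n])
     { ... }

   is lowered to roughly

     GOMP_target_data_ext (device, 1, hostaddrs, sizes, kinds);
     ... body ...
     GOMP_target_end_data ();

   with hostaddrs[0] = a, sizes[0] = n * sizeof (*a) and kinds[0]
   encoding GOMP_MAP_TOFROM; the tgt->prev chain above is what keeps
   nested 'target data' regions properly balanced.  */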
3925 void
3926 GOMP_target_update (int device, const void *unused, size_t mapnum,
3927 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3929 struct gomp_device_descr *devicep = resolve_device (device, true);
3931 if (devicep == NULL
3932 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3933 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3934 return;
3936 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3939 void
3940 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3941 size_t *sizes, unsigned short *kinds,
3942 unsigned int flags, void **depend)
3944 struct gomp_device_descr *devicep = resolve_device (device, true);
3946 /* If there are depend clauses, but nowait is not present,
3947 block the parent task until the dependencies are resolved
3948 and then just continue with the rest of the function as if it
3949 were a merged task. Until we are able to schedule tasks during
3950 variable mapping or unmapping, ignore nowait if depend clauses
3951 are not present. */
3952 if (depend != NULL)
3954 struct gomp_thread *thr = gomp_thread ();
3955 if (thr->task && thr->task->depend_hash)
3957 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3958 && thr->ts.team
3959 && !thr->task->final_task)
3961 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3962 mapnum, hostaddrs, sizes, kinds,
3963 flags | GOMP_TARGET_FLAG_UPDATE,
3964 depend, NULL, GOMP_TARGET_TASK_DATA))
3965 return;
3967 else
3969 struct gomp_team *team = thr->ts.team;
3970 /* If parallel or taskgroup has been cancelled, don't start new
3971 tasks. */
3972 if (__builtin_expect (gomp_cancel_var, 0) && team)
3974 if (gomp_team_barrier_cancelled (&team->barrier))
3975 return;
3976 if (thr->task->taskgroup)
3978 if (thr->task->taskgroup->cancelled)
3979 return;
3980 if (thr->task->taskgroup->workshare
3981 && thr->task->taskgroup->prev
3982 && thr->task->taskgroup->prev->cancelled)
3983 return;
3987 gomp_task_maybe_wait_for_dependencies (depend);
3992 if (devicep == NULL
3993 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3994 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3995 return;
3997 struct gomp_thread *thr = gomp_thread ();
3998 struct gomp_team *team = thr->ts.team;
3999 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4000 if (__builtin_expect (gomp_cancel_var, 0) && team)
4002 if (gomp_team_barrier_cancelled (&team->barrier))
4003 return;
4004 if (thr->task->taskgroup)
4006 if (thr->task->taskgroup->cancelled)
4007 return;
4008 if (thr->task->taskgroup->workshare
4009 && thr->task->taskgroup->prev
4010 && thr->task->taskgroup->prev->cancelled)
4011 return;
4015 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
4018 static void
4019 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
4020 void **hostaddrs, size_t *sizes, unsigned short *kinds,
4021 htab_t *refcount_set)
4023 const int typemask = 0xff;
4024 size_t i;
4025 gomp_mutex_lock (&devicep->lock);
4026 if (devicep->state == GOMP_DEVICE_FINALIZED)
4028 gomp_mutex_unlock (&devicep->lock);
4029 return;
4032 for (i = 0; i < mapnum; i++)
4033 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
4035 struct splay_tree_key_s cur_node;
4036 cur_node.host_start = (uintptr_t) hostaddrs[i];
4037 cur_node.host_end = cur_node.host_start + sizeof (void *);
4038 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
4040 if (n)
4041 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
4042 false, NULL);
4045 int nrmvars = 0;
4046 splay_tree_key remove_vars[mapnum];
4048 for (i = 0; i < mapnum; i++)
4050 struct splay_tree_key_s cur_node;
4051 unsigned char kind = kinds[i] & typemask;
4052 switch (kind)
4054 case GOMP_MAP_FROM:
4055 case GOMP_MAP_ALWAYS_FROM:
4056 case GOMP_MAP_DELETE:
4057 case GOMP_MAP_RELEASE:
4058 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4059 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4060 cur_node.host_start = (uintptr_t) hostaddrs[i];
4061 cur_node.host_end = cur_node.host_start + sizes[i];
4062 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4063 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
4064 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
4065 : splay_tree_lookup (&devicep->mem_map, &cur_node);
4066 if (!k)
4067 continue;
4069 bool delete_p = (kind == GOMP_MAP_DELETE
4070 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4071 bool do_copy, do_remove;
4072 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4073 &do_remove);
4075 if ((kind == GOMP_MAP_FROM && do_copy)
4076 || kind == GOMP_MAP_ALWAYS_FROM)
4078 if (k->aux && k->aux->attach_count)
4080 /* We have to be careful not to overwrite still attached
4081 pointers during the copyback to host. */
4082 uintptr_t addr = k->host_start;
4083 while (addr < k->host_end)
4085 size_t i = (addr - k->host_start) / sizeof (void *);
4086 if (k->aux->attach_count[i] == 0)
4087 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4088 (void *) (k->tgt->tgt_start
4089 + k->tgt_offset
4090 + addr - k->host_start),
4091 sizeof (void *));
4092 addr += sizeof (void *);
4095 else
4096 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4097 (void *) (k->tgt->tgt_start + k->tgt_offset
4098 + cur_node.host_start
4099 - k->host_start),
4100 cur_node.host_end - cur_node.host_start);
4103 /* Structure element lists are removed all at once, which may cause
4104 immediate deallocation of the target_mem_desc and thus errors if
4105 we still have sibling elements left to copy back. While we're
4106 at it, it also seems more disciplined to simply queue all
4107 removals together for processing below.
4109 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4110 not have this problem, since it maintains an additional
4111 tgt->refcount = 1 reference to the target_mem_desc to start with. */
4113 if (do_remove)
4114 remove_vars[nrmvars++] = k;
4115 break;
4117 case GOMP_MAP_DETACH:
4118 break;
4119 default:
4120 gomp_mutex_unlock (&devicep->lock);
4121 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4122 kind);
4126 for (int i = 0; i < nrmvars; i++)
4127 gomp_remove_var (devicep, remove_vars[i]);
4129 gomp_mutex_unlock (&devicep->lock);
4132 void
4133 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4134 size_t *sizes, unsigned short *kinds,
4135 unsigned int flags, void **depend)
4137 struct gomp_device_descr *devicep = resolve_device (device, true);
4139 /* If there are depend clauses, but nowait is not present,
4140 block the parent task until the dependencies are resolved
4141 and then just continue with the rest of the function as if it
4142 were a merged task. Until we are able to schedule tasks during
4143 variable mapping or unmapping, ignore nowait if depend clauses
4144 are not present. */
4145 if (depend != NULL)
4147 struct gomp_thread *thr = gomp_thread ();
4148 if (thr->task && thr->task->depend_hash)
4150 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4151 && thr->ts.team
4152 && !thr->task->final_task)
4154 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4155 mapnum, hostaddrs, sizes, kinds,
4156 flags, depend, NULL,
4157 GOMP_TARGET_TASK_DATA))
4158 return;
4160 else
4162 struct gomp_team *team = thr->ts.team;
4163 /* If parallel or taskgroup has been cancelled, don't start new
4164 tasks. */
4165 if (__builtin_expect (gomp_cancel_var, 0) && team)
4167 if (gomp_team_barrier_cancelled (&team->barrier))
4168 return;
4169 if (thr->task->taskgroup)
4171 if (thr->task->taskgroup->cancelled)
4172 return;
4173 if (thr->task->taskgroup->workshare
4174 && thr->task->taskgroup->prev
4175 && thr->task->taskgroup->prev->cancelled)
4176 return;
4180 gomp_task_maybe_wait_for_dependencies (depend);
4185 if (devicep == NULL
4186 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4187 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4188 return;
4190 struct gomp_thread *thr = gomp_thread ();
4191 struct gomp_team *team = thr->ts.team;
4192 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4193 if (__builtin_expect (gomp_cancel_var, 0) && team)
4195 if (gomp_team_barrier_cancelled (&team->barrier))
4196 return;
4197 if (thr->task->taskgroup)
4199 if (thr->task->taskgroup->cancelled)
4200 return;
4201 if (thr->task->taskgroup->workshare
4202 && thr->task->taskgroup->prev
4203 && thr->task->taskgroup->prev->cancelled)
4204 return;
4208 htab_t refcount_set = htab_create (mapnum);
4210 /* The variables are mapped separately such that they can be released
4211 independently. */
4212 size_t i, j;
4213 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4214 for (i = 0; i < mapnum; i++)
4215 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT
4216 || (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4218 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4219 &kinds[i], true, &refcount_set,
4220 GOMP_MAP_VARS_ENTER_DATA);
4221 i += sizes[i];
4223 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4225 for (j = i + 1; j < mapnum; j++)
4226 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4227 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4228 break;
4229 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4230 &kinds[i], true, &refcount_set,
4231 GOMP_MAP_VARS_ENTER_DATA);
4232 i += j - i - 1;
4234 else if (i + 1 < mapnum
4235 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4236 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4237 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4239 /* An attach operation must be processed together with the mapped
4240 base-pointer list item. */
4241 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4242 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4243 i += 1;
4245 else
4246 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4247 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4248 else
4249 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4250 htab_free (refcount_set);
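/* Illustrative sketch: '#pragma omp target enter data map (to: a[0:n])'
   arrives here without GOMP_TARGET_FLAG_EXIT_DATA set, so each list item
   is mapped separately above (allowing later item-by-item release),
   whereas '#pragma omp target exit data map (from: a[0:n])' sets the
   flag and is handled by gomp_exit_data.  */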
4253 bool
4254 gomp_target_task_fn (void *data)
4256 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4257 struct gomp_device_descr *devicep = ttask->devicep;
4259 if (ttask->fn != NULL)
4261 void *fn_addr;
4262 if (devicep == NULL
4263 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4264 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4265 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4267 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4268 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4269 ttask->args);
4270 return false;
4273 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4275 if (ttask->tgt)
4276 gomp_unmap_vars (ttask->tgt, true, NULL);
4277 return false;
4280 void *actual_arguments;
4281 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4283 ttask->tgt = NULL;
4284 actual_arguments = ttask->hostaddrs;
4286 else
4288 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4289 NULL, ttask->sizes, ttask->kinds, true,
4290 NULL, GOMP_MAP_VARS_TARGET);
4291 actual_arguments = (void *) ttask->tgt->tgt_start;
4293 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4295 assert (devicep->async_run_func);
4296 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4297 ttask->args, (void *) ttask);
4298 return true;
4300 else if (devicep == NULL
4301 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4302 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4303 return false;
4305 size_t i;
4306 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4307 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4308 ttask->kinds, true);
4309 else
4311 htab_t refcount_set = htab_create (ttask->mapnum);
4312 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4313 for (i = 0; i < ttask->mapnum; i++)
4314 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT
4315 || (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4317 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4318 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4319 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4320 i += ttask->sizes[i];
4322 else
4323 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4324 &ttask->kinds[i], true, &refcount_set,
4325 GOMP_MAP_VARS_ENTER_DATA);
4326 else
4327 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4328 ttask->kinds, &refcount_set);
4329 htab_free (refcount_set);
4331 return false;
4334 void
4335 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4337 if (thread_limit)
4339 struct gomp_task_icv *icv = gomp_icv (true);
4340 icv->thread_limit_var
4341 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4343 (void) num_teams;
4346 bool
4347 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4348 unsigned int thread_limit, bool first)
4350 struct gomp_thread *thr = gomp_thread ();
4351 if (first)
4353 if (thread_limit)
4355 struct gomp_task_icv *icv = gomp_icv (true);
4356 icv->thread_limit_var
4357 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4359 (void) num_teams_high;
4360 if (num_teams_low == 0)
4361 num_teams_low = 1;
4362 thr->num_teams = num_teams_low - 1;
4363 thr->team_num = 0;
4365 else if (thr->team_num == thr->num_teams)
4366 return false;
4367 else
4368 ++thr->team_num;
4369 return true;
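/* Illustrative host-fallback sketch (simplified): a 'teams' construct can
   be executed by iterating until GOMP_teams4 returns false, e.g.

     for (bool first = true;
          GOMP_teams4 (num_teams_lo, num_teams_hi, thread_limit, first);
          first = false)
       ... body of one team ...

   so on the host the requested lower bound of teams is run sequentially
   by a single thread, with thr->team_num selecting the current team.  */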
4372 void *
4373 omp_target_alloc (size_t size, int device_num)
4375 if (device_num == omp_initial_device
4376 || device_num == gomp_get_num_devices ())
4377 return malloc (size);
4379 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4380 if (devicep == NULL)
4381 return NULL;
4383 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4384 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4385 return malloc (size);
4387 gomp_mutex_lock (&devicep->lock);
4388 void *ret = devicep->alloc_func (devicep->target_id, size);
4389 gomp_mutex_unlock (&devicep->lock);
4390 return ret;
4393 void
4394 omp_target_free (void *device_ptr, int device_num)
4396 if (device_num == omp_initial_device
4397 || device_num == gomp_get_num_devices ())
4399 free (device_ptr);
4400 return;
4403 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4404 if (devicep == NULL || device_ptr == NULL)
4405 return;
4407 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4408 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4410 free (device_ptr);
4411 return;
4414 gomp_mutex_lock (&devicep->lock);
4415 gomp_free_device_memory (devicep, device_ptr);
4416 gomp_mutex_unlock (&devicep->lock);
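/* Hypothetical usage sketch for the two routines above; 'dev' is assumed
   to be a valid non-host device number:

     int dev = 0;
     int *p = (int *) omp_target_alloc (1024 * sizeof (int), dev);
     if (p != NULL)
       {
         #pragma omp target is_device_ptr (p) device (dev)
         for (int i = 0; i < 1024; i++)
           p[i] = i;
         omp_target_free (p, dev);
       }

   Passing omp_initial_device (or the host device number) simply forwards
   to malloc/free, as implemented above.  */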
4420 omp_target_is_present (const void *ptr, int device_num)
4422 if (device_num == omp_initial_device
4423 || device_num == gomp_get_num_devices ())
4424 return 1;
4426 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4427 if (devicep == NULL)
4428 return 0;
4430 if (ptr == NULL)
4431 return 1;
4433 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4434 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4435 return 1;
4437 gomp_mutex_lock (&devicep->lock);
4438 struct splay_tree_s *mem_map = &devicep->mem_map;
4439 struct splay_tree_key_s cur_node;
4441 cur_node.host_start = (uintptr_t) ptr;
4442 cur_node.host_end = cur_node.host_start;
4443 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4444 int ret = n != NULL;
4445 gomp_mutex_unlock (&devicep->lock);
4446 return ret;
4449 static int
4450 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4451 struct gomp_device_descr **dst_devicep,
4452 struct gomp_device_descr **src_devicep)
4454 if (dst_device_num != gomp_get_num_devices ()
4455 /* Above gomp_get_num_devices has to be called unconditionally. */
4456 && dst_device_num != omp_initial_device)
4458 *dst_devicep = resolve_device (dst_device_num, false);
4459 if (*dst_devicep == NULL)
4460 return EINVAL;
4462 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4463 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4464 *dst_devicep = NULL;
4467 if (src_device_num != num_devices_openmp
4468 && src_device_num != omp_initial_device)
4470 *src_devicep = resolve_device (src_device_num, false);
4471 if (*src_devicep == NULL)
4472 return EINVAL;
4474 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4475 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4476 *src_devicep = NULL;
4479 return 0;
4482 static int
4483 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4484 size_t dst_offset, size_t src_offset,
4485 struct gomp_device_descr *dst_devicep,
4486 struct gomp_device_descr *src_devicep)
4488 bool ret;
4489 if (src_devicep == NULL && dst_devicep == NULL)
4491 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4492 return 0;
4494 if (src_devicep == NULL)
4496 gomp_mutex_lock (&dst_devicep->lock);
4497 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4498 (char *) dst + dst_offset,
4499 (char *) src + src_offset, length);
4500 gomp_mutex_unlock (&dst_devicep->lock);
4501 return (ret ? 0 : EINVAL);
4503 if (dst_devicep == NULL)
4505 gomp_mutex_lock (&src_devicep->lock);
4506 ret = src_devicep->dev2host_func (src_devicep->target_id,
4507 (char *) dst + dst_offset,
4508 (char *) src + src_offset, length);
4509 gomp_mutex_unlock (&src_devicep->lock);
4510 return (ret ? 0 : EINVAL);
4512 if (src_devicep == dst_devicep)
4514 gomp_mutex_lock (&src_devicep->lock);
4515 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4516 (char *) dst + dst_offset,
4517 (char *) src + src_offset, length);
4518 gomp_mutex_unlock (&src_devicep->lock);
4519 return (ret ? 0 : EINVAL);
4521 return EINVAL;
4525 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4526 size_t src_offset, int dst_device_num, int src_device_num)
4528 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4529 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4530 &dst_devicep, &src_devicep);
4532 if (ret)
4533 return ret;
4535 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4536 dst_devicep, src_devicep);
4538 return ret;
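/* Hypothetical usage sketch: copy a host array into a device buffer,
   with the host acting as the source device:

     double src[256];
     void *dst = omp_target_alloc (sizeof src, dev);
     int err = omp_target_memcpy (dst, src, sizeof src, 0, 0,
                                  dev, omp_initial_device);

   The two zeros are dst_offset and src_offset in bytes; a zero return
   value indicates success, EINVAL an invalid or failed copy.  */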
4541 typedef struct
4543 void *dst;
4544 const void *src;
4545 size_t length;
4546 size_t dst_offset;
4547 size_t src_offset;
4548 struct gomp_device_descr *dst_devicep;
4549 struct gomp_device_descr *src_devicep;
4550 } omp_target_memcpy_data;
4552 static void
4553 omp_target_memcpy_async_helper (void *args)
4555 omp_target_memcpy_data *a = args;
4556 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4557 a->src_offset, a->dst_devicep, a->src_devicep))
4558 gomp_fatal ("omp_target_memcpy failed");
4562 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4563 size_t dst_offset, size_t src_offset,
4564 int dst_device_num, int src_device_num,
4565 int depobj_count, omp_depend_t *depobj_list)
4567 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4568 unsigned int flags = 0;
4569 void *depend[depobj_count + 5];
4570 int i;
4571 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4572 &dst_devicep, &src_devicep);
4574 omp_target_memcpy_data s = {
4575 .dst = dst,
4576 .src = src,
4577 .length = length,
4578 .dst_offset = dst_offset,
4579 .src_offset = src_offset,
4580 .dst_devicep = dst_devicep,
4581 .src_devicep = src_devicep
4584 if (check)
4585 return check;
4587 if (depobj_count > 0 && depobj_list != NULL)
4589 flags |= GOMP_TASK_FLAG_DEPEND;
4590 depend[0] = 0;
4591 depend[1] = (void *) (uintptr_t) depobj_count;
4592 depend[2] = depend[3] = depend[4] = 0;
4593 for (i = 0; i < depobj_count; ++i)
4594 depend[i + 5] = &depobj_list[i];
4597 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4598 __alignof__ (s), true, flags, depend, 0, NULL);
4600 return 0;
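/* Hypothetical usage sketch: the copy above is queued as a separate task
   via GOMP_task, so completion has to be awaited, either through a depend
   object or a taskwait ('dst', 'buf', 'len' and 'dev' are placeholders):

     omp_depend_t dep;
     #pragma omp depobj (dep) depend (inout: buf)
     omp_target_memcpy_async (dst, buf, len, 0, 0, dev,
                              omp_initial_device, 1, &dep);
     #pragma omp taskwait

   With depobj_count == 0 no depend clause is attached and the helper
   task is simply scheduled.  */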
4603 static int
4604 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4605 int num_dims, const size_t *volume,
4606 const size_t *dst_offsets,
4607 const size_t *src_offsets,
4608 const size_t *dst_dimensions,
4609 const size_t *src_dimensions,
4610 struct gomp_device_descr *dst_devicep,
4611 struct gomp_device_descr *src_devicep,
4612 size_t *tmp_size, void **tmp)
4614 size_t dst_slice = element_size;
4615 size_t src_slice = element_size;
4616 size_t j, dst_off, src_off, length;
4617 int i, ret;
4619 if (num_dims == 1)
4621 if (__builtin_mul_overflow (element_size, volume[0], &length)
4622 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4623 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4624 return EINVAL;
4625 if (dst_devicep == NULL && src_devicep == NULL)
4627 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4628 length);
4629 ret = 1;
4631 else if (src_devicep == NULL)
4632 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4633 (char *) dst + dst_off,
4634 (const char *) src + src_off,
4635 length);
4636 else if (dst_devicep == NULL)
4637 ret = src_devicep->dev2host_func (src_devicep->target_id,
4638 (char *) dst + dst_off,
4639 (const char *) src + src_off,
4640 length);
4641 else if (src_devicep == dst_devicep)
4642 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4643 (char *) dst + dst_off,
4644 (const char *) src + src_off,
4645 length);
4646 else
4648 if (*tmp_size == 0)
4650 *tmp_size = length;
4651 *tmp = malloc (length);
4652 if (*tmp == NULL)
4653 return ENOMEM;
4655 else if (*tmp_size < length)
4657 *tmp_size = length;
4658 free (*tmp);
4659 *tmp = malloc (length);
4660 if (*tmp == NULL)
4661 return ENOMEM;
4663 ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4664 (const char *) src + src_off,
4665 length);
4666 if (ret == 1)
4667 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4668 (char *) dst + dst_off, *tmp,
4669 length);
4671 return ret ? 0 : EINVAL;
4674 /* host->device, device->host and intra device. */
4675 if (num_dims == 2
4676 && ((src_devicep
4677 && src_devicep == dst_devicep
4678 && src_devicep->memcpy2d_func)
4679 || (!src_devicep != !dst_devicep
4680 && ((src_devicep && src_devicep->memcpy2d_func)
4681 || (dst_devicep && dst_devicep->memcpy2d_func)))))
4683 size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4684 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4685 int src_id = src_devicep ? src_devicep->target_id : -1;
4686 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4688 if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4689 || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4690 || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4691 || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4692 || __builtin_mul_overflow (src_offsets[1], element_size,
4693 &src_off_sz1))
4694 return EINVAL;
4695 ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4696 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4697 src, src_off_sz1, src_offsets[0], src_sz1);
4698 if (ret != -1)
4699 return ret ? 0 : EINVAL;
4701 else if (num_dims == 3
4702 && ((src_devicep
4703 && src_devicep == dst_devicep
4704 && src_devicep->memcpy3d_func)
4705 || (!src_devicep != !dst_devicep
4706 && ((src_devicep && src_devicep->memcpy3d_func)
4707 || (dst_devicep && dst_devicep->memcpy3d_func)))))
4709 size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4710 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4711 int src_id = src_devicep ? src_devicep->target_id : -1;
4712 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4714 if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4715 || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4716 || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4717 || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4718 || __builtin_mul_overflow (src_offsets[2], element_size,
4719 &src_off_sz2))
4720 return EINVAL;
4721 ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4722 dst, dst_off_sz2, dst_offsets[1],
4723 dst_offsets[0], dst_sz2, dst_dimensions[1],
4724 src, src_off_sz2, src_offsets[1],
4725 src_offsets[0], src_sz2, src_dimensions[1]);
4726 if (ret != -1)
4727 return ret ? 0 : EINVAL;
4730 for (i = 1; i < num_dims; i++)
4731 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4732 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4733 return EINVAL;
4734 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4735 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4736 return EINVAL;
4737 for (j = 0; j < volume[0]; j++)
4739 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4740 (const char *) src + src_off,
4741 element_size, num_dims - 1,
4742 volume + 1, dst_offsets + 1,
4743 src_offsets + 1, dst_dimensions + 1,
4744 src_dimensions + 1, dst_devicep,
4745 src_devicep, tmp_size, tmp);
4746 if (ret)
4747 return ret;
4748 dst_off += dst_slice;
4749 src_off += src_slice;
4751 return 0;
4754 static int
4755 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4756 int src_device_num,
4757 struct gomp_device_descr **dst_devicep,
4758 struct gomp_device_descr **src_devicep)
4760 if (!dst && !src)
4761 return INT_MAX;
4763 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4764 dst_devicep, src_devicep);
4765 if (ret)
4766 return ret;
4768 return 0;
4771 static int
4772 omp_target_memcpy_rect_copy (void *dst, const void *src,
4773 size_t element_size, int num_dims,
4774 const size_t *volume, const size_t *dst_offsets,
4775 const size_t *src_offsets,
4776 const size_t *dst_dimensions,
4777 const size_t *src_dimensions,
4778 struct gomp_device_descr *dst_devicep,
4779 struct gomp_device_descr *src_devicep)
4781 size_t tmp_size = 0;
4782 void *tmp = NULL;
4783 bool lock_src;
4784 bool lock_dst;
4786 lock_src = src_devicep != NULL;
4787 lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
4788 if (lock_src)
4789 gomp_mutex_lock (&src_devicep->lock);
4790 if (lock_dst)
4791 gomp_mutex_lock (&dst_devicep->lock);
4792 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4793 volume, dst_offsets, src_offsets,
4794 dst_dimensions, src_dimensions,
4795 dst_devicep, src_devicep,
4796 &tmp_size, &tmp);
4797 if (lock_src)
4798 gomp_mutex_unlock (&src_devicep->lock);
4799 if (lock_dst)
4800 gomp_mutex_unlock (&dst_devicep->lock);
4801 if (tmp)
4802 free (tmp);
4804 return ret;
4808 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4809 int num_dims, const size_t *volume,
4810 const size_t *dst_offsets,
4811 const size_t *src_offsets,
4812 const size_t *dst_dimensions,
4813 const size_t *src_dimensions,
4814 int dst_device_num, int src_device_num)
4816 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4818 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4819 src_device_num, &dst_devicep,
4820 &src_devicep);
4822 if (check)
4823 return check;
4825 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4826 volume, dst_offsets, src_offsets,
4827 dst_dimensions, src_dimensions,
4828 dst_devicep, src_devicep);
4830 return ret;
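/* Hypothetical usage sketch: copy a 2x3-element sub-block of a 4x5 host
   matrix of doubles to the same position in a 4x5 device matrix:

     size_t volume[2] = { 2, 3 };
     size_t offsets[2] = { 1, 2 };
     size_t dims[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2,
                                       volume, offsets, offsets,
                                       dims, dims, dev,
                                       omp_initial_device);

   volume holds the extent of the copied block per dimension, offsets the
   starting indices, and dims the full dimensions of the two arrays, all
   counted in elements of element_size bytes.  */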
4833 typedef struct
4835 void *dst;
4836 const void *src;
4837 size_t element_size;
4838 const size_t *volume;
4839 const size_t *dst_offsets;
4840 const size_t *src_offsets;
4841 const size_t *dst_dimensions;
4842 const size_t *src_dimensions;
4843 struct gomp_device_descr *dst_devicep;
4844 struct gomp_device_descr *src_devicep;
4845 int num_dims;
4846 } omp_target_memcpy_rect_data;
4848 static void
4849 omp_target_memcpy_rect_async_helper (void *args)
4851 omp_target_memcpy_rect_data *a = args;
4852 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4853 a->num_dims, a->volume, a->dst_offsets,
4854 a->src_offsets, a->dst_dimensions,
4855 a->src_dimensions, a->dst_devicep,
4856 a->src_devicep);
4857 if (ret)
4858 gomp_fatal ("omp_target_memcpy_rect failed");
4862 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4863 int num_dims, const size_t *volume,
4864 const size_t *dst_offsets,
4865 const size_t *src_offsets,
4866 const size_t *dst_dimensions,
4867 const size_t *src_dimensions,
4868 int dst_device_num, int src_device_num,
4869 int depobj_count, omp_depend_t *depobj_list)
4871 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4872 unsigned flags = 0;
4873 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4874 src_device_num, &dst_devicep,
4875 &src_devicep);
4876 void *depend[depobj_count + 5];
4877 int i;
4879 omp_target_memcpy_rect_data s = {
4880 .dst = dst,
4881 .src = src,
4882 .element_size = element_size,
4883 .num_dims = num_dims,
4884 .volume = volume,
4885 .dst_offsets = dst_offsets,
4886 .src_offsets = src_offsets,
4887 .dst_dimensions = dst_dimensions,
4888 .src_dimensions = src_dimensions,
4889 .dst_devicep = dst_devicep,
4890 .src_devicep = src_devicep
4893 if (check)
4894 return check;
4896 if (depobj_count > 0 && depobj_list != NULL)
4898 flags |= GOMP_TASK_FLAG_DEPEND;
4899 depend[0] = 0;
4900 depend[1] = (void *) (uintptr_t) depobj_count;
4901 depend[2] = depend[3] = depend[4] = 0;
4902 for (i = 0; i < depobj_count; ++i)
4903 depend[i + 5] = &depobj_list[i];
4906 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4907 __alignof__ (s), true, flags, depend, 0, NULL);
4909 return 0;
4913 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4914 size_t size, size_t device_offset, int device_num)
4916 if (device_num == omp_initial_device
4917 || device_num == gomp_get_num_devices ())
4918 return EINVAL;
4920 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4921 if (devicep == NULL)
4922 return EINVAL;
4924 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4925 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4926 return EINVAL;
4928 gomp_mutex_lock (&devicep->lock);
4930 struct splay_tree_s *mem_map = &devicep->mem_map;
4931 struct splay_tree_key_s cur_node;
4932 int ret = EINVAL;
4934 cur_node.host_start = (uintptr_t) host_ptr;
4935 cur_node.host_end = cur_node.host_start + size;
4936 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4937 if (n)
4939 if (n->tgt->tgt_start + n->tgt_offset
4940 == (uintptr_t) device_ptr + device_offset
4941 && n->host_start <= cur_node.host_start
4942 && n->host_end >= cur_node.host_end)
4943 ret = 0;
4945 else
4947 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4948 tgt->array = gomp_malloc (sizeof (*tgt->array));
4949 tgt->refcount = 1;
4950 tgt->tgt_start = 0;
4951 tgt->tgt_end = 0;
4952 tgt->to_free = NULL;
4953 tgt->prev = NULL;
4954 tgt->list_count = 0;
4955 tgt->device_descr = devicep;
4956 splay_tree_node array = tgt->array;
4957 splay_tree_key k = &array->key;
4958 k->host_start = cur_node.host_start;
4959 k->host_end = cur_node.host_end;
4960 k->tgt = tgt;
4961 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4962 k->refcount = REFCOUNT_INFINITY;
4963 k->dynamic_refcount = 0;
4964 k->aux = NULL;
4965 array->left = NULL;
4966 array->right = NULL;
4967 splay_tree_insert (&devicep->mem_map, array);
4968 ret = 0;
4970 gomp_mutex_unlock (&devicep->lock);
4971 return ret;
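/* Hypothetical usage sketch: publish a separately allocated device buffer
   as the device copy of a host object, so that subsequent mapping of that
   object becomes a lookup hit instead of a fresh allocation:

     double host_buf[256];
     void *dev_buf = omp_target_alloc (sizeof host_buf, dev);
     omp_target_associate_ptr (host_buf, dev_buf, sizeof host_buf,
                               0, dev);
     ... use host_buf in target regions on device 'dev' ...
     omp_target_disassociate_ptr (host_buf, dev);
     omp_target_free (dev_buf, dev);

   The REFCOUNT_INFINITY refcount set above is what keeps ordinary
   unmapping from removing the association.  */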
4975 omp_target_disassociate_ptr (const void *ptr, int device_num)
4977 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4978 if (devicep == NULL)
4979 return EINVAL;
4981 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4982 return EINVAL;
4984 gomp_mutex_lock (&devicep->lock);
4986 struct splay_tree_s *mem_map = &devicep->mem_map;
4987 struct splay_tree_key_s cur_node;
4988 int ret = EINVAL;
4990 cur_node.host_start = (uintptr_t) ptr;
4991 cur_node.host_end = cur_node.host_start;
4992 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4993 if (n
4994 && n->host_start == cur_node.host_start
4995 && n->refcount == REFCOUNT_INFINITY
4996 && n->tgt->tgt_start == 0
4997 && n->tgt->to_free == NULL
4998 && n->tgt->refcount == 1
4999 && n->tgt->list_count == 0)
5001 splay_tree_remove (&devicep->mem_map, n);
5002 gomp_unmap_tgt (n->tgt);
5003 ret = 0;
5006 gomp_mutex_unlock (&devicep->lock);
5007 return ret;
5010 void *
5011 omp_get_mapped_ptr (const void *ptr, int device_num)
5013 if (device_num == omp_initial_device
5014 || device_num == omp_get_initial_device ())
5015 return (void *) ptr;
5017 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5018 if (devicep == NULL)
5019 return NULL;
5021 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5022 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5023 return (void *) ptr;
5025 gomp_mutex_lock (&devicep->lock);
5027 struct splay_tree_s *mem_map = &devicep->mem_map;
5028 struct splay_tree_key_s cur_node;
5029 void *ret = NULL;
5031 cur_node.host_start = (uintptr_t) ptr;
5032 cur_node.host_end = cur_node.host_start;
5033 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
5035 if (n)
5037 uintptr_t offset = cur_node.host_start - n->host_start;
5038 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
5041 gomp_mutex_unlock (&devicep->lock);
5043 return ret;
5047 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
5049 if (device_num == omp_initial_device
5050 || device_num == gomp_get_num_devices ())
5051 return true;
5053 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5054 if (devicep == NULL)
5055 return false;
5057 /* TODO: Unified shared memory must be handled when available. */
5059 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
5063 omp_pause_resource (omp_pause_resource_t kind, int device_num)
5065 (void) kind;
5066 if (device_num == omp_initial_device
5067 || device_num == gomp_get_num_devices ())
5068 return gomp_pause_host ();
5070 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5071 if (devicep == NULL)
5072 return -1;
5074 /* Do nothing for target devices for now. */
5075 return 0;
5079 omp_pause_resource_all (omp_pause_resource_t kind)
5081 (void) kind;
5082 if (gomp_pause_host ())
5083 return -1;
5084 /* Do nothing for target devices for now. */
5085 return 0;
5088 ialias (omp_pause_resource)
5089 ialias (omp_pause_resource_all)
5091 #ifdef PLUGIN_SUPPORT
5093 /* This function tries to load a plugin for DEVICE. The name of the
5094 plugin is passed in PLUGIN_NAME.
5095 The handles of the found functions are stored in the corresponding fields
5096 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5098 static bool
5099 gomp_load_plugin_for_device (struct gomp_device_descr *device,
5100 const char *plugin_name)
5102 const char *err = NULL, *last_missing = NULL;
5104 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5105 if (!plugin_handle)
5106 #if OFFLOAD_DEFAULTED
5107 return 0;
5108 #else
5109 goto dl_fail;
5110 #endif
5112 /* Check if all required functions are available in the plugin and store
5113 their handles. None of the symbols can legitimately be NULL,
5114 so we don't need to check dlerror all the time. */
5115 #define DLSYM(f) \
5116 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5117 goto dl_fail
5118 /* Similar, but a missing function is not an error; the expression
5119 evaluates to false if the lookup failed, true otherwise. */
5120 #define DLSYM_OPT(f, n) \
5121 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5122 || (last_missing = #n, 0))
5124 DLSYM (version);
5125 if (device->version_func () != GOMP_VERSION)
5127 err = "plugin version mismatch";
5128 goto fail;
5131 DLSYM (get_name);
5132 DLSYM (get_caps);
5133 DLSYM (get_type);
5134 DLSYM (get_num_devices);
5135 DLSYM (init_device);
5136 DLSYM (fini_device);
5137 DLSYM (load_image);
5138 DLSYM (unload_image);
5139 DLSYM (alloc);
5140 DLSYM (free);
5141 DLSYM (dev2host);
5142 DLSYM (host2dev);
5143 DLSYM_OPT (memcpy2d, memcpy2d);
5144 DLSYM_OPT (memcpy3d, memcpy3d);
5145 device->capabilities = device->get_caps_func ();
5146 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5148 DLSYM (run);
5149 DLSYM_OPT (async_run, async_run);
5150 DLSYM_OPT (can_run, can_run);
5151 DLSYM (dev2dev);
5153 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5155 if (!DLSYM_OPT (openacc.exec, openacc_exec)
5156 || !DLSYM_OPT (openacc.create_thread_data,
5157 openacc_create_thread_data)
5158 || !DLSYM_OPT (openacc.destroy_thread_data,
5159 openacc_destroy_thread_data)
5160 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5161 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5162 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5163 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5164 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5165 || !DLSYM_OPT (openacc.async.queue_callback,
5166 openacc_async_queue_callback)
5167 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5168 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5169 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5170 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5172 /* Require all the OpenACC handlers if we have
5173 GOMP_OFFLOAD_CAP_OPENACC_200. */
5174 err = "plugin missing OpenACC handler function";
5175 goto fail;
5178 unsigned cuda = 0;
5179 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5180 openacc_cuda_get_current_device);
5181 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5182 openacc_cuda_get_current_context);
5183 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5184 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5185 if (cuda && cuda != 4)
5187 /* Make sure all the CUDA functions are there if any of them are. */
5188 err = "plugin missing OpenACC CUDA handler function";
5189 goto fail;
5192 #undef DLSYM
5193 #undef DLSYM_OPT
5195 return 1;
5197 dl_fail:
5198 err = dlerror ();
5199 fail:
5200 gomp_error ("while loading %s: %s", plugin_name, err);
5201 if (last_missing)
5202 gomp_error ("missing function was %s", last_missing);
5203 if (plugin_handle)
5204 dlclose (plugin_handle);
5206 return 0;
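/* For reference, a use such as 'DLSYM (alloc)' above expands to roughly

     if (!(device->alloc_func = dlsym (plugin_handle,
                                       "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   i.e. every plugin has to export its entry points under the fixed
   "GOMP_OFFLOAD_<name>" symbol names for the lookup to succeed.  */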
5209 /* This function finalizes all initialized devices. */
5211 static void
5212 gomp_target_fini (void)
5214 int i;
5215 for (i = 0; i < num_devices; i++)
5217 bool ret = true;
5218 struct gomp_device_descr *devicep = &devices[i];
5219 gomp_mutex_lock (&devicep->lock);
5220 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5221 ret = gomp_fini_device (devicep);
5222 gomp_mutex_unlock (&devicep->lock);
5223 if (!ret)
5224 gomp_fatal ("device finalization failed");
5228 /* This function initializes the runtime for offloading.
5229 It parses the list of offload plugins and tries to load them.
5230 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5231 will be set, and the array DEVICES initialized, containing descriptors for
5232 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
5233 followed by the others. */
5235 static void
5236 gomp_target_init (void)
5238 const char *prefix = "libgomp-plugin-";
5239 const char *suffix = SONAME_SUFFIX (1);
5240 const char *cur, *next;
5241 char *plugin_name;
5242 int i, new_num_devs;
5243 int num_devs = 0, num_devs_openmp;
5244 struct gomp_device_descr *devs = NULL;
5246 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5247 return;
5249 cur = OFFLOAD_PLUGINS;
5250 if (*cur)
5253 struct gomp_device_descr current_device;
5254 size_t prefix_len, suffix_len, cur_len;
5256 next = strchr (cur, ',');
5258 prefix_len = strlen (prefix);
5259 cur_len = next ? next - cur : strlen (cur);
5260 suffix_len = strlen (suffix);
5262 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5263 if (!plugin_name)
5265 num_devs = 0;
5266 break;
5269 memcpy (plugin_name, prefix, prefix_len);
5270 memcpy (plugin_name + prefix_len, cur, cur_len);
5271 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5273 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5275 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5276 new_num_devs = current_device.get_num_devices_func (omp_req);
5277 if (gomp_debug_var > 0 && new_num_devs < 0)
5279 bool found = false;
5280 int type = current_device.get_type_func ();
5281 for (int img = 0; img < num_offload_images; img++)
5282 if (type == offload_images[img].type)
5283 found = true;
5284 if (found)
5286 char buf[sizeof ("unified_address, unified_shared_memory, "
5287 "reverse_offload")];
5288 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5289 char *name = (char *) malloc (cur_len + 1);
5290 memcpy (name, cur, cur_len);
5291 name[cur_len] = '\0';
5292 gomp_debug (1,
5293 "%s devices present but 'omp requires %s' "
5294 "cannot be fulfilled\n", name, buf);
5295 free (name);
5298 else if (new_num_devs >= 1)
5300 /* Augment DEVICES and NUM_DEVICES. */
5302 devs = realloc (devs, (num_devs + new_num_devs)
5303 * sizeof (struct gomp_device_descr));
5304 if (!devs)
5306 num_devs = 0;
5307 free (plugin_name);
5308 break;
5311 current_device.name = current_device.get_name_func ();
5312 /* current_device.capabilities has already been set. */
5313 current_device.type = current_device.get_type_func ();
5314 current_device.mem_map.root = NULL;
5315 current_device.mem_map_rev.root = NULL;
5316 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5317 for (i = 0; i < new_num_devs; i++)
5319 current_device.target_id = i;
5320 devs[num_devs] = current_device;
5321 gomp_mutex_init (&devs[num_devs].lock);
5322 num_devs++;
5327 free (plugin_name);
5328 cur = next + 1;
5330 while (next);
5332 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5333 NUM_DEVICES_OPENMP. */
5334 struct gomp_device_descr *devs_s
5335 = malloc (num_devs * sizeof (struct gomp_device_descr));
5336 if (!devs_s)
5338 num_devs = 0;
5339 free (devs);
5340 devs = NULL;
5342 num_devs_openmp = 0;
5343 for (i = 0; i < num_devs; i++)
5344 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5345 devs_s[num_devs_openmp++] = devs[i];
5346 int num_devs_after_openmp = num_devs_openmp;
5347 for (i = 0; i < num_devs; i++)
5348 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5349 devs_s[num_devs_after_openmp++] = devs[i];
5350 free (devs);
5351 devs = devs_s;
5353 for (i = 0; i < num_devs; i++)
5355 /* The 'devices' array can be moved (by the realloc call) until we have
5356 found all the plugins, so registering with the OpenACC runtime (which
5357 takes a copy of the pointer argument) must be delayed until now. */
5358 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5359 goacc_register (&devs[i]);
5361 if (gomp_global_icv.default_device_var == INT_MIN)
5363 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5364 struct gomp_icv_list *none;
5365 none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
5366 gomp_global_icv.default_device_var = (num_devs_openmp
5367 ? 0 : omp_invalid_device);
5368 none->icvs.default_device_var = gomp_global_icv.default_device_var;
5371 num_devices = num_devs;
5372 num_devices_openmp = num_devs_openmp;
5373 devices = devs;
5374 if (atexit (gomp_target_fini) != 0)
5375 gomp_fatal ("atexit failed");
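/* Illustrative example (exact names depend on the build): with
   OFFLOAD_PLUGINS being "nvptx,gcn" and SONAME_SUFFIX (1) expanding to
   ".so.1", the loop above attempts to dlopen libgomp-plugin-nvptx.so.1
   and libgomp-plugin-gcn.so.1 in turn.  */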
5378 #else /* PLUGIN_SUPPORT */
5379 /* If dlfcn.h is unavailable, we always fall back to host execution.
5380 The GOMP_target* routines are just stubs in this case. */
5381 static void
5382 gomp_target_init (void)
5385 #endif /* PLUGIN_SUPPORT */