/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */

struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};
/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != GOMP_DEVICE_HOST_FALLBACK
          && device_id != num_devices_openmp)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
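
/* Reader's note (added): the splay tree cannot match an empty interval
   directly, so for a zero-length key [p, p) gomp_map_lookup temporarily
   widens the key to [p, p + 1) to accept a mapping containing p, then to
   [p - 1, p) to accept one ending at p; gomp_map_0len_lookup only tries the
   first widening.  */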
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets: chunks[i].start is the starting offset and
     chunks[i].end the ending offset, relative to the tgt->tgt_start device
     address, of chunks which are to be copied to buf and later copied to
     the device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in the chunks array, or -1 if coalesce buffering
     should not be performed.  */
  long chunk_cnt;
  /* During construction of the chunks array, how many memory regions are
     within the last chunk.  If there is just one memory region for a chunk,
     we copy it directly to the device rather than going through buf.  */
  long use_cnt;
};
/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */
static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
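
/* Illustration (added; hypothetical offsets): after adding a region at
   [0, 100) and another at [1000, 2000), the 900-byte gap is below
   MAX_COALESCE_BUF_GAP, so both land in one chunk [0, 2000) with
   use_cnt == 2 and are later transferred in a single host-to-device copy.
   A region starting more than 4 KiB past the last chunk's end opens a new
   chunk instead, and a region starting before the last chunk's end disables
   coalescing altogether (chunk_cnt = -1).  */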
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  Make a
             copy of the data into a temporary buffer in those cases.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free temporary buffer once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
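
/* Reader's note (added): gomp_copy_host2dev thus has three paths: an
   asynchronous OpenACC path (with a temporary buffer for EPHEMERAL data), a
   coalescing path that binary-searches CBUF's chunk array and stages the
   data in cbuf->buf for a later combined transfer, and the direct
   synchronous device copy as fallback.  */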
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
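
/* Reader's note (added): for structure element maps, the refcount field
   itself only carries the REFCOUNT_STRUCTELEM tag bits; the shared counter
   lives in the _FIRST sibling's structelem_refcount, which the other
   siblings reach via structelem_refcount_ptr.  Incrementing any sibling
   therefore updates one counter for the whole struct mapping.  */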
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set the reference count to zero.  If REFCOUNT_SET != NULL, use it
   to track already seen refcounts, and only adjust the value if refcount is
   not yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals
   that associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */
static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set is being used, assume all keys are being
       decremented for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
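
/* Worked example (added): with a refcount_set, decrementing a key from 2
   leaves *do_copy == *do_remove == false; decrementing from 1 to 0 on first
   encounter yields *do_copy == *do_remove == true; a sibling struct-element
   map that hits the same, already seen, already zero counter yields
   *do_copy == true but *do_remove == false, so every element gets its
   copy-back while removal is signalled only once.  */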
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  /* Attach kinds must never reach this function ('&&' rather than the
     tautological '||' is what makes this assert meaningful).  */
  assert (kind != GOMP_MAP_ATTACH
          && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, the
         address/length adjustment below is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             during the copyback to host.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
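
/* Reader's note (added): each map kind encodes two fields.  In the "short"
   encoding an unsigned short holds the kind in the low 8 bits (typemask
   0xff) and the log2 of the required alignment in the high bits
   (kind >> rshift with rshift == 8); the narrow unsigned char encoding uses
   a 0x7 kind mask and a shift of 3.  gomp_map_vars_internal below relies on
   this via its rshift/typemask locals.  */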
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf,
                  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
        cur_node.tgt_offset = 0;
      else
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Pointer target of array section wasn't mapped");
        }
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
        = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
         array section.  Now subtract bias to get what we want
         to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, implicit, cbuf,
                              refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, implicit, cbuf,
                                      refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, implicit, cbuf,
                                  refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp,
                     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          if (allow_zero_length_array_sections)
            /* When allowing attachment to zero-length array sections, we
               allow attaching to NULL pointers when the target region is not
               mapped.  */
            data = 0;
          else
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("pointer target not mapped for attach");
            }
        }
      else
        data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}
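
/* Reader's note (added): attach_count is a per-pointer-slot counter on the
   enclosing mapping.  Only the 0 -> 1 attach rewrites the device copy of the
   pointer, and only the 1 -> 0 detach writes the host value back;
   intermediate attaches/detaches merely adjust the counter.  FINALIZE forces
   the counter to 1 first so the final detach always performs the rewrite.  */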
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
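
/* Reader's note (added): gomp_map_val computes the device-side value passed
   to the target region for map entry I: a mapped key yields its device
   address plus offset, OFFSET_INLINED passes hostaddrs[i] through unchanged
   (possibly already rewritten to a device pointer), OFFSET_POINTER yields 0,
   and OFFSET_STRUCT derives the struct base from its first mapped field
   (entry I + 1).  */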
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
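
  /* First pass (added comment): classify every map entry, resolve regions
     that are already mapped via gomp_map_vars_existing, and for regions that
     need fresh device memory accumulate tgt_size/tgt_align, count them in
     not_found_cnt, and register host-to-device candidates with the coalesce
     buffer.  */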
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH
               || ((kind & typemask)
                   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_POINTER;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 null pointer was there and now a non-null one has been
                 found; that permits transparent re-mapping for Fortran array
                 descriptors which were previously mapped unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, implicit,
                                  NULL, refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_POINTER;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                                            kinds, j))
                                                 & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }
  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate a block of tgt_size bytes aligned to tgt_align.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;
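
      /* Second pass (added comment): only run when the first pass left work
         behind; creates the splay tree keys for newly mapped regions,
         assigns their device offsets (including struct fields relative to
         field_tgt_base), performs the host-to-device copies, and handles
         firstprivate, use_device_ptr, attach and always-pointer entries.  */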
      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp, false);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, false, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily test
                   'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    field_tgt_structelem_first = NULL;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp, refcount_set);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
              case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      bool zlas
                        = ((kind & typemask)
                           == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp, zlas);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, false, implicit, cbufp,
                                      refcount_set);
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                k->refcount = 0;
                k->dynamic_refcount = 0;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (openmp_p)
                      {
                        k->refcount = REFCOUNT_STRUCTELEM;
                        if (field_tgt_structelem_first == NULL)
                          {
                            /* Set to first structure element of sequence.  */
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
                            field_tgt_structelem_first = k;
                          }
                        else
                          /* Point to refcount of leading element, but do not
                             increment again.  */
                          k->structelem_refcount_ptr
                            = &field_tgt_structelem_first->structelem_refcount;

                        if (i == field_tgt_clear)
                          {
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                /* First increment, from 0 to 1.  gomp_increment_refcount
                   encapsulates the different increment cases, so use this
                   instead of directly setting 1 during initialization.  */
                gomp_increment_refcount (k, refcount_set);

                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
                    gomp_map_pointer
                      (tgt, aq, (uintptr_t) *(void **) k->host_start,
                       k->tgt_offset, sizes[i], cbufp,
                       ((kind & typemask)
                        == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            /* For OpenMP, the use of refcount_sets causes
                               errors if we set k->refcount = 1 above but also
                               increment it again here, as the decrements would
                               then not match, since we decrement only once
                               for each key's refcount.  Therefore avoid this
                               increment for OpenMP constructs.  */
                            if (!openmp_p)
                              gomp_increment_refcount (k, refcount_set);
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t)
                                              *(void **) hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp, false);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), false, cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), true, NULL);
                  }
                array++;
              }
          }
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              true, cbufp);
        }
    }

  if (cbufp)
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!aq);

      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start,
                            true, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, htab_t *refcount_set,
               enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                sizes, kinds, short_mapkind, refcount_set,
                                pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}
attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
                struct goacc_asyncqueue *aq, size_t mapnum,
                void **hostaddrs, void **devaddrs, size_t *sizes,
                void *kinds, bool short_mapkind,
                enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, NULL,
                                 GOMP_MAP_VARS_OPENACC | pragma_kind);
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
        splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
        free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
                          struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
        /* Infer the splay_tree_key of the first structelem key using the
           pointer to the first structelem_refcount.  */
        k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
                              - offsetof (struct splay_tree_key_s,
                                          structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
         with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
        (splay_tree_node) ((char *) k
                           - offsetof (struct splay_tree_node_s, key));
      while (true)
        {
          /* Starting from the _FIRST key, and continue for all following
             sibling keys.  */
          gomp_remove_splay_tree_key (&devicep->mem_map, k);
          if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
            break;
          else
            k = &(++node)->key;
        }
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
                       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}
1883 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1884 variables back from device to host: if it is false, it is assumed that this
1885 has been done already. */
1887 static inline __attribute__((always_inline)) void
1888 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1889 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1891 struct gomp_device_descr *devicep = tgt->device_descr;
1893 if (tgt->list_count == 0)
1895 free (tgt);
1896 return;
1899 gomp_mutex_lock (&devicep->lock);
1900 if (devicep->state == GOMP_DEVICE_FINALIZED)
1902 gomp_mutex_unlock (&devicep->lock);
1903 free (tgt->array);
1904 free (tgt);
1905 return;
1908 size_t i;
1910 /* We must perform detachments before any copies back to the host. */
1911 for (i = 0; i < tgt->list_count; i++)
1913 splay_tree_key k = tgt->list[i].key;
1915 if (k != NULL && tgt->list[i].is_attach)
1916 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1917 + tgt->list[i].offset,
1918 false, NULL);
1921 for (i = 0; i < tgt->list_count; i++)
1923 splay_tree_key k = tgt->list[i].key;
1924 if (k == NULL)
1925 continue;
1927 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1928 counts ('n->refcount', 'n->dynamic_refcount'). */
1929 if (tgt->list[i].is_attach)
1930 continue;
1932 bool do_copy, do_remove;
1933 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1935 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1936 || tgt->list[i].always_copy_from)
1937 gomp_copy_dev2host (devicep, aq,
1938 (void *) (k->host_start + tgt->list[i].offset),
1939 (void *) (k->tgt->tgt_start + k->tgt_offset
1940 + tgt->list[i].offset),
1941 tgt->list[i].length);
1942 if (do_remove)
1944 struct target_mem_desc *k_tgt = k->tgt;
1945 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, as we also expect to use it in the
	     following code.  */
1949 assert (!is_tgt_unmapped
1950 || k_tgt != tgt);
1954 if (aq)
1955 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1956 (void *) tgt);
1957 else
1958 gomp_unref_tgt ((void *) tgt);
1960 gomp_mutex_unlock (&devicep->lock);
1963 static void
1964 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1965 htab_t *refcount_set)
  /* This management of a local refcount_set is for the convenience of
     callers who do not share a refcount_set over multiple map/unmap uses.  */
1969 htab_t local_refcount_set = NULL;
1970 if (refcount_set == NULL)
1972 local_refcount_set = htab_create (tgt->list_count);
1973 refcount_set = &local_refcount_set;
1976 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
1978 if (local_refcount_set)
1979 htab_free (local_refcount_set);
1982 attribute_hidden void
1983 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1984 struct goacc_asyncqueue *aq)
1986 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1989 static void
1990 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1991 size_t *sizes, void *kinds, bool short_mapkind)
1993 size_t i;
1994 struct splay_tree_key_s cur_node;
1995 const int typemask = short_mapkind ? 0xff : 0x7;
1997 if (!devicep)
1998 return;
2000 if (mapnum == 0)
2001 return;
2003 gomp_mutex_lock (&devicep->lock);
2004 if (devicep->state == GOMP_DEVICE_FINALIZED)
2006 gomp_mutex_unlock (&devicep->lock);
2007 return;
2010 for (i = 0; i < mapnum; i++)
2011 if (sizes[i])
2013 cur_node.host_start = (uintptr_t) hostaddrs[i];
2014 cur_node.host_end = cur_node.host_start + sizes[i];
2015 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2016 if (n)
2018 int kind = get_kind (short_mapkind, kinds, i);
2019 if (n->host_start > cur_node.host_start
2020 || n->host_end < cur_node.host_end)
2022 gomp_mutex_unlock (&devicep->lock);
2023 gomp_fatal ("Trying to update [%p..%p) object when "
2024 "only [%p..%p) is mapped",
2025 (void *) cur_node.host_start,
2026 (void *) cur_node.host_end,
2027 (void *) n->host_start,
2028 (void *) n->host_end);
2031 if (n->aux && n->aux->attach_count)
2033 uintptr_t addr = cur_node.host_start;
2034 while (addr < cur_node.host_end)
2036 /* We have to be careful not to overwrite still attached
2037 pointers during host<->device updates. */
2038 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2039 if (n->aux->attach_count[i] == 0)
2041 void *devaddr = (void *) (n->tgt->tgt_start
2042 + n->tgt_offset
2043 + addr - n->host_start);
2044 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2045 gomp_copy_host2dev (devicep, NULL,
2046 devaddr, (void *) addr,
2047 sizeof (void *), false, NULL);
2048 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2049 gomp_copy_dev2host (devicep, NULL,
2050 (void *) addr, devaddr,
2051 sizeof (void *));
2053 addr += sizeof (void *);
2056 else
2058 void *hostaddr = (void *) cur_node.host_start;
2059 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2060 + cur_node.host_start
2061 - n->host_start);
2062 size_t size = cur_node.host_end - cur_node.host_start;
2064 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2065 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2066 false, NULL);
2067 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2068 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2072 gomp_mutex_unlock (&devicep->lock);
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP.  Also insert into the splay tree the mappings between addresses
   from HOST_TABLE and addresses from the loaded target image.  We rely on
   the host and device compilers emitting variables and functions in the
   same order.  */
2080 static void
2081 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2082 const void *host_table, const void *target_data,
2083 bool is_register_lock)
2085 void **host_func_table = ((void ***) host_table)[0];
2086 void **host_funcs_end = ((void ***) host_table)[1];
2087 void **host_var_table = ((void ***) host_table)[2];
2088 void **host_vars_end = ((void ***) host_table)[3];
2090 /* The func table contains only addresses, the var table contains addresses
2091 and corresponding sizes. */
2092 int num_funcs = host_funcs_end - host_func_table;
2093 int num_vars = (host_vars_end - host_var_table) / 2;
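
  /* Illustrative sketch with hypothetical symbols (not from any real
     image): for a translation unit with two offloaded functions F1 and F2
     and one 16-byte global V, the host table would be laid out as

       void *funcs[] = { &F1, &F2 };        [host_func_table, host_funcs_end)
       void *vars[] = { &V, (void *) 16 };  [host_var_table, host_vars_end)

     giving num_funcs == 2 and num_vars == 1.  */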
  /* Currently the only "other" entry is 'device_num'.  */
2096 int num_others = 1;
2098 /* Load image to device and get target addresses for the image. */
2099 struct addr_pair *target_table = NULL;
2100 int i, num_target_entries;
2102 num_target_entries
2103 = devicep->load_image_func (devicep->target_id, version,
2104 target_data, &target_table);
2106 if (num_target_entries != num_funcs + num_vars
2107 /* Others (device_num) are included as trailing entries in pair list. */
2108 && num_target_entries != num_funcs + num_vars + num_others)
2110 gomp_mutex_unlock (&devicep->lock);
2111 if (is_register_lock)
2112 gomp_mutex_unlock (&register_lock);
2113 gomp_fatal ("Cannot map target functions or variables"
2114 " (expected %u, have %u)", num_funcs + num_vars,
2115 num_target_entries);
2118 /* Insert host-target address mapping into splay tree. */
2119 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2120 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2121 tgt->refcount = REFCOUNT_INFINITY;
2122 tgt->tgt_start = 0;
2123 tgt->tgt_end = 0;
2124 tgt->to_free = NULL;
2125 tgt->prev = NULL;
2126 tgt->list_count = 0;
2127 tgt->device_descr = devicep;
2128 splay_tree_node array = tgt->array;
2130 for (i = 0; i < num_funcs; i++)
2132 splay_tree_key k = &array->key;
2133 k->host_start = (uintptr_t) host_func_table[i];
2134 k->host_end = k->host_start + 1;
2135 k->tgt = tgt;
2136 k->tgt_offset = target_table[i].start;
2137 k->refcount = REFCOUNT_INFINITY;
2138 k->dynamic_refcount = 0;
2139 k->aux = NULL;
2140 array->left = NULL;
2141 array->right = NULL;
2142 splay_tree_insert (&devicep->mem_map, array);
2143 array++;
  /* The most significant bit of the size in the host and target tables
     marks "omp declare target link" variables.  */
2148 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2149 const uintptr_t size_mask = ~link_bit;
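
  /* Worked example: with a 64-bit uintptr_t, link_bit is
     0x8000000000000000.  A 16-byte "omp declare target link" variable is
     announced in the host table with size (16 | link_bit); is_link_var
     below is then true, and size_mask recovers the plain 16.  */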
2151 for (i = 0; i < num_vars; i++)
2153 struct addr_pair *target_var = &target_table[num_funcs + i];
2154 uintptr_t target_size = target_var->end - target_var->start;
2155 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2157 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2159 gomp_mutex_unlock (&devicep->lock);
2160 if (is_register_lock)
2161 gomp_mutex_unlock (&register_lock);
2162 gomp_fatal ("Cannot map target variables (size mismatch)");
2165 splay_tree_key k = &array->key;
2166 k->host_start = (uintptr_t) host_var_table[i * 2];
2167 k->host_end
2168 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2169 k->tgt = tgt;
2170 k->tgt_offset = target_var->start;
2171 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2172 k->dynamic_refcount = 0;
2173 k->aux = NULL;
2174 array->left = NULL;
2175 array->right = NULL;
2176 splay_tree_insert (&devicep->mem_map, array);
2177 array++;
  /* The last entry is for the on-device 'device_num' variable.  Tolerate
     the case where the plugin does not return this entry.  */
2182 if (num_funcs + num_vars < num_target_entries)
2184 struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2185 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2186 was found in this image. */
2187 if (device_num_var->start != 0)
2189 /* The index of the devicep within devices[] is regarded as its
2190 'device number', which is different from the per-device type
2191 devicep->target_id. */
2192 int device_num_val = (int) (devicep - &devices[0]);
2193 if (device_num_var->end - device_num_var->start != sizeof (int))
2195 gomp_mutex_unlock (&devicep->lock);
2196 if (is_register_lock)
2197 gomp_mutex_unlock (&register_lock);
2198 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2199 "format");
	  /* Copy the device_num value to its location in device memory,
	     thereby actually putting its device number into effect.  */
2204 gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2205 &device_num_val, sizeof (int), false, NULL);
2209 free (target_table);
/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
   The device must be locked.  */
2215 static void
2216 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2217 unsigned version,
2218 const void *host_table, const void *target_data)
2220 void **host_func_table = ((void ***) host_table)[0];
2221 void **host_funcs_end = ((void ***) host_table)[1];
2222 void **host_var_table = ((void ***) host_table)[2];
2223 void **host_vars_end = ((void ***) host_table)[3];
2225 /* The func table contains only addresses, the var table contains addresses
2226 and corresponding sizes. */
2227 int num_funcs = host_funcs_end - host_func_table;
2228 int num_vars = (host_vars_end - host_var_table) / 2;
2230 struct splay_tree_key_s k;
2231 splay_tree_key node = NULL;
  /* Find the mapping at the start of the node array.  */
2234 if (num_funcs || num_vars)
2236 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2237 : (uintptr_t) host_var_table[0]);
2238 k.host_end = k.host_start + 1;
2239 node = splay_tree_lookup (&devicep->mem_map, &k);
2242 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2244 gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload failed");
2248 /* Remove mappings from splay tree. */
2249 int i;
2250 for (i = 0; i < num_funcs; i++)
2252 k.host_start = (uintptr_t) host_func_table[i];
2253 k.host_end = k.host_start + 1;
2254 splay_tree_remove (&devicep->mem_map, &k);
  /* The most significant bit of the size in the host and target tables
     marks "omp declare target link" variables.  */
2259 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2260 const uintptr_t size_mask = ~link_bit;
2261 bool is_tgt_unmapped = false;
2263 for (i = 0; i < num_vars; i++)
2265 k.host_start = (uintptr_t) host_var_table[i * 2];
2266 k.host_end
2267 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2269 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2270 splay_tree_remove (&devicep->mem_map, &k);
2271 else
2273 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2274 is_tgt_unmapped = gomp_remove_var (devicep, n);
2278 if (node && !is_tgt_unmapped)
2280 free (node->tgt);
2281 free (node);
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables in HOST_TABLE,
   the TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */
2289 void
2290 GOMP_offload_register_ver (unsigned version, const void *host_table,
2291 int target_type, const void *target_data)
2293 int i;
2295 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2296 gomp_fatal ("Library too old for offload (version %u < %u)",
2297 GOMP_VERSION, GOMP_VERSION_LIB (version));
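
  /* VERSION packs the library version in the upper 16 bits and a
     device-specific version in the lower 16 bits (see GOMP_VERSION_PACK
     in gomp-constants.h); GOMP_VERSION_LIB extracts the former.  */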
2299 gomp_mutex_lock (&register_lock);
2301 /* Load image to all initialized devices. */
2302 for (i = 0; i < num_devices; i++)
2304 struct gomp_device_descr *devicep = &devices[i];
2305 gomp_mutex_lock (&devicep->lock);
2306 if (devicep->type == target_type
2307 && devicep->state == GOMP_DEVICE_INITIALIZED)
2308 gomp_load_image_to_device (devicep, version,
2309 host_table, target_data, true);
2310 gomp_mutex_unlock (&devicep->lock);
2313 /* Insert image to array of pending images. */
2314 offload_images
2315 = gomp_realloc_unlock (offload_images,
2316 (num_offload_images + 1)
2317 * sizeof (struct offload_image_descr));
2318 offload_images[num_offload_images].version = version;
2319 offload_images[num_offload_images].type = target_type;
2320 offload_images[num_offload_images].host_table = host_table;
2321 offload_images[num_offload_images].target_data = target_data;
2323 num_offload_images++;
2324 gomp_mutex_unlock (&register_lock);
2327 void
2328 GOMP_offload_register (const void *host_table, int target_type,
2329 const void *target_data)
2331 GOMP_offload_register_ver (0, host_table, target_type, target_data);
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables in HOST_TABLE,
   the TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */
2338 void
2339 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2340 int target_type, const void *target_data)
2342 int i;
2344 gomp_mutex_lock (&register_lock);
2346 /* Unload image from all initialized devices. */
2347 for (i = 0; i < num_devices; i++)
2349 struct gomp_device_descr *devicep = &devices[i];
2350 gomp_mutex_lock (&devicep->lock);
2351 if (devicep->type == target_type
2352 && devicep->state == GOMP_DEVICE_INITIALIZED)
2353 gomp_unload_image_from_device (devicep, version,
2354 host_table, target_data);
2355 gomp_mutex_unlock (&devicep->lock);
2358 /* Remove image from array of pending images. */
2359 for (i = 0; i < num_offload_images; i++)
2360 if (offload_images[i].target_data == target_data)
2362 offload_images[i] = offload_images[--num_offload_images];
2363 break;
2366 gomp_mutex_unlock (&register_lock);
2369 void
2370 GOMP_offload_unregister (const void *host_table, int target_type,
2371 const void *target_data)
2373 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2376 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2377 must be locked on entry, and remains locked on return. */
2379 attribute_hidden void
2380 gomp_init_device (struct gomp_device_descr *devicep)
2382 int i;
2383 if (!devicep->init_device_func (devicep->target_id))
2385 gomp_mutex_unlock (&devicep->lock);
2386 gomp_fatal ("device initialization failed");
  /* Load to the device all images registered so far.  */
2390 for (i = 0; i < num_offload_images; i++)
2392 struct offload_image_descr *image = &offload_images[i];
2393 if (image->type == devicep->type)
2394 gomp_load_image_to_device (devicep, image->version,
2395 image->host_table, image->target_data,
2396 false);
2399 /* Initialize OpenACC asynchronous queues. */
2400 goacc_init_asyncqueues (devicep);
2402 devicep->state = GOMP_DEVICE_INITIALIZED;
2405 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2406 must be locked on entry, and remains locked on return. */
2408 attribute_hidden bool
2409 gomp_fini_device (struct gomp_device_descr *devicep)
2411 bool ret = goacc_fini_asyncqueues (devicep);
2412 ret &= devicep->fini_device_func (devicep->target_id);
2413 devicep->state = GOMP_DEVICE_FINALIZED;
2414 return ret;
2417 attribute_hidden void
2418 gomp_unload_device (struct gomp_device_descr *devicep)
2420 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2422 unsigned i;
      /* Unload from the device all images registered so far.  */
2425 for (i = 0; i < num_offload_images; i++)
2427 struct offload_image_descr *image = &offload_images[i];
2428 if (image->type == devicep->type)
2429 gomp_unload_image_from_device (devicep, image->version,
2430 image->host_table,
2431 image->target_data);
2436 /* Host fallback for GOMP_target{,_ext} routines. */
2438 static void
2439 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2440 struct gomp_device_descr *devicep, void **args)
2442 struct gomp_thread old_thr, *thr = gomp_thread ();
2444 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2445 && devicep != NULL)
2446 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2447 "be used for offloading");
2449 old_thr = *thr;
2450 memset (thr, '\0', sizeof (*thr));
2451 if (gomp_places_list)
2453 thr->place = old_thr.place;
2454 thr->ts.place_partition_len = gomp_places_list_len;
2456 if (args)
2457 while (*args)
2459 intptr_t id = (intptr_t) *args++, val;
2460 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2461 val = (intptr_t) *args++;
2462 else
2463 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2464 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2465 continue;
2466 id &= GOMP_TARGET_ARG_ID_MASK;
2467 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2468 continue;
2469 val = val > INT_MAX ? INT_MAX : val;
2470 if (val)
2471 gomp_icv (true)->thread_limit_var = val;
2472 break;
2475 fn (hostaddrs);
2476 gomp_free_thread (thr);
2477 *thr = old_thr;
/* Calculate the alignment and size requirements of a private copy of data
   shared as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and
   TGT_SIZE.  */
2483 static inline void
2484 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2485 unsigned short *kinds, size_t *tgt_align,
2486 size_t *tgt_size)
2488 size_t i;
2489 for (i = 0; i < mapnum; i++)
2490 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2492 size_t align = (size_t) 1 << (kinds[i] >> 8);
2493 if (*tgt_align < align)
2494 *tgt_align = align;
2495 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2496 *tgt_size += sizes[i];
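
/* Worked example for the above: two GOMP_MAP_FIRSTPRIVATE objects of sizes
   6 and 8 whose kinds encode alignments 4 and 8 (high bytes 2 and 3) yield
   *tgt_align == 8 and *tgt_size == 8 + 8 == 16, the first size having been
   rounded up from 6 to the alignment of the second; callers then allocate
   tgt_size + tgt_align - 1 == 23 bytes so the copy can always be aligned.  */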
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE into the private copy at TGT.  */
2502 static inline void
2503 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2504 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2505 size_t tgt_size)
2507 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2508 if (al)
2509 tgt += tgt_align - al;
2510 tgt_size = 0;
2511 size_t i;
2512 for (i = 0; i < mapnum; i++)
2513 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2515 size_t align = (size_t) 1 << (kinds[i] >> 8);
2516 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2517 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2518 hostaddrs[i] = tgt + tgt_size;
2519 tgt_size = tgt_size + sizes[i];
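
/* Continuing the example above: TGT is first advanced to an 8-byte
   boundary, the 6-byte object is copied at offset 0, the 8-byte object at
   offset 8, and the corresponding HOSTADDRS entries are redirected into
   the private copy.  */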
2523 /* Helper function of GOMP_target{,_ext} routines. */
2525 static void *
2526 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2527 void (*host_fn) (void *))
2529 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2530 return (void *) host_fn;
2531 else
2533 gomp_mutex_lock (&devicep->lock);
2534 if (devicep->state == GOMP_DEVICE_FINALIZED)
2536 gomp_mutex_unlock (&devicep->lock);
2537 return NULL;
2540 struct splay_tree_key_s k;
2541 k.host_start = (uintptr_t) host_fn;
2542 k.host_end = k.host_start + 1;
2543 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2544 gomp_mutex_unlock (&devicep->lock);
2545 if (tgt_fn == NULL)
2546 return NULL;
2548 return (void *) tgt_fn->tgt_offset;
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI,
   but we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with the addresses of the host objects, the sizes
   of the host objects (resp., for pointer kinds, the pointer bias and an
   assumed sizeof (void *) size) and the kinds.  */
2562 void
2563 GOMP_target (int device, void (*fn) (void *), const void *unused,
2564 size_t mapnum, void **hostaddrs, size_t *sizes,
2565 unsigned char *kinds)
2567 struct gomp_device_descr *devicep = resolve_device (device);
2569 void *fn_addr;
2570 if (devicep == NULL
2571 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2572 /* All shared memory devices should use the GOMP_target_ext function. */
2573 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2574 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2575 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2577 htab_t refcount_set = htab_create (mapnum);
2578 struct target_mem_desc *tgt_vars
2579 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2580 &refcount_set, GOMP_MAP_VARS_TARGET);
2581 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2582 NULL);
2583 htab_clear (refcount_set);
2584 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2585 htab_free (refcount_set);
2588 static inline unsigned int
2589 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2591 /* If we cannot run asynchronously, simply ignore nowait. */
2592 if (devicep != NULL && devicep->async_run_func == NULL)
2593 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2595 return flags;
2598 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2599 and several arguments have been added:
2600 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2601 DEPEND is array of dependencies, see GOMP_task for details.
   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, each of which can take
   one or two elements where the first specifies for which device it is
   intended, the type and optionally also the value.  If the value is not
   present in the first element, the whole second element is the actual
   value.  The last element of the array is a single NULL.  Among the
   device-independent arguments can be for example NUM_TEAMS and
   THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */
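
/* A minimal sketch of an ARGS array, under the encoding parsed in
   gomp_target_fallback above, requesting a thread limit of 4 for all
   devices, with the value small enough to live in the id element itself:

     void *args[]
       = { (void *) (GOMP_TARGET_ARG_DEVICE_ALL
                     | GOMP_TARGET_ARG_THREAD_LIMIT
                     | ((intptr_t) 4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
           NULL };  */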
2621 void
2622 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2623 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2624 unsigned int flags, void **depend, void **args)
2626 struct gomp_device_descr *devicep = resolve_device (device);
2627 size_t tgt_align = 0, tgt_size = 0;
2628 bool fpc_done = false;
2630 flags = clear_unsupported_flags (devicep, flags);
2632 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2634 struct gomp_thread *thr = gomp_thread ();
2635 /* Create a team if we don't have any around, as nowait
2636 target tasks make sense to run asynchronously even when
2637 outside of any parallel. */
2638 if (__builtin_expect (thr->ts.team == NULL, 0))
2640 struct gomp_team *team = gomp_new_team (1);
2641 struct gomp_task *task = thr->task;
2642 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2643 team->prev_ts = thr->ts;
2644 thr->ts.team = team;
2645 thr->ts.team_id = 0;
2646 thr->ts.work_share = &team->work_shares[0];
2647 thr->ts.last_work_share = NULL;
2648 #ifdef HAVE_SYNC_BUILTINS
2649 thr->ts.single_count = 0;
2650 #endif
2651 thr->ts.static_trip = 0;
2652 thr->task = &team->implicit_task[0];
2653 gomp_init_task (thr->task, NULL, icv);
2654 if (task)
2656 thr->task = task;
2657 gomp_end_task ();
2658 free (task);
2659 thr->task = &team->implicit_task[0];
2661 else
2662 pthread_setspecific (gomp_thread_destructor, thr);
2664 if (thr->ts.team
2665 && !thr->task->final_task)
2667 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2668 sizes, kinds, flags, depend, args,
2669 GOMP_TARGET_TASK_BEFORE_MAP);
2670 return;
2674 /* If there are depend clauses, but nowait is not present
2675 (or we are in a final task), block the parent task until the
2676 dependencies are resolved and then just continue with the rest
2677 of the function as if it is a merged task. */
2678 if (depend != NULL)
2680 struct gomp_thread *thr = gomp_thread ();
2681 if (thr->task && thr->task->depend_hash)
2683 /* If we might need to wait, copy firstprivate now. */
2684 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2685 &tgt_align, &tgt_size);
2686 if (tgt_align)
2688 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2689 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2690 tgt_align, tgt_size);
2692 fpc_done = true;
2693 gomp_task_maybe_wait_for_dependencies (depend);
2697 void *fn_addr;
2698 if (devicep == NULL
2699 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2700 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2701 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2703 if (!fpc_done)
2705 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2706 &tgt_align, &tgt_size);
2707 if (tgt_align)
2709 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2710 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2711 tgt_align, tgt_size);
2714 gomp_target_fallback (fn, hostaddrs, devicep, args);
2715 return;
2718 struct target_mem_desc *tgt_vars;
2719 htab_t refcount_set = NULL;
2721 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2723 if (!fpc_done)
2725 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2726 &tgt_align, &tgt_size);
2727 if (tgt_align)
2729 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2730 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2731 tgt_align, tgt_size);
2734 tgt_vars = NULL;
2736 else
2738 refcount_set = htab_create (mapnum);
2739 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2740 true, &refcount_set, GOMP_MAP_VARS_TARGET);
2742 devicep->run_func (devicep->target_id, fn_addr,
2743 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2744 args);
2745 if (tgt_vars)
2747 htab_clear (refcount_set);
2748 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2750 if (refcount_set)
2751 htab_free (refcount_set);
2754 /* Host fallback for GOMP_target_data{,_ext} routines. */
2756 static void
2757 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2759 struct gomp_task_icv *icv = gomp_icv (false);
2761 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2762 && devicep != NULL)
2763 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2764 "be used for offloading");
2766 if (icv->target_data)
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, we need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
2772 struct target_mem_desc *tgt
2773 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2774 NULL, GOMP_MAP_VARS_DATA);
2775 tgt->prev = icv->target_data;
2776 icv->target_data = tgt;
2780 void
2781 GOMP_target_data (int device, const void *unused, size_t mapnum,
2782 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2784 struct gomp_device_descr *devicep = resolve_device (device);
2786 if (devicep == NULL
2787 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2788 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2789 return gomp_target_data_fallback (devicep);
2791 struct target_mem_desc *tgt
2792 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2793 NULL, GOMP_MAP_VARS_DATA);
2794 struct gomp_task_icv *icv = gomp_icv (true);
2795 tgt->prev = icv->target_data;
2796 icv->target_data = tgt;
2799 void
2800 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2801 size_t *sizes, unsigned short *kinds)
2803 struct gomp_device_descr *devicep = resolve_device (device);
2805 if (devicep == NULL
2806 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2807 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2808 return gomp_target_data_fallback (devicep);
2810 struct target_mem_desc *tgt
2811 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2812 NULL, GOMP_MAP_VARS_DATA);
2813 struct gomp_task_icv *icv = gomp_icv (true);
2814 tgt->prev = icv->target_data;
2815 icv->target_data = tgt;
2818 void
2819 GOMP_target_end_data (void)
2821 struct gomp_task_icv *icv = gomp_icv (false);
2822 if (icv->target_data)
2824 struct target_mem_desc *tgt = icv->target_data;
2825 icv->target_data = tgt->prev;
2826 gomp_unmap_vars (tgt, true, NULL);
2830 void
2831 GOMP_target_update (int device, const void *unused, size_t mapnum,
2832 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2834 struct gomp_device_descr *devicep = resolve_device (device);
2836 if (devicep == NULL
2837 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2838 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2839 return;
2841 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2844 void
2845 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2846 size_t *sizes, unsigned short *kinds,
2847 unsigned int flags, void **depend)
2849 struct gomp_device_descr *devicep = resolve_device (device);
  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
2857 if (depend != NULL)
2859 struct gomp_thread *thr = gomp_thread ();
2860 if (thr->task && thr->task->depend_hash)
2862 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2863 && thr->ts.team
2864 && !thr->task->final_task)
2866 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2867 mapnum, hostaddrs, sizes, kinds,
2868 flags | GOMP_TARGET_FLAG_UPDATE,
2869 depend, NULL, GOMP_TARGET_TASK_DATA))
2870 return;
2872 else
2874 struct gomp_team *team = thr->ts.team;
2875 /* If parallel or taskgroup has been cancelled, don't start new
2876 tasks. */
2877 if (__builtin_expect (gomp_cancel_var, 0) && team)
2879 if (gomp_team_barrier_cancelled (&team->barrier))
2880 return;
2881 if (thr->task->taskgroup)
2883 if (thr->task->taskgroup->cancelled)
2884 return;
2885 if (thr->task->taskgroup->workshare
2886 && thr->task->taskgroup->prev
2887 && thr->task->taskgroup->prev->cancelled)
2888 return;
2892 gomp_task_maybe_wait_for_dependencies (depend);
2897 if (devicep == NULL
2898 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2899 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2900 return;
2902 struct gomp_thread *thr = gomp_thread ();
2903 struct gomp_team *team = thr->ts.team;
2904 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2905 if (__builtin_expect (gomp_cancel_var, 0) && team)
2907 if (gomp_team_barrier_cancelled (&team->barrier))
2908 return;
2909 if (thr->task->taskgroup)
2911 if (thr->task->taskgroup->cancelled)
2912 return;
2913 if (thr->task->taskgroup->workshare
2914 && thr->task->taskgroup->prev
2915 && thr->task->taskgroup->prev->cancelled)
2916 return;
2920 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2923 static void
2924 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2925 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2926 htab_t *refcount_set)
2928 const int typemask = 0xff;
2929 size_t i;
2930 gomp_mutex_lock (&devicep->lock);
2931 if (devicep->state == GOMP_DEVICE_FINALIZED)
2933 gomp_mutex_unlock (&devicep->lock);
2934 return;
2937 for (i = 0; i < mapnum; i++)
2938 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2940 struct splay_tree_key_s cur_node;
2941 cur_node.host_start = (uintptr_t) hostaddrs[i];
2942 cur_node.host_end = cur_node.host_start + sizeof (void *);
2943 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2945 if (n)
2946 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2947 false, NULL);
2950 int nrmvars = 0;
2951 splay_tree_key remove_vars[mapnum];
2953 for (i = 0; i < mapnum; i++)
2955 struct splay_tree_key_s cur_node;
2956 unsigned char kind = kinds[i] & typemask;
2957 switch (kind)
2959 case GOMP_MAP_FROM:
2960 case GOMP_MAP_ALWAYS_FROM:
2961 case GOMP_MAP_DELETE:
2962 case GOMP_MAP_RELEASE:
2963 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2964 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2965 cur_node.host_start = (uintptr_t) hostaddrs[i];
2966 cur_node.host_end = cur_node.host_start + sizes[i];
2967 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2968 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2969 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2970 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2971 if (!k)
2972 continue;
2974 bool delete_p = (kind == GOMP_MAP_DELETE
2975 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2976 bool do_copy, do_remove;
2977 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2978 &do_remove);
2980 if ((kind == GOMP_MAP_FROM && do_copy)
2981 || kind == GOMP_MAP_ALWAYS_FROM)
2983 if (k->aux && k->aux->attach_count)
2985 /* We have to be careful not to overwrite still attached
2986 pointers during the copyback to host. */
2987 uintptr_t addr = k->host_start;
2988 while (addr < k->host_end)
2990 size_t i = (addr - k->host_start) / sizeof (void *);
2991 if (k->aux->attach_count[i] == 0)
2992 gomp_copy_dev2host (devicep, NULL, (void *) addr,
2993 (void *) (k->tgt->tgt_start
2994 + k->tgt_offset
2995 + addr - k->host_start),
2996 sizeof (void *));
2997 addr += sizeof (void *);
3000 else
3001 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3002 (void *) (k->tgt->tgt_start + k->tgt_offset
3003 + cur_node.host_start
3004 - k->host_start),
3005 cur_node.host_end - cur_node.host_start);
	  /* Structure element lists are removed all at once, which may
	     cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy
	     back.  While we're at it, it also seems more disciplined to
	     simply queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal)
	     should not have this problem, since it maintains an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start
	     with.
3018 if (do_remove)
3019 remove_vars[nrmvars++] = k;
3020 break;
3022 case GOMP_MAP_DETACH:
3023 break;
3024 default:
3025 gomp_mutex_unlock (&devicep->lock);
3026 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3027 kind);
3031 for (int i = 0; i < nrmvars; i++)
3032 gomp_remove_var (devicep, remove_vars[i]);
3034 gomp_mutex_unlock (&devicep->lock);
3037 void
3038 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3039 size_t *sizes, unsigned short *kinds,
3040 unsigned int flags, void **depend)
3042 struct gomp_device_descr *devicep = resolve_device (device);
  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
3050 if (depend != NULL)
3052 struct gomp_thread *thr = gomp_thread ();
3053 if (thr->task && thr->task->depend_hash)
3055 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3056 && thr->ts.team
3057 && !thr->task->final_task)
3059 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3060 mapnum, hostaddrs, sizes, kinds,
3061 flags, depend, NULL,
3062 GOMP_TARGET_TASK_DATA))
3063 return;
3065 else
3067 struct gomp_team *team = thr->ts.team;
3068 /* If parallel or taskgroup has been cancelled, don't start new
3069 tasks. */
3070 if (__builtin_expect (gomp_cancel_var, 0) && team)
3072 if (gomp_team_barrier_cancelled (&team->barrier))
3073 return;
3074 if (thr->task->taskgroup)
3076 if (thr->task->taskgroup->cancelled)
3077 return;
3078 if (thr->task->taskgroup->workshare
3079 && thr->task->taskgroup->prev
3080 && thr->task->taskgroup->prev->cancelled)
3081 return;
3085 gomp_task_maybe_wait_for_dependencies (depend);
3090 if (devicep == NULL
3091 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3092 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3093 return;
3095 struct gomp_thread *thr = gomp_thread ();
3096 struct gomp_team *team = thr->ts.team;
3097 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3098 if (__builtin_expect (gomp_cancel_var, 0) && team)
3100 if (gomp_team_barrier_cancelled (&team->barrier))
3101 return;
3102 if (thr->task->taskgroup)
3104 if (thr->task->taskgroup->cancelled)
3105 return;
3106 if (thr->task->taskgroup->workshare
3107 && thr->task->taskgroup->prev
3108 && thr->task->taskgroup->prev->cancelled)
3109 return;
3113 htab_t refcount_set = htab_create (mapnum);
3115 /* The variables are mapped separately such that they can be released
3116 independently. */
3117 size_t i, j;
3118 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3119 for (i = 0; i < mapnum; i++)
3120 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3122 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3123 &kinds[i], true, &refcount_set,
3124 GOMP_MAP_VARS_ENTER_DATA);
3125 i += sizes[i];
3127 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3129 for (j = i + 1; j < mapnum; j++)
3130 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3131 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3132 break;
3133 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
3134 &kinds[i], true, &refcount_set,
3135 GOMP_MAP_VARS_ENTER_DATA);
3136 i += j - i - 1;
3138 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3140 /* An attach operation must be processed together with the mapped
3141 base-pointer list item. */
3142 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3143 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3144 i += 1;
3146 else
3147 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3148 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3149 else
3150 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3151 htab_free (refcount_set);
3154 bool
3155 gomp_target_task_fn (void *data)
3157 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3158 struct gomp_device_descr *devicep = ttask->devicep;
3160 if (ttask->fn != NULL)
3162 void *fn_addr;
3163 if (devicep == NULL
3164 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3165 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3166 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3168 ttask->state = GOMP_TARGET_TASK_FALLBACK;
3169 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3170 ttask->args);
3171 return false;
3174 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3176 if (ttask->tgt)
3177 gomp_unmap_vars (ttask->tgt, true, NULL);
3178 return false;
3181 void *actual_arguments;
3182 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3184 ttask->tgt = NULL;
3185 actual_arguments = ttask->hostaddrs;
3187 else
3189 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3190 NULL, ttask->sizes, ttask->kinds, true,
3191 NULL, GOMP_MAP_VARS_TARGET);
3192 actual_arguments = (void *) ttask->tgt->tgt_start;
3194 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3196 assert (devicep->async_run_func);
3197 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3198 ttask->args, (void *) ttask);
3199 return true;
3201 else if (devicep == NULL
3202 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3203 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3204 return false;
3206 size_t i;
3207 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3208 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3209 ttask->kinds, true);
3210 else
3212 htab_t refcount_set = htab_create (ttask->mapnum);
3213 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3214 for (i = 0; i < ttask->mapnum; i++)
3215 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3217 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3218 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3219 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3220 i += ttask->sizes[i];
3222 else
3223 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3224 &ttask->kinds[i], true, &refcount_set,
3225 GOMP_MAP_VARS_ENTER_DATA);
3226 else
3227 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3228 ttask->kinds, &refcount_set);
3229 htab_free (refcount_set);
3231 return false;
3234 void
3235 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3237 if (thread_limit)
3239 struct gomp_task_icv *icv = gomp_icv (true);
3240 icv->thread_limit_var
3241 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3243 (void) num_teams;
3246 bool
3247 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3248 unsigned int thread_limit, bool first)
3250 struct gomp_thread *thr = gomp_thread ();
3251 if (first)
3253 if (thread_limit)
3255 struct gomp_task_icv *icv = gomp_icv (true);
3256 icv->thread_limit_var
3257 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3259 (void) num_teams_high;
3260 if (num_teams_low == 0)
3261 num_teams_low = 1;
3262 thr->num_teams = num_teams_low - 1;
3263 thr->team_num = 0;
3265 else if (thr->team_num == thr->num_teams)
3266 return false;
3267 else
3268 ++thr->team_num;
3269 return true;
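
/* A sketch, not necessarily the exact code GCC emits, of how a host-side
   teams region can drive GOMP_teams4: the region body becomes a loop that
   runs once per host-emulated team,

     for (bool first = true;
          GOMP_teams4 (num_teams_low, num_teams_high, thread_limit, first);
          first = false)
       ... body; the current team number is thr->team_num ...  */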
3272 void *
3273 omp_target_alloc (size_t size, int device_num)
3275 if (device_num == gomp_get_num_devices ())
3276 return malloc (size);
3278 if (device_num < 0)
3279 return NULL;
3281 struct gomp_device_descr *devicep = resolve_device (device_num);
3282 if (devicep == NULL)
3283 return NULL;
3285 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3286 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3287 return malloc (size);
3289 gomp_mutex_lock (&devicep->lock);
3290 void *ret = devicep->alloc_func (devicep->target_id, size);
3291 gomp_mutex_unlock (&devicep->lock);
3292 return ret;
3295 void
3296 omp_target_free (void *device_ptr, int device_num)
3298 if (device_ptr == NULL)
3299 return;
3301 if (device_num == gomp_get_num_devices ())
3303 free (device_ptr);
3304 return;
3307 if (device_num < 0)
3308 return;
3310 struct gomp_device_descr *devicep = resolve_device (device_num);
3311 if (devicep == NULL)
3312 return;
3314 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3315 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3317 free (device_ptr);
3318 return;
3321 gomp_mutex_lock (&devicep->lock);
3322 gomp_free_device_memory (devicep, device_ptr);
3323 gomp_mutex_unlock (&devicep->lock);
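
/* Illustrative use of the two entry points above:

     int dev = omp_get_default_device ();
     void *p = omp_target_alloc (1024, dev);
     if (p)
       {
         ... use P in target regions via an is_device_ptr clause ...
         omp_target_free (p, dev);
       }
*/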
3327 omp_target_is_present (const void *ptr, int device_num)
3329 if (ptr == NULL)
3330 return 1;
3332 if (device_num == gomp_get_num_devices ())
3333 return 1;
3335 if (device_num < 0)
3336 return 0;
3338 struct gomp_device_descr *devicep = resolve_device (device_num);
3339 if (devicep == NULL)
3340 return 0;
3342 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3343 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3344 return 1;
3346 gomp_mutex_lock (&devicep->lock);
3347 struct splay_tree_s *mem_map = &devicep->mem_map;
3348 struct splay_tree_key_s cur_node;
3350 cur_node.host_start = (uintptr_t) ptr;
3351 cur_node.host_end = cur_node.host_start;
3352 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3353 int ret = n != NULL;
3354 gomp_mutex_unlock (&devicep->lock);
3355 return ret;
3359 omp_target_memcpy (void *dst, const void *src, size_t length,
3360 size_t dst_offset, size_t src_offset, int dst_device_num,
3361 int src_device_num)
3363 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3364 bool ret;
3366 if (dst_device_num != gomp_get_num_devices ())
3368 if (dst_device_num < 0)
3369 return EINVAL;
3371 dst_devicep = resolve_device (dst_device_num);
3372 if (dst_devicep == NULL)
3373 return EINVAL;
3375 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3376 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3377 dst_devicep = NULL;
3379 if (src_device_num != num_devices_openmp)
3381 if (src_device_num < 0)
3382 return EINVAL;
3384 src_devicep = resolve_device (src_device_num);
3385 if (src_devicep == NULL)
3386 return EINVAL;
3388 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3389 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3390 src_devicep = NULL;
3392 if (src_devicep == NULL && dst_devicep == NULL)
3394 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3395 return 0;
3397 if (src_devicep == NULL)
3399 gomp_mutex_lock (&dst_devicep->lock);
3400 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3401 (char *) dst + dst_offset,
3402 (char *) src + src_offset, length);
3403 gomp_mutex_unlock (&dst_devicep->lock);
3404 return (ret ? 0 : EINVAL);
3406 if (dst_devicep == NULL)
3408 gomp_mutex_lock (&src_devicep->lock);
3409 ret = src_devicep->dev2host_func (src_devicep->target_id,
3410 (char *) dst + dst_offset,
3411 (char *) src + src_offset, length);
3412 gomp_mutex_unlock (&src_devicep->lock);
3413 return (ret ? 0 : EINVAL);
3415 if (src_devicep == dst_devicep)
3417 gomp_mutex_lock (&src_devicep->lock);
3418 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3419 (char *) dst + dst_offset,
3420 (char *) src + src_offset, length);
3421 gomp_mutex_unlock (&src_devicep->lock);
3422 return (ret ? 0 : EINVAL);
3424 return EINVAL;
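
/* Illustrative host-to-device copy with the function above, where DST was
   obtained from omp_target_alloc on device DEV and SRC is a host buffer:

     int rc = omp_target_memcpy (dst, src, 256, 0, 0, dev,
                                 omp_get_initial_device ());

   RC is 0 on success and EINVAL otherwise.  */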
3427 static int
3428 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3429 int num_dims, const size_t *volume,
3430 const size_t *dst_offsets,
3431 const size_t *src_offsets,
3432 const size_t *dst_dimensions,
3433 const size_t *src_dimensions,
3434 struct gomp_device_descr *dst_devicep,
3435 struct gomp_device_descr *src_devicep)
3437 size_t dst_slice = element_size;
3438 size_t src_slice = element_size;
3439 size_t j, dst_off, src_off, length;
3440 int i, ret;
3442 if (num_dims == 1)
3444 if (__builtin_mul_overflow (element_size, volume[0], &length)
3445 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3446 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3447 return EINVAL;
3448 if (dst_devicep == NULL && src_devicep == NULL)
3450 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3451 length);
3452 ret = 1;
3454 else if (src_devicep == NULL)
3455 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3456 (char *) dst + dst_off,
3457 (const char *) src + src_off,
3458 length);
3459 else if (dst_devicep == NULL)
3460 ret = src_devicep->dev2host_func (src_devicep->target_id,
3461 (char *) dst + dst_off,
3462 (const char *) src + src_off,
3463 length);
3464 else if (src_devicep == dst_devicep)
3465 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3466 (char *) dst + dst_off,
3467 (const char *) src + src_off,
3468 length);
3469 else
3470 ret = 0;
3471 return ret ? 0 : EINVAL;
3474 /* FIXME: it would be nice to have some plugin function to handle
3475 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3476 be handled in the generic recursion below, and for host-host it
3477 should be used even for any num_dims >= 2. */
3479 for (i = 1; i < num_dims; i++)
3480 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3481 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3482 return EINVAL;
3483 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3484 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3485 return EINVAL;
3486 for (j = 0; j < volume[0]; j++)
3488 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3489 (const char *) src + src_off,
3490 element_size, num_dims - 1,
3491 volume + 1, dst_offsets + 1,
3492 src_offsets + 1, dst_dimensions + 1,
3493 src_dimensions + 1, dst_devicep,
3494 src_devicep);
3495 if (ret)
3496 return ret;
3497 dst_off += dst_slice;
3498 src_off += src_slice;
3500 return 0;
3504 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3505 int num_dims, const size_t *volume,
3506 const size_t *dst_offsets,
3507 const size_t *src_offsets,
3508 const size_t *dst_dimensions,
3509 const size_t *src_dimensions,
3510 int dst_device_num, int src_device_num)
3512 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3514 if (!dst && !src)
3515 return INT_MAX;
3517 if (dst_device_num != gomp_get_num_devices ())
3519 if (dst_device_num < 0)
3520 return EINVAL;
3522 dst_devicep = resolve_device (dst_device_num);
3523 if (dst_devicep == NULL)
3524 return EINVAL;
3526 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3527 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3528 dst_devicep = NULL;
3530 if (src_device_num != num_devices_openmp)
3532 if (src_device_num < 0)
3533 return EINVAL;
3535 src_devicep = resolve_device (src_device_num);
3536 if (src_devicep == NULL)
3537 return EINVAL;
3539 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3540 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3541 src_devicep = NULL;
3544 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3545 return EINVAL;
3547 if (src_devicep)
3548 gomp_mutex_lock (&src_devicep->lock);
3549 else if (dst_devicep)
3550 gomp_mutex_lock (&dst_devicep->lock);
3551 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3552 volume, dst_offsets, src_offsets,
3553 dst_dimensions, src_dimensions,
3554 dst_devicep, src_devicep);
3555 if (src_devicep)
3556 gomp_mutex_unlock (&src_devicep->lock);
3557 else if (dst_devicep)
3558 gomp_mutex_unlock (&dst_devicep->lock);
3559 return ret;
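
/* Worked 2-D example for the function above (host-to-host for brevity):
   copy a 2x3 block of ints starting at offset (1, 1) of a 4x5 source into
   offset (1, 1) of a 3x4 destination:

     size_t volume[] = { 2, 3 };
     size_t dst_off[] = { 1, 1 }, src_off[] = { 1, 1 };
     size_t dst_dim[] = { 3, 4 }, src_dim[] = { 4, 5 };
     int host = omp_get_initial_device ();
     omp_target_memcpy_rect (dst, src, sizeof (int), 2, volume, dst_off,
                             src_off, dst_dim, src_dim, host, host);

   The worker above then performs volume[0] == 2 one-dimensional copies of
   3 ints each, advancing by the row slices of the two arrays.  */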
3563 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3564 size_t size, size_t device_offset, int device_num)
3566 if (device_num == gomp_get_num_devices ())
3567 return EINVAL;
3569 if (device_num < 0)
3570 return EINVAL;
3572 struct gomp_device_descr *devicep = resolve_device (device_num);
3573 if (devicep == NULL)
3574 return EINVAL;
3576 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3577 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3578 return EINVAL;
3580 gomp_mutex_lock (&devicep->lock);
3582 struct splay_tree_s *mem_map = &devicep->mem_map;
3583 struct splay_tree_key_s cur_node;
3584 int ret = EINVAL;
3586 cur_node.host_start = (uintptr_t) host_ptr;
3587 cur_node.host_end = cur_node.host_start + size;
3588 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3589 if (n)
3591 if (n->tgt->tgt_start + n->tgt_offset
3592 == (uintptr_t) device_ptr + device_offset
3593 && n->host_start <= cur_node.host_start
3594 && n->host_end >= cur_node.host_end)
3595 ret = 0;
3597 else
3599 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3600 tgt->array = gomp_malloc (sizeof (*tgt->array));
3601 tgt->refcount = 1;
3602 tgt->tgt_start = 0;
3603 tgt->tgt_end = 0;
3604 tgt->to_free = NULL;
3605 tgt->prev = NULL;
3606 tgt->list_count = 0;
3607 tgt->device_descr = devicep;
3608 splay_tree_node array = tgt->array;
3609 splay_tree_key k = &array->key;
3610 k->host_start = cur_node.host_start;
3611 k->host_end = cur_node.host_end;
3612 k->tgt = tgt;
3613 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3614 k->refcount = REFCOUNT_INFINITY;
3615 k->dynamic_refcount = 0;
3616 k->aux = NULL;
3617 array->left = NULL;
3618 array->right = NULL;
3619 splay_tree_insert (&devicep->mem_map, array);
3620 ret = 0;
3622 gomp_mutex_unlock (&devicep->lock);
3623 return ret;
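
/* Illustrative pairing of the association API: pre-allocate device memory
   for a host object H so that later mappings of H reuse it instead of
   allocating:

     char h[256];
     void *d = omp_target_alloc (sizeof h, dev);
     omp_target_associate_ptr (h, d, sizeof h, 0, dev);
     ... target constructs mapping H now use D ...
     omp_target_disassociate_ptr (h, dev);
     omp_target_free (d, dev);
*/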
3627 omp_target_disassociate_ptr (const void *ptr, int device_num)
3629 if (device_num == gomp_get_num_devices ())
3630 return EINVAL;
3632 if (device_num < 0)
3633 return EINVAL;
3635 struct gomp_device_descr *devicep = resolve_device (device_num);
3636 if (devicep == NULL)
3637 return EINVAL;
3639 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3640 return EINVAL;
3642 gomp_mutex_lock (&devicep->lock);
3644 struct splay_tree_s *mem_map = &devicep->mem_map;
3645 struct splay_tree_key_s cur_node;
3646 int ret = EINVAL;
3648 cur_node.host_start = (uintptr_t) ptr;
3649 cur_node.host_end = cur_node.host_start;
3650 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3651 if (n
3652 && n->host_start == cur_node.host_start
3653 && n->refcount == REFCOUNT_INFINITY
3654 && n->tgt->tgt_start == 0
3655 && n->tgt->to_free == NULL
3656 && n->tgt->refcount == 1
3657 && n->tgt->list_count == 0)
3659 splay_tree_remove (&devicep->mem_map, n);
3660 gomp_unmap_tgt (n->tgt);
3661 ret = 0;
3664 gomp_mutex_unlock (&devicep->lock);
3665 return ret;
3669 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3671 (void) kind;
3672 if (device_num == gomp_get_num_devices ())
3673 return gomp_pause_host ();
3674 if (device_num < 0 || device_num >= num_devices_openmp)
3675 return -1;
3676 /* Do nothing for target devices for now. */
3677 return 0;
3681 omp_pause_resource_all (omp_pause_resource_t kind)
3683 (void) kind;
3684 if (gomp_pause_host ())
3685 return -1;
3686 /* Do nothing for target devices for now. */
3687 return 0;
3690 ialias (omp_pause_resource)
3691 ialias (omp_pause_resource_all)
3693 #ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
3700 static bool
3701 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3702 const char *plugin_name)
3704 const char *err = NULL, *last_missing = NULL;
3706 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3707 if (!plugin_handle)
3708 #if OFFLOAD_DEFAULTED
3709 return 0;
3710 #else
3711 goto dl_fail;
3712 #endif
  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
3717 #define DLSYM(f) \
3718 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3719 goto dl_fail
3720 /* Similar, but missing functions are not an error. Return false if
3721 failed, true otherwise. */
3722 #define DLSYM_OPT(f, n) \
3723 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3724 || (last_missing = #n, 0))
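
/* For instance, DLSYM (alloc) looks up "GOMP_OFFLOAD_alloc", stores it in
   device->alloc_func and jumps to dl_fail if it is missing, whereas
   DLSYM_OPT (can_run, can_run) merely evaluates to false and records the
   missing name in last_missing.  */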
3726 DLSYM (version);
3727 if (device->version_func () != GOMP_VERSION)
3729 err = "plugin version mismatch";
3730 goto fail;
3733 DLSYM (get_name);
3734 DLSYM (get_caps);
3735 DLSYM (get_type);
3736 DLSYM (get_num_devices);
3737 DLSYM (init_device);
3738 DLSYM (fini_device);
3739 DLSYM (load_image);
3740 DLSYM (unload_image);
3741 DLSYM (alloc);
3742 DLSYM (free);
3743 DLSYM (dev2host);
3744 DLSYM (host2dev);
3745 device->capabilities = device->get_caps_func ();
3746 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3748 DLSYM (run);
3749 DLSYM_OPT (async_run, async_run);
3750 DLSYM_OPT (can_run, can_run);
3751 DLSYM (dev2dev);
3753 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3755 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3756 || !DLSYM_OPT (openacc.create_thread_data,
3757 openacc_create_thread_data)
3758 || !DLSYM_OPT (openacc.destroy_thread_data,
3759 openacc_destroy_thread_data)
3760 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3761 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3762 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3763 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3764 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3765 || !DLSYM_OPT (openacc.async.queue_callback,
3766 openacc_async_queue_callback)
3767 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3768 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3769 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3770 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3771 {
3772 /* Require all the OpenACC handlers if we have
3773 GOMP_OFFLOAD_CAP_OPENACC_200. */
3774 err = "plugin missing OpenACC handler function";
3775 goto fail;
3776 }
3778 unsigned cuda = 0;
3779 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3780 openacc_cuda_get_current_device);
3781 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3782 openacc_cuda_get_current_context);
3783 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3784 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3785 if (cuda && cuda != 4)
3786 {
3787 /* Make sure all the CUDA functions are there if any of them are. */
3788 err = "plugin missing OpenACC CUDA handler function";
3789 goto fail;
3790 }
3791 }
3792 #undef DLSYM
3793 #undef DLSYM_OPT
3795 return 1;
3797 dl_fail:
3798 err = dlerror ();
3799 fail:
3800 gomp_error ("while loading %s: %s", plugin_name, err);
3801 if (last_missing)
3802 gomp_error ("missing function was %s", last_missing);
3803 if (plugin_handle)
3804 dlclose (plugin_handle);
3806 return 0;
3807 }
3809 /* This function finalizes all initialized devices. */
3811 static void
3812 gomp_target_fini (void)
3813 {
3814 int i;
3815 for (i = 0; i < num_devices; i++)
3816 {
3817 bool ret = true;
3818 struct gomp_device_descr *devicep = &devices[i];
3819 gomp_mutex_lock (&devicep->lock);
3820 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3821 ret = gomp_fini_device (devicep);
3822 gomp_mutex_unlock (&devicep->lock);
3823 if (!ret)
3824 gomp_fatal ("device finalization failed");
3825 }
3826 }
3828 /* This function initializes the runtime for offloading.
3829 It parses the list of offload plugins, and tries to load them.
3830 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3831 will be set, and the array DEVICES initialized, containing descriptors for
3832 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3833 followed by the others. */
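/* For example, with OFFLOAD_PLUGINS set at build time to "nvptx,gcn" (the
   exact entries depend on the configured offload targets), the loop below
   assembles "libgomp-plugin-nvptx" and "libgomp-plugin-gcn", each followed
   by SONAME_SUFFIX (1), e.g. ".so.1" on ELF targets.  */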
3835 static void
3836 gomp_target_init (void)
3837 {
3838 const char *prefix = "libgomp-plugin-";
3839 const char *suffix = SONAME_SUFFIX (1);
3840 const char *cur, *next;
3841 char *plugin_name;
3842 int i, new_num_devs;
3843 int num_devs = 0, num_devs_openmp;
3844 struct gomp_device_descr *devs = NULL;
3846 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3847 return;
3849 cur = OFFLOAD_PLUGINS;
3850 if (*cur)
3851 do
3852 {
3853 struct gomp_device_descr current_device;
3854 size_t prefix_len, suffix_len, cur_len;
3856 next = strchr (cur, ',');
3858 prefix_len = strlen (prefix);
3859 cur_len = next ? next - cur : strlen (cur);
3860 suffix_len = strlen (suffix);
3862 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3863 if (!plugin_name)
3864 {
3865 num_devs = 0;
3866 break;
3867 }
3869 memcpy (plugin_name, prefix, prefix_len);
3870 memcpy (plugin_name + prefix_len, cur, cur_len);
3871 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3873 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3874 {
3875 new_num_devs = current_device.get_num_devices_func ();
3876 if (new_num_devs >= 1)
3877 {
3878 /* Augment DEVICES and NUM_DEVICES. */
3880 devs = realloc (devs, (num_devs + new_num_devs)
3881 * sizeof (struct gomp_device_descr));
3882 if (!devs)
3883 {
3884 num_devs = 0;
3885 free (plugin_name);
3886 break;
3887 }
3889 current_device.name = current_device.get_name_func ();
3890 /* current_device.capabilities has already been set. */
3891 current_device.type = current_device.get_type_func ();
3892 current_device.mem_map.root = NULL;
3893 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3894 for (i = 0; i < new_num_devs; i++)
3895 {
3896 current_device.target_id = i;
3897 devs[num_devs] = current_device;
3898 gomp_mutex_init (&devs[num_devs].lock);
3899 num_devs++;
3900 }
3901 }
3902 }
3904 free (plugin_name);
3905 cur = next + 1;
3906 }
3907 while (next);
3909 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3910 NUM_DEVICES_OPENMP. */
3911 struct gomp_device_descr *devs_s
3912 = malloc (num_devs * sizeof (struct gomp_device_descr));
3913 if (!devs_s)
3914 {
3915 num_devs = 0;
3916 free (devs);
3917 devs = NULL;
3918 }
3919 num_devs_openmp = 0;
3920 for (i = 0; i < num_devs; i++)
3921 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3922 devs_s[num_devs_openmp++] = devs[i];
3923 int num_devs_after_openmp = num_devs_openmp;
3924 for (i = 0; i < num_devs; i++)
3925 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3926 devs_s[num_devs_after_openmp++] = devs[i];
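/* The two passes above perform a stable partition: e.g. devices with
   capabilities [acc, omp, acc, omp] come out as [omp, omp, acc, acc],
   preserving the relative order within each group, so OpenMP device
   numbers stay contiguous starting at 0.  */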
3927 free (devs);
3928 devs = devs_s;
3930 for (i = 0; i < num_devs; i++)
3931 {
3932 /* The 'devices' array can be moved (by the realloc call) until we have
3933 found all the plugins, so registering with the OpenACC runtime (which
3934 takes a copy of the pointer argument) must be delayed until now. */
3935 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3936 goacc_register (&devs[i]);
3937 }
3939 num_devices = num_devs;
3940 num_devices_openmp = num_devs_openmp;
3941 devices = devs;
3942 if (atexit (gomp_target_fini) != 0)
3943 gomp_fatal ("atexit failed");
3944 }
3946 #else /* PLUGIN_SUPPORT */
3947 /* If dlfcn.h is unavailable we always fall back to host execution.
3948 GOMP_target* routines are just stubs for this case. */
3949 static void
3950 gomp_target_init (void)
3951 {
3952 }
3953 #endif /* PLUGIN_SUPPORT */