libgomp/target.c
/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != GOMP_DEVICE_HOST_FALLBACK
	  && device_id != num_devices_openmp)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
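/* For example, a zero-length array section with host address P arrives here
   with KEY->host_start == KEY->host_end == P.  A hypothetical mapping that
   covers [P, P+N) is found by the first probe ([P, P+1)); one that ends
   exactly at P, i.e. [Q, P), is found by the second probe ([P-1, P)); the
   final probe catches another zero-length entry [P, P).  */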
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
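/* For example (illustrative numbers, not from any particular mapping): two
   to-be-copied regions at device offsets [0, 1024) and [1536, 2048) are 512
   bytes apart, below MAX_COALESCE_BUF_GAP, so they merge into one chunk
   [0, 2048) and the 512 padding bytes in between are copied along with the
   payload.  A third region starting at offset 8192 would open a new chunk,
   since the gap exceeds 4 KiB.  */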
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
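/* Typical usage during gomp_map_vars_internal (a sketch; the offsets are
   hypothetical): while laying out newly mapped regions, each region that
   should be copied host to device is registered as it is assigned its
   device offset, e.g.

     gomp_coalesce_buf_add (&cbuf, tgt_size, host_end - host_start);

   and once the device block has been allocated, the chunks are memcpy'd
   into cbuf.buf and flushed with one gomp_copy_host2dev call per chunk.  */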
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  Make a
	     copy of the data into a temporary buffer in those cases.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free temporary buffer once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
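/* E.g. when a pointer value computed on the current stack frame is queued
   for an asynchronous host-to-device copy (as gomp_map_pointer below does),
   EPHEMERAL is passed as true so that the value is snapshot into a heap
   buffer which stays valid until the queue callback frees it.  */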
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
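/* E.g. a hypothetical 'omp target enter data map(to: x) map(to: x)' names X
   twice on one construct; both increments resolve to the same refcount
   pointer, so the second htab_find hits and the refcount still only goes up
   by one, as the OpenMP 5.0 rule requires.  */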
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
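/* For instance, for a structure with two mapped elements sharing one
   refcount that drops to zero: the first sibling sets both *DO_COPY and
   *DO_REMOVE (refcount newly seen, set to zero), while the second sibling
   finds the refcount already in REFCOUNT_SET and zero, so it gets *DO_COPY
   only; both fields are copied back, but removal is signalled just once.  */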
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, false, cbuf);

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
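/* In the short (OpenMP) encoding each kind is 16 bits: the low 8 bits are
   the GOMP_MAP_* value and the high 8 bits hold the log2 of the mapping's
   alignment; the 8-bit (OpenACC) encoding uses 3 and 5 bits respectively.
   Hence the rshift/typemask pairs (8, 0xff) and (3, 0x7) used below, and
   the alignment is recovered as (size_t) 1 << (kind >> rshift).  */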
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, cbuf, refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, false,
				      cbuf, refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, cbuf, refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
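/* Illustrative OpenACC usage (hypothetical variables):

     struct S { int *p; } s;
     #pragma acc enter data copyin(s)          // maps the struct
     #pragma acc enter data copyin(s.p[0:n])   // maps the pointed-to array
     #pragma acc enter data attach(s.p)        // fixed up by this routine

   The first attach overwrites the device copy of 'p' with the device
   address of the mapped array (adjusted by BIAS) so device code can
   dereference it directly; subsequent attaches to the same field merely
   increment the attachment counter.  */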
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
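/* For OFFSET_STRUCT the GOMP_MAP_STRUCT entry itself carries no key, so the
   device address of the whole struct is recovered from its first mapped
   field: e.g. (hypothetical layout) if hostaddrs[i] is the struct base and
   hostaddrs[i + 1] is a field 16 bytes in, the field's device address minus
   16 yields the struct's device address.  */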
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind,
							  kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     a statically assigned pointer, but then it should point
		     to the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier or if a
		 null pointer was there and a non-null one has now been
		 found; that permits transparent re-mapping for Fortran
		 array descriptors which were previously mapped
		 unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, NULL,
				  refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
					  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;
	    size_t j;
	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
						       - k->host_start),
				      sizes[j], cbufp);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily
		   test 'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp, refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start
					    - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i], sizes[i],
					   cbufp);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, false, cbufp,
				      refcount_set);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target
		       address of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		k->refcount = 0;
		k->dynamic_refcount = 0;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (openmp_p)
		      {
			k->refcount = REFCOUNT_STRUCTELEM;
			if (field_tgt_structelem_first == NULL)
			  {
			    /* Set to first structure element of sequence.  */
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
			    field_tgt_structelem_first = k;
			  }
			else
			  /* Point to refcount of leading element, but do not
			     increment again.  */
			  k->structelem_refcount_ptr
			    = &field_tgt_structelem_first->structelem_refcount;

			if (i == field_tgt_clear)
			  {
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
			    field_tgt_structelem_first = NULL;
			  }
		      }
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		/* First increment, from 0 to 1.  gomp_increment_refcount
		   encapsulates the different increment cases, so use this
		   instead of directly setting 1 during initialization.  */
		gomp_increment_refcount (k, refcount_set);

		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    tgt->list[i].has_null_ptr_assoc = false;

		    for (j = i + 1; j < mapnum; j++)
		      {
			int ptr_kind = (get_kind (short_mapkind, kinds, j)
					& typemask);
			if (!GOMP_MAP_POINTER_P (ptr_kind)
			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
			  break;
			else if ((uintptr_t) hostaddrs[j] < k->host_start
				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				     > k->host_end))
			  break;
			else
			  {
			    tgt->list[j].key = k;
			    tgt->list[j].copy_from = false;
			    tgt->list[j].always_copy_from = false;
			    tgt->list[j].is_attach = false;
			    tgt->list[i].has_null_ptr_assoc
			      |= !(*(void **) hostaddrs[j]);
			    /* For OpenMP, the use of refcount_sets causes
			       errors if we set k->refcount = 1 above but also
			       increment it again here, for decrementing will
			       not properly match, since we decrement only once
			       for each key's refcount.  Therefore avoid this
			       increment for OpenMP constructs.  */
			    if (!openmp_p)
			      gomp_increment_refcount (k, refcount_set);
			    gomp_map_pointer (tgt, aq,
					      (uintptr_t)
					      *(void **) hostaddrs[j],
					      k->tgt_offset
					      + ((uintptr_t) hostaddrs[j]
						 - k->host_start),
					      sizes[j], cbufp);
			  }
		      }
		    i = j - 1;
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), false, cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's
		       not data allocated by the current call to this
		       function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), true, NULL);
		  }
		array++;
	      }
	  }
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      true, cbufp);
	}
    }

  if (cbufp)
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!aq);

      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start,
			    true, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, htab_t *refcount_set,
	       enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}

attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
static void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}
1899 static void
1900 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1901 size_t *sizes, void *kinds, bool short_mapkind)
1903 size_t i;
1904 struct splay_tree_key_s cur_node;
1905 const int typemask = short_mapkind ? 0xff : 0x7;
1907 if (!devicep)
1908 return;
1910 if (mapnum == 0)
1911 return;
1913 gomp_mutex_lock (&devicep->lock);
1914 if (devicep->state == GOMP_DEVICE_FINALIZED)
1916 gomp_mutex_unlock (&devicep->lock);
1917 return;
1920 for (i = 0; i < mapnum; i++)
1921 if (sizes[i])
1923 cur_node.host_start = (uintptr_t) hostaddrs[i];
1924 cur_node.host_end = cur_node.host_start + sizes[i];
1925 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1926 if (n)
1928 int kind = get_kind (short_mapkind, kinds, i);
1929 if (n->host_start > cur_node.host_start
1930 || n->host_end < cur_node.host_end)
1932 gomp_mutex_unlock (&devicep->lock);
1933 gomp_fatal ("Trying to update [%p..%p) object when "
1934 "only [%p..%p) is mapped",
1935 (void *) cur_node.host_start,
1936 (void *) cur_node.host_end,
1937 (void *) n->host_start,
1938 (void *) n->host_end);
1942 void *hostaddr = (void *) cur_node.host_start;
1943 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1944 + cur_node.host_start - n->host_start);
1945 size_t size = cur_node.host_end - cur_node.host_start;
1947 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1948 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1949 false, NULL);
1950 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1951 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1954 gomp_mutex_unlock (&devicep->lock);
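/* Editorial note (an illustrative sketch, not part of the original source):
   for the short_mapkind == true ABI each KINDS entry is 16 bits wide; the
   low byte holds the GOMP_MAP_* kind and the high byte an alignment shift,
   hence the 0xff typemask above (0x7 for the older 8-bit ABI).  Decoding
   looks roughly like:

     unsigned short k = kinds[i];
     unsigned char map_kind = k & 0xff;      // e.g. GOMP_MAP_TOFROM
     size_t align = (size_t) 1 << (k >> 8);  // alignment, firstprivate only

   The alignment byte is only meaningful for GOMP_MAP_FIRSTPRIVATE entries
   (see calculate_firstprivate_requirements below).  */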
1957 /* Load the image pointed to by TARGET_DATA to the device specified by
1958 DEVICEP, and insert into the splay tree the mappings between addresses
1959 from HOST_TABLE and from the loaded target image. We rely on the host
1960 and device compilers emitting variables and functions in the same order. */
1962 static void
1963 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1964 const void *host_table, const void *target_data,
1965 bool is_register_lock)
1967 void **host_func_table = ((void ***) host_table)[0];
1968 void **host_funcs_end = ((void ***) host_table)[1];
1969 void **host_var_table = ((void ***) host_table)[2];
1970 void **host_vars_end = ((void ***) host_table)[3];
1972 /* The func table contains only addresses; the var table contains addresses
1973 and corresponding sizes. */
1974 int num_funcs = host_funcs_end - host_func_table;
1975 int num_vars = (host_vars_end - host_var_table) / 2;
1977 /* 'Others' currently consists only of 'device_num'. */
1978 int num_others = 1;
1980 /* Load image to device and get target addresses for the image. */
1981 struct addr_pair *target_table = NULL;
1982 int i, num_target_entries;
1984 num_target_entries
1985 = devicep->load_image_func (devicep->target_id, version,
1986 target_data, &target_table);
1988 if (num_target_entries != num_funcs + num_vars
1989 /* Others (device_num) are included as trailing entries in the pair list. */
1990 && num_target_entries != num_funcs + num_vars + num_others)
1992 gomp_mutex_unlock (&devicep->lock);
1993 if (is_register_lock)
1994 gomp_mutex_unlock (&register_lock);
1995 gomp_fatal ("Cannot map target functions or variables"
1996 " (expected %u, have %u)", num_funcs + num_vars,
1997 num_target_entries);
2000 /* Insert host-target address mapping into splay tree. */
2001 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2002 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2003 tgt->refcount = REFCOUNT_INFINITY;
2004 tgt->tgt_start = 0;
2005 tgt->tgt_end = 0;
2006 tgt->to_free = NULL;
2007 tgt->prev = NULL;
2008 tgt->list_count = 0;
2009 tgt->device_descr = devicep;
2010 splay_tree_node array = tgt->array;
2012 for (i = 0; i < num_funcs; i++)
2014 splay_tree_key k = &array->key;
2015 k->host_start = (uintptr_t) host_func_table[i];
2016 k->host_end = k->host_start + 1;
2017 k->tgt = tgt;
2018 k->tgt_offset = target_table[i].start;
2019 k->refcount = REFCOUNT_INFINITY;
2020 k->dynamic_refcount = 0;
2021 k->aux = NULL;
2022 array->left = NULL;
2023 array->right = NULL;
2024 splay_tree_insert (&devicep->mem_map, array);
2025 array++;
2028 /* The most significant bit of the size in the host and target tables marks
2029 "omp declare target link" variables. */
2030 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2031 const uintptr_t size_mask = ~link_bit;
2033 for (i = 0; i < num_vars; i++)
2035 struct addr_pair *target_var = &target_table[num_funcs + i];
2036 uintptr_t target_size = target_var->end - target_var->start;
2037 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2039 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2041 gomp_mutex_unlock (&devicep->lock);
2042 if (is_register_lock)
2043 gomp_mutex_unlock (&register_lock);
2044 gomp_fatal ("Cannot map target variables (size mismatch)");
2047 splay_tree_key k = &array->key;
2048 k->host_start = (uintptr_t) host_var_table[i * 2];
2049 k->host_end
2050 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2051 k->tgt = tgt;
2052 k->tgt_offset = target_var->start;
2053 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2054 k->dynamic_refcount = 0;
2055 k->aux = NULL;
2056 array->left = NULL;
2057 array->right = NULL;
2058 splay_tree_insert (&devicep->mem_map, array);
2059 array++;
2062 /* The last entry is for the on-device 'device_num' variable. Tolerate the
2063 case where the plugin does not return this entry. */
2064 if (num_funcs + num_vars < num_target_entries)
2066 struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2067 /* The start address will be non-zero for the last entry if
2068 GOMP_DEVICE_NUM_VAR was found in this image. */
2069 if (device_num_var->start != 0)
2071 /* The index of the devicep within devices[] is regarded as its
2072 'device number', which is different from the per-device type
2073 devicep->target_id. */
2074 int device_num_val = (int) (devicep - &devices[0]);
2075 if (device_num_var->end - device_num_var->start != sizeof (int))
2077 gomp_mutex_unlock (&devicep->lock);
2078 if (is_register_lock)
2079 gomp_mutex_unlock (&register_lock);
2080 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2081 "format");
2084 /* Copy the device_num value to its place in device memory, thereby
2085 actually putting this device number into effect. */
2086 gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2087 &device_num_val, sizeof (int), false, NULL);
2091 free (target_table);
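/* Editorial note (a minimal sketch of the HOST_TABLE layout, inferred from
   the accesses above; the symbol names here are hypothetical):

     void *host_table[4] = {
       __offload_func_table,  // [0] first host function address
       __offload_funcs_end,   // [1] one past the last function address
       __offload_var_table,   // [2] pairs of { var address, var size }
       __offload_vars_end     // [3] one past the last pair
     };

   The var table stores two words per variable, which is why num_vars is
   computed as (host_vars_end - host_var_table) / 2, and the MSB of each
   size word flags an "omp declare target link" variable.  */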
2094 /* Unload the mappings described by target_data from the device DEVICEP.
2095 The device must be locked. */
2097 static void
2098 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2099 unsigned version,
2100 const void *host_table, const void *target_data)
2102 void **host_func_table = ((void ***) host_table)[0];
2103 void **host_funcs_end = ((void ***) host_table)[1];
2104 void **host_var_table = ((void ***) host_table)[2];
2105 void **host_vars_end = ((void ***) host_table)[3];
2107 /* The func table contains only addresses; the var table contains addresses
2108 and corresponding sizes. */
2109 int num_funcs = host_funcs_end - host_func_table;
2110 int num_vars = (host_vars_end - host_var_table) / 2;
2112 struct splay_tree_key_s k;
2113 splay_tree_key node = NULL;
2115 /* Find the mapping at the start of the node array. */
2116 if (num_funcs || num_vars)
2118 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2119 : (uintptr_t) host_var_table[0]);
2120 k.host_end = k.host_start + 1;
2121 node = splay_tree_lookup (&devicep->mem_map, &k);
2124 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2126 gomp_mutex_unlock (&devicep->lock);
2127 gomp_fatal ("image unload fail");
2130 /* Remove mappings from splay tree. */
2131 int i;
2132 for (i = 0; i < num_funcs; i++)
2134 k.host_start = (uintptr_t) host_func_table[i];
2135 k.host_end = k.host_start + 1;
2136 splay_tree_remove (&devicep->mem_map, &k);
2139 /* The most significant bit of the size in the host and target tables marks
2140 "omp declare target link" variables. */
2141 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2142 const uintptr_t size_mask = ~link_bit;
2143 bool is_tgt_unmapped = false;
2145 for (i = 0; i < num_vars; i++)
2147 k.host_start = (uintptr_t) host_var_table[i * 2];
2148 k.host_end
2149 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2151 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2152 splay_tree_remove (&devicep->mem_map, &k);
2153 else
2155 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2156 is_tgt_unmapped = gomp_remove_var (devicep, n);
2160 if (node && !is_tgt_unmapped)
2162 free (node->tgt);
2163 free (node);
2167 /* This function should be called by every offload image on loading. It
2168 takes the descriptor of the host func and var tables HOST_TABLE, the TYPE
2169 of the target, and the TARGET_DATA needed by the target plugin. */
2171 void
2172 GOMP_offload_register_ver (unsigned version, const void *host_table,
2173 int target_type, const void *target_data)
2175 int i;
2177 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2178 gomp_fatal ("Library too old for offload (version %u < %u)",
2179 GOMP_VERSION, GOMP_VERSION_LIB (version));
2181 gomp_mutex_lock (&register_lock);
2183 /* Load image to all initialized devices. */
2184 for (i = 0; i < num_devices; i++)
2186 struct gomp_device_descr *devicep = &devices[i];
2187 gomp_mutex_lock (&devicep->lock);
2188 if (devicep->type == target_type
2189 && devicep->state == GOMP_DEVICE_INITIALIZED)
2190 gomp_load_image_to_device (devicep, version,
2191 host_table, target_data, true);
2192 gomp_mutex_unlock (&devicep->lock);
2195 /* Insert image to array of pending images. */
2196 offload_images
2197 = gomp_realloc_unlock (offload_images,
2198 (num_offload_images + 1)
2199 * sizeof (struct offload_image_descr));
2200 offload_images[num_offload_images].version = version;
2201 offload_images[num_offload_images].type = target_type;
2202 offload_images[num_offload_images].host_table = host_table;
2203 offload_images[num_offload_images].target_data = target_data;
2205 num_offload_images++;
2206 gomp_mutex_unlock (&register_lock);
2209 void
2210 GOMP_offload_register (const void *host_table, int target_type,
2211 const void *target_data)
2213 GOMP_offload_register_ver (0, host_table, target_type, target_data);
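/* Editorial note (a hedged sketch): each offload image is expected to
   register itself at program startup, conceptually via a constructor along
   these lines (the names OFFLOAD_VERSION, host_table and target_blob are
   hypothetical; the real call is emitted by the compiler/mkoffload
   machinery):

     static void __attribute__((constructor))
     init_offload_image (void)
     {
       GOMP_offload_register_ver (OFFLOAD_VERSION, &host_table,
                                  GOMP_DEVICE_NVIDIA_PTX, target_blob);
     }

   GOMP_DEVICE_NVIDIA_PTX is one of the offload_target_type values from
   gomp-constants.h.  */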
2216 /* This function should be called by every offload image on unloading. It
2217 takes the descriptor of the host func and var tables HOST_TABLE, the TYPE
2218 of the target, and the TARGET_DATA needed by the target plugin. */
2220 void
2221 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2222 int target_type, const void *target_data)
2224 int i;
2226 gomp_mutex_lock (&register_lock);
2228 /* Unload image from all initialized devices. */
2229 for (i = 0; i < num_devices; i++)
2231 struct gomp_device_descr *devicep = &devices[i];
2232 gomp_mutex_lock (&devicep->lock);
2233 if (devicep->type == target_type
2234 && devicep->state == GOMP_DEVICE_INITIALIZED)
2235 gomp_unload_image_from_device (devicep, version,
2236 host_table, target_data);
2237 gomp_mutex_unlock (&devicep->lock);
2240 /* Remove image from array of pending images. */
2241 for (i = 0; i < num_offload_images; i++)
2242 if (offload_images[i].target_data == target_data)
2244 offload_images[i] = offload_images[--num_offload_images];
2245 break;
2248 gomp_mutex_unlock (&register_lock);
2251 void
2252 GOMP_offload_unregister (const void *host_table, int target_type,
2253 const void *target_data)
2255 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2258 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2259 must be locked on entry, and remains locked on return. */
2261 attribute_hidden void
2262 gomp_init_device (struct gomp_device_descr *devicep)
2264 int i;
2265 if (!devicep->init_device_func (devicep->target_id))
2267 gomp_mutex_unlock (&devicep->lock);
2268 gomp_fatal ("device initialization failed");
2271 /* Load to the device all images registered so far. */
2272 for (i = 0; i < num_offload_images; i++)
2274 struct offload_image_descr *image = &offload_images[i];
2275 if (image->type == devicep->type)
2276 gomp_load_image_to_device (devicep, image->version,
2277 image->host_table, image->target_data,
2278 false);
2281 /* Initialize OpenACC asynchronous queues. */
2282 goacc_init_asyncqueues (devicep);
2284 devicep->state = GOMP_DEVICE_INITIALIZED;
2287 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2288 must be locked on entry, and remains locked on return. */
2290 attribute_hidden bool
2291 gomp_fini_device (struct gomp_device_descr *devicep)
2293 bool ret = goacc_fini_asyncqueues (devicep);
2294 ret &= devicep->fini_device_func (devicep->target_id);
2295 devicep->state = GOMP_DEVICE_FINALIZED;
2296 return ret;
2299 attribute_hidden void
2300 gomp_unload_device (struct gomp_device_descr *devicep)
2302 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2304 unsigned i;
2306 /* Unload from the device all images currently registered. */
2307 for (i = 0; i < num_offload_images; i++)
2309 struct offload_image_descr *image = &offload_images[i];
2310 if (image->type == devicep->type)
2311 gomp_unload_image_from_device (devicep, image->version,
2312 image->host_table,
2313 image->target_data);
2318 /* Host fallback for GOMP_target{,_ext} routines. */
2320 static void
2321 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2322 struct gomp_device_descr *devicep)
2324 struct gomp_thread old_thr, *thr = gomp_thread ();
2326 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2327 && devicep != NULL)
2328 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2329 "be used for offloading");
2331 old_thr = *thr;
2332 memset (thr, '\0', sizeof (*thr));
2333 if (gomp_places_list)
2335 thr->place = old_thr.place;
2336 thr->ts.place_partition_len = gomp_places_list_len;
2338 fn (hostaddrs);
2339 gomp_free_thread (thr);
2340 *thr = old_thr;
2343 /* Calculate alignment and size requirements of a private copy of data shared
2344 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
2346 static inline void
2347 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2348 unsigned short *kinds, size_t *tgt_align,
2349 size_t *tgt_size)
2351 size_t i;
2352 for (i = 0; i < mapnum; i++)
2353 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2355 size_t align = (size_t) 1 << (kinds[i] >> 8);
2356 if (*tgt_align < align)
2357 *tgt_align = align;
2358 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2359 *tgt_size += sizes[i];
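/* Editorial note (a worked example of the rounding above): with
   *tgt_size == 13 and align == 8, (13 + 8 - 1) & ~(8 - 1) == 20 & ~7 == 16,
   i.e. the running size is rounded up to the next multiple of the
   alignment before the entry's own size is added.  */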
2363 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2365 static inline void
2366 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2367 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2368 size_t tgt_size)
2370 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2371 if (al)
2372 tgt += tgt_align - al;
2373 tgt_size = 0;
2374 size_t i;
2375 for (i = 0; i < mapnum; i++)
2376 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2378 size_t align = (size_t) 1 << (kinds[i] >> 8);
2379 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2380 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2381 hostaddrs[i] = tgt + tgt_size;
2382 tgt_size = tgt_size + sizes[i];
2386 /* Helper function of GOMP_target{,_ext} routines. */
2388 static void *
2389 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2390 void (*host_fn) (void *))
2392 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2393 return (void *) host_fn;
2394 else
2396 gomp_mutex_lock (&devicep->lock);
2397 if (devicep->state == GOMP_DEVICE_FINALIZED)
2399 gomp_mutex_unlock (&devicep->lock);
2400 return NULL;
2403 struct splay_tree_key_s k;
2404 k.host_start = (uintptr_t) host_fn;
2405 k.host_end = k.host_start + 1;
2406 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2407 gomp_mutex_unlock (&devicep->lock);
2408 if (tgt_fn == NULL)
2409 return NULL;
2411 return (void *) tgt_fn->tgt_offset;
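/* Editorial note: host functions are entered into the splay tree as
   degenerate one-byte ranges (host_end = host_start + 1, see
   gomp_load_image_to_device), so the lookup above finds a function by its
   host address alone, and tgt_offset then doubles as the device-side entry
   point rather than an offset into a mapped block.  */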
2415 /* Called when encountering a target directive. If DEVICE
2416 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
2417 GOMP_DEVICE_HOST_FALLBACK (or any value
2418 larger than the last available hw device), use host fallback.
2419 FN is the address of the host code, UNUSED is part of the current ABI, but
2420 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2421 with MAPNUM entries, holding the addresses of the host objects, the
2422 sizes of the host objects (resp., for pointer kinds, the pointer bias,
2423 with the size assumed to be sizeof (void *)) and the kinds. */
2425 void
2426 GOMP_target (int device, void (*fn) (void *), const void *unused,
2427 size_t mapnum, void **hostaddrs, size_t *sizes,
2428 unsigned char *kinds)
2430 struct gomp_device_descr *devicep = resolve_device (device);
2432 void *fn_addr;
2433 if (devicep == NULL
2434 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2435 /* All shared memory devices should use the GOMP_target_ext function. */
2436 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2437 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2438 return gomp_target_fallback (fn, hostaddrs, devicep);
2440 htab_t refcount_set = htab_create (mapnum);
2441 struct target_mem_desc *tgt_vars
2442 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2443 &refcount_set, GOMP_MAP_VARS_TARGET);
2444 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2445 NULL);
2446 htab_clear (refcount_set);
2447 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2448 htab_free (refcount_set);
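/* Editorial note (a hedged sketch of a compiler-emitted call; fn.0 is a
   hypothetical outlined target body): for a construct like

     #pragma omp target map(tofrom: x)
     x++;

   the compiler emits, roughly:

     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (x) };
     unsigned char kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target (GOMP_DEVICE_ICV, fn.0, NULL, 1, hostaddrs, sizes, kinds);

   with GOMP_DEVICE_ICV selecting the default-device-var ICV.  */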
2451 static inline unsigned int
2452 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2454 /* If we cannot run asynchronously, simply ignore nowait. */
2455 if (devicep != NULL && devicep->async_run_func == NULL)
2456 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2458 return flags;
2461 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2462 and several arguments have been added:
2463 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2464 DEPEND is array of dependencies, see GOMP_task for details.
2466 ARGS is a pointer to an array consisting of a variable number of both
2467 device-independent and device-specific arguments, each of which can take
2468 one or two elements: the first specifies for which device it is intended,
2469 the type, and optionally also the value. If the value is not present in
2470 the first element, the whole second element holds the actual value. The
2471 last element of the array is a single NULL. The device-independent
2472 arguments include, for example, NUM_TEAMS and THREAD_LIMIT.
2474 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2475 that value, or 1 if the teams construct is not present, or 0 if the
2476 teams construct has no num_teams clause and so the choice is
2477 implementation defined, or -1 if it can't be determined on the host
2478 what value GOMP_teams will have on the device.
2479 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2480 body with that value, or 0 if the teams construct has no thread_limit
2481 clause or the teams construct is not present, or -1 if it can't be
2482 determined on the host what value GOMP_teams will have on the device. */
2484 void
2485 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2486 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2487 unsigned int flags, void **depend, void **args)
2489 struct gomp_device_descr *devicep = resolve_device (device);
2490 size_t tgt_align = 0, tgt_size = 0;
2491 bool fpc_done = false;
2493 flags = clear_unsupported_flags (devicep, flags);
2495 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2497 struct gomp_thread *thr = gomp_thread ();
2498 /* Create a team if we don't have any around, as nowait
2499 target tasks make sense to run asynchronously even when
2500 outside of any parallel. */
2501 if (__builtin_expect (thr->ts.team == NULL, 0))
2503 struct gomp_team *team = gomp_new_team (1);
2504 struct gomp_task *task = thr->task;
2505 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2506 team->prev_ts = thr->ts;
2507 thr->ts.team = team;
2508 thr->ts.team_id = 0;
2509 thr->ts.work_share = &team->work_shares[0];
2510 thr->ts.last_work_share = NULL;
2511 #ifdef HAVE_SYNC_BUILTINS
2512 thr->ts.single_count = 0;
2513 #endif
2514 thr->ts.static_trip = 0;
2515 thr->task = &team->implicit_task[0];
2516 gomp_init_task (thr->task, NULL, icv);
2517 if (task)
2519 thr->task = task;
2520 gomp_end_task ();
2521 free (task);
2522 thr->task = &team->implicit_task[0];
2524 else
2525 pthread_setspecific (gomp_thread_destructor, thr);
2527 if (thr->ts.team
2528 && !thr->task->final_task)
2530 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2531 sizes, kinds, flags, depend, args,
2532 GOMP_TARGET_TASK_BEFORE_MAP);
2533 return;
2537 /* If there are depend clauses, but nowait is not present
2538 (or we are in a final task), block the parent task until the
2539 dependencies are resolved and then just continue with the rest
2540 of the function as if it were a merged task. */
2541 if (depend != NULL)
2543 struct gomp_thread *thr = gomp_thread ();
2544 if (thr->task && thr->task->depend_hash)
2546 /* If we might need to wait, copy firstprivate now. */
2547 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2548 &tgt_align, &tgt_size);
2549 if (tgt_align)
2551 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2552 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2553 tgt_align, tgt_size);
2555 fpc_done = true;
2556 gomp_task_maybe_wait_for_dependencies (depend);
2560 void *fn_addr;
2561 if (devicep == NULL
2562 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2563 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2564 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2566 if (!fpc_done)
2568 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2569 &tgt_align, &tgt_size);
2570 if (tgt_align)
2572 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2573 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2574 tgt_align, tgt_size);
2577 gomp_target_fallback (fn, hostaddrs, devicep);
2578 return;
2581 struct target_mem_desc *tgt_vars;
2582 htab_t refcount_set = NULL;
2584 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2586 if (!fpc_done)
2588 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2589 &tgt_align, &tgt_size);
2590 if (tgt_align)
2592 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2593 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2594 tgt_align, tgt_size);
2597 tgt_vars = NULL;
2599 else
2601 refcount_set = htab_create (mapnum);
2602 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2603 true, &refcount_set, GOMP_MAP_VARS_TARGET);
2605 devicep->run_func (devicep->target_id, fn_addr,
2606 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2607 args);
2608 if (tgt_vars)
2610 htab_clear (refcount_set);
2611 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2613 if (refcount_set)
2614 htab_free (refcount_set);
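/* Editorial note (hedged): the ARGS array documented above is a
   NULL-terminated sequence of words, each tagged with a device selector
   and an identifier such as GOMP_TARGET_ARG_NUM_TEAMS or
   GOMP_TARGET_ARG_THREAD_LIMIT (see gomp-constants.h); small values are
   packed into the same word, while larger ones spill into a second
   element, which is what the one-or-two-element wording above refers
   to.  */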
2617 /* Host fallback for GOMP_target_data{,_ext} routines. */
2619 static void
2620 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2622 struct gomp_task_icv *icv = gomp_icv (false);
2624 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2625 && devicep != NULL)
2626 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2627 "be used for offloading");
2629 if (icv->target_data)
2631 /* Even when doing a host fallback, if there are any active
2632 #pragma omp target data constructs, we need to remember the
2633 new #pragma omp target data, otherwise GOMP_target_end_data
2634 would get out of sync. */
2635 struct target_mem_desc *tgt
2636 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2637 NULL, GOMP_MAP_VARS_DATA);
2638 tgt->prev = icv->target_data;
2639 icv->target_data = tgt;
2643 void
2644 GOMP_target_data (int device, const void *unused, size_t mapnum,
2645 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2647 struct gomp_device_descr *devicep = resolve_device (device);
2649 if (devicep == NULL
2650 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2651 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2652 return gomp_target_data_fallback (devicep);
2654 struct target_mem_desc *tgt
2655 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2656 NULL, GOMP_MAP_VARS_DATA);
2657 struct gomp_task_icv *icv = gomp_icv (true);
2658 tgt->prev = icv->target_data;
2659 icv->target_data = tgt;
2662 void
2663 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2664 size_t *sizes, unsigned short *kinds)
2666 struct gomp_device_descr *devicep = resolve_device (device);
2668 if (devicep == NULL
2669 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2670 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2671 return gomp_target_data_fallback (devicep);
2673 struct target_mem_desc *tgt
2674 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2675 NULL, GOMP_MAP_VARS_DATA);
2676 struct gomp_task_icv *icv = gomp_icv (true);
2677 tgt->prev = icv->target_data;
2678 icv->target_data = tgt;
2681 void
2682 GOMP_target_end_data (void)
2684 struct gomp_task_icv *icv = gomp_icv (false);
2685 if (icv->target_data)
2687 struct target_mem_desc *tgt = icv->target_data;
2688 icv->target_data = tgt->prev;
2689 gomp_unmap_vars (tgt, true, NULL);
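/* Editorial note (illustrative): active 'target data' regions form a stack
   threaded through tgt->prev, so nested regions such as

     #pragma omp target data map(to: a)  // pushes tgt1
     #pragma omp target data map(to: b)  // pushes tgt2, tgt2->prev == tgt1
     ...
                                         // GOMP_target_end_data pops tgt2
                                         // GOMP_target_end_data pops tgt1

   unmap in strict LIFO order via icv->target_data.  */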
2693 void
2694 GOMP_target_update (int device, const void *unused, size_t mapnum,
2695 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2697 struct gomp_device_descr *devicep = resolve_device (device);
2699 if (devicep == NULL
2700 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2701 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2702 return;
2704 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2707 void
2708 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2709 size_t *sizes, unsigned short *kinds,
2710 unsigned int flags, void **depend)
2712 struct gomp_device_descr *devicep = resolve_device (device);
2714 /* If there are depend clauses, but nowait is not present,
2715 block the parent task until the dependencies are resolved
2716 and then just continue with the rest of the function as if it
2717 were a merged task. Until we are able to schedule tasks during
2718 variable mapping or unmapping, ignore nowait if depend clauses
2719 are not present. */
2720 if (depend != NULL)
2722 struct gomp_thread *thr = gomp_thread ();
2723 if (thr->task && thr->task->depend_hash)
2725 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2726 && thr->ts.team
2727 && !thr->task->final_task)
2729 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2730 mapnum, hostaddrs, sizes, kinds,
2731 flags | GOMP_TARGET_FLAG_UPDATE,
2732 depend, NULL, GOMP_TARGET_TASK_DATA))
2733 return;
2735 else
2737 struct gomp_team *team = thr->ts.team;
2738 /* If parallel or taskgroup has been cancelled, don't start new
2739 tasks. */
2740 if (__builtin_expect (gomp_cancel_var, 0) && team)
2742 if (gomp_team_barrier_cancelled (&team->barrier))
2743 return;
2744 if (thr->task->taskgroup)
2746 if (thr->task->taskgroup->cancelled)
2747 return;
2748 if (thr->task->taskgroup->workshare
2749 && thr->task->taskgroup->prev
2750 && thr->task->taskgroup->prev->cancelled)
2751 return;
2755 gomp_task_maybe_wait_for_dependencies (depend);
2760 if (devicep == NULL
2761 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2762 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2763 return;
2765 struct gomp_thread *thr = gomp_thread ();
2766 struct gomp_team *team = thr->ts.team;
2767 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2768 if (__builtin_expect (gomp_cancel_var, 0) && team)
2770 if (gomp_team_barrier_cancelled (&team->barrier))
2771 return;
2772 if (thr->task->taskgroup)
2774 if (thr->task->taskgroup->cancelled)
2775 return;
2776 if (thr->task->taskgroup->workshare
2777 && thr->task->taskgroup->prev
2778 && thr->task->taskgroup->prev->cancelled)
2779 return;
2783 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2786 static void
2787 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2788 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2789 htab_t *refcount_set)
2791 const int typemask = 0xff;
2792 size_t i;
2793 gomp_mutex_lock (&devicep->lock);
2794 if (devicep->state == GOMP_DEVICE_FINALIZED)
2796 gomp_mutex_unlock (&devicep->lock);
2797 return;
2800 for (i = 0; i < mapnum; i++)
2801 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2803 struct splay_tree_key_s cur_node;
2804 cur_node.host_start = (uintptr_t) hostaddrs[i];
2805 cur_node.host_end = cur_node.host_start + sizeof (void *);
2806 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2808 if (n)
2809 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2810 false, NULL);
2813 int nrmvars = 0;
2814 splay_tree_key remove_vars[mapnum];
2816 for (i = 0; i < mapnum; i++)
2818 struct splay_tree_key_s cur_node;
2819 unsigned char kind = kinds[i] & typemask;
2820 switch (kind)
2822 case GOMP_MAP_FROM:
2823 case GOMP_MAP_ALWAYS_FROM:
2824 case GOMP_MAP_DELETE:
2825 case GOMP_MAP_RELEASE:
2826 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2827 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2828 cur_node.host_start = (uintptr_t) hostaddrs[i];
2829 cur_node.host_end = cur_node.host_start + sizes[i];
2830 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2831 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2832 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2833 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2834 if (!k)
2835 continue;
2837 bool delete_p = (kind == GOMP_MAP_DELETE
2838 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2839 bool do_copy, do_remove;
2840 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2841 &do_remove);
2843 if ((kind == GOMP_MAP_FROM && do_copy)
2844 || kind == GOMP_MAP_ALWAYS_FROM)
2845 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2846 (void *) (k->tgt->tgt_start + k->tgt_offset
2847 + cur_node.host_start
2848 - k->host_start),
2849 cur_node.host_end - cur_node.host_start);
2851 /* Structure element lists are removed all at once, which
2852 may cause immediate deallocation of the target_mem_desc, causing
2853 errors if we still have subsequent element siblings to copy back.
2854 While we're at it, it also seems more disciplined to simply
2855 queue all removals together for processing below.
2857 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
2858 not have this problem, since it maintains an additional
2859 tgt->refcount = 1 reference to the target_mem_desc to start with.
2861 if (do_remove)
2862 remove_vars[nrmvars++] = k;
2863 break;
2865 case GOMP_MAP_DETACH:
2866 break;
2867 default:
2868 gomp_mutex_unlock (&devicep->lock);
2869 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2870 kind);
2874 for (int i = 0; i < nrmvars; i++)
2875 gomp_remove_var (devicep, remove_vars[i]);
2877 gomp_mutex_unlock (&devicep->lock);
2880 void
2881 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2882 size_t *sizes, unsigned short *kinds,
2883 unsigned int flags, void **depend)
2885 struct gomp_device_descr *devicep = resolve_device (device);
2887 /* If there are depend clauses, but nowait is not present,
2888 block the parent task until the dependencies are resolved
2889 and then just continue with the rest of the function as if it
2890 were a merged task. Until we are able to schedule tasks during
2891 variable mapping or unmapping, ignore nowait if depend clauses
2892 are not present. */
2893 if (depend != NULL)
2895 struct gomp_thread *thr = gomp_thread ();
2896 if (thr->task && thr->task->depend_hash)
2898 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2899 && thr->ts.team
2900 && !thr->task->final_task)
2902 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2903 mapnum, hostaddrs, sizes, kinds,
2904 flags, depend, NULL,
2905 GOMP_TARGET_TASK_DATA))
2906 return;
2908 else
2910 struct gomp_team *team = thr->ts.team;
2911 /* If parallel or taskgroup has been cancelled, don't start new
2912 tasks. */
2913 if (__builtin_expect (gomp_cancel_var, 0) && team)
2915 if (gomp_team_barrier_cancelled (&team->barrier))
2916 return;
2917 if (thr->task->taskgroup)
2919 if (thr->task->taskgroup->cancelled)
2920 return;
2921 if (thr->task->taskgroup->workshare
2922 && thr->task->taskgroup->prev
2923 && thr->task->taskgroup->prev->cancelled)
2924 return;
2928 gomp_task_maybe_wait_for_dependencies (depend);
2933 if (devicep == NULL
2934 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2935 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2936 return;
2938 struct gomp_thread *thr = gomp_thread ();
2939 struct gomp_team *team = thr->ts.team;
2940 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2941 if (__builtin_expect (gomp_cancel_var, 0) && team)
2943 if (gomp_team_barrier_cancelled (&team->barrier))
2944 return;
2945 if (thr->task->taskgroup)
2947 if (thr->task->taskgroup->cancelled)
2948 return;
2949 if (thr->task->taskgroup->workshare
2950 && thr->task->taskgroup->prev
2951 && thr->task->taskgroup->prev->cancelled)
2952 return;
2956 htab_t refcount_set = htab_create (mapnum);
2958 /* The variables are mapped separately such that they can be released
2959 independently. */
2960 size_t i, j;
2961 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2962 for (i = 0; i < mapnum; i++)
2963 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2965 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2966 &kinds[i], true, &refcount_set,
2967 GOMP_MAP_VARS_ENTER_DATA);
2968 i += sizes[i];
2970 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2972 for (j = i + 1; j < mapnum; j++)
2973 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
2974 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
2975 break;
2976 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
2977 &kinds[i], true, &refcount_set,
2978 GOMP_MAP_VARS_ENTER_DATA);
2979 i += j - i - 1;
2981 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
2983 /* An attach operation must be processed together with the mapped
2984 base-pointer list item. */
2985 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2986 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
2987 i += 1;
2989 else
2990 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2991 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
2992 else
2993 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
2994 htab_free (refcount_set);
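/* Editorial note (a hedged sketch): for a GOMP_MAP_STRUCT entry, sizes[i]
   holds the number of field entries that follow it, which is why the loop
   above maps sizes[i] + 1 entries at once and then advances i by sizes[i];
   e.g. entering two fields of a struct s might arrive as

     kinds = { GOMP_MAP_STRUCT, GOMP_MAP_TO, GOMP_MAP_TO }
     sizes = { 2, sizeof (s.a), sizeof (s.b) }

   (field addresses omitted here for brevity).  */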
2997 bool
2998 gomp_target_task_fn (void *data)
3000 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3001 struct gomp_device_descr *devicep = ttask->devicep;
3003 if (ttask->fn != NULL)
3005 void *fn_addr;
3006 if (devicep == NULL
3007 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3008 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3009 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3011 ttask->state = GOMP_TARGET_TASK_FALLBACK;
3012 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
3013 return false;
3016 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3018 if (ttask->tgt)
3019 gomp_unmap_vars (ttask->tgt, true, NULL);
3020 return false;
3023 void *actual_arguments;
3024 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3026 ttask->tgt = NULL;
3027 actual_arguments = ttask->hostaddrs;
3029 else
3031 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3032 NULL, ttask->sizes, ttask->kinds, true,
3033 NULL, GOMP_MAP_VARS_TARGET);
3034 actual_arguments = (void *) ttask->tgt->tgt_start;
3036 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3038 assert (devicep->async_run_func);
3039 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3040 ttask->args, (void *) ttask);
3041 return true;
3043 else if (devicep == NULL
3044 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3045 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3046 return false;
3048 size_t i;
3049 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3050 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3051 ttask->kinds, true);
3052 else
3054 htab_t refcount_set = htab_create (ttask->mapnum);
3055 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3056 for (i = 0; i < ttask->mapnum; i++)
3057 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3059 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3060 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3061 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3062 i += ttask->sizes[i];
3064 else
3065 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3066 &ttask->kinds[i], true, &refcount_set,
3067 GOMP_MAP_VARS_ENTER_DATA);
3068 else
3069 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3070 ttask->kinds, &refcount_set);
3071 htab_free (refcount_set);
3073 return false;
3076 void
3077 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3079 if (thread_limit)
3081 struct gomp_task_icv *icv = gomp_icv (true);
3082 icv->thread_limit_var
3083 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3085 (void) num_teams;
3088 void *
3089 omp_target_alloc (size_t size, int device_num)
3091 if (device_num == gomp_get_num_devices ())
3092 return malloc (size);
3094 if (device_num < 0)
3095 return NULL;
3097 struct gomp_device_descr *devicep = resolve_device (device_num);
3098 if (devicep == NULL)
3099 return NULL;
3101 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3102 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3103 return malloc (size);
3105 gomp_mutex_lock (&devicep->lock);
3106 void *ret = devicep->alloc_func (devicep->target_id, size);
3107 gomp_mutex_unlock (&devicep->lock);
3108 return ret;
3111 void
3112 omp_target_free (void *device_ptr, int device_num)
3114 if (device_ptr == NULL)
3115 return;
3117 if (device_num == gomp_get_num_devices ())
3119 free (device_ptr);
3120 return;
3123 if (device_num < 0)
3124 return;
3126 struct gomp_device_descr *devicep = resolve_device (device_num);
3127 if (devicep == NULL)
3128 return;
3130 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3131 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3133 free (device_ptr);
3134 return;
3137 gomp_mutex_lock (&devicep->lock);
3138 gomp_free_device_memory (devicep, device_ptr);
3139 gomp_mutex_unlock (&devicep->lock);
3143 omp_target_is_present (const void *ptr, int device_num)
3145 if (ptr == NULL)
3146 return 1;
3148 if (device_num == gomp_get_num_devices ())
3149 return 1;
3151 if (device_num < 0)
3152 return 0;
3154 struct gomp_device_descr *devicep = resolve_device (device_num);
3155 if (devicep == NULL)
3156 return 0;
3158 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3159 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3160 return 1;
3162 gomp_mutex_lock (&devicep->lock);
3163 struct splay_tree_s *mem_map = &devicep->mem_map;
3164 struct splay_tree_key_s cur_node;
3166 cur_node.host_start = (uintptr_t) ptr;
3167 cur_node.host_end = cur_node.host_start;
3168 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3169 int ret = n != NULL;
3170 gomp_mutex_unlock (&devicep->lock);
3171 return ret;
3175 omp_target_memcpy (void *dst, const void *src, size_t length,
3176 size_t dst_offset, size_t src_offset, int dst_device_num,
3177 int src_device_num)
3179 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3180 bool ret;
3182 if (dst_device_num != gomp_get_num_devices ())
3184 if (dst_device_num < 0)
3185 return EINVAL;
3187 dst_devicep = resolve_device (dst_device_num);
3188 if (dst_devicep == NULL)
3189 return EINVAL;
3191 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3192 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3193 dst_devicep = NULL;
3195 if (src_device_num != num_devices_openmp)
3197 if (src_device_num < 0)
3198 return EINVAL;
3200 src_devicep = resolve_device (src_device_num);
3201 if (src_devicep == NULL)
3202 return EINVAL;
3204 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3205 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3206 src_devicep = NULL;
3208 if (src_devicep == NULL && dst_devicep == NULL)
3210 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3211 return 0;
3213 if (src_devicep == NULL)
3215 gomp_mutex_lock (&dst_devicep->lock);
3216 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3217 (char *) dst + dst_offset,
3218 (char *) src + src_offset, length);
3219 gomp_mutex_unlock (&dst_devicep->lock);
3220 return (ret ? 0 : EINVAL);
3222 if (dst_devicep == NULL)
3224 gomp_mutex_lock (&src_devicep->lock);
3225 ret = src_devicep->dev2host_func (src_devicep->target_id,
3226 (char *) dst + dst_offset,
3227 (char *) src + src_offset, length);
3228 gomp_mutex_unlock (&src_devicep->lock);
3229 return (ret ? 0 : EINVAL);
3231 if (src_devicep == dst_devicep)
3233 gomp_mutex_lock (&src_devicep->lock);
3234 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3235 (char *) dst + dst_offset,
3236 (char *) src + src_offset, length);
3237 gomp_mutex_unlock (&src_devicep->lock);
3238 return (ret ? 0 : EINVAL);
3240 return EINVAL;
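/* Editorial note (a minimal usage sketch of the API above, assuming at
   least one non-host device is available; remember the host's device
   number is omp_get_num_devices ()):

     int dev = 0, host = omp_get_num_devices ();
     double a[1024];
     void *d_a = omp_target_alloc (sizeof a, dev);
     if (d_a != NULL
         && omp_target_memcpy (d_a, a, sizeof a, 0, 0, dev, host) == 0)
       {
         // ... run device work on d_a ...
         omp_target_memcpy (a, d_a, sizeof a, 0, 0, host, dev);
       }
     omp_target_free (d_a, dev);

   Note that dst_device_num comes before src_device_num, offsets are in
   bytes, and 0 is returned on success.  */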
3243 static int
3244 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3245 int num_dims, const size_t *volume,
3246 const size_t *dst_offsets,
3247 const size_t *src_offsets,
3248 const size_t *dst_dimensions,
3249 const size_t *src_dimensions,
3250 struct gomp_device_descr *dst_devicep,
3251 struct gomp_device_descr *src_devicep)
3253 size_t dst_slice = element_size;
3254 size_t src_slice = element_size;
3255 size_t j, dst_off, src_off, length;
3256 int i, ret;
3258 if (num_dims == 1)
3260 if (__builtin_mul_overflow (element_size, volume[0], &length)
3261 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3262 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3263 return EINVAL;
3264 if (dst_devicep == NULL && src_devicep == NULL)
3266 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3267 length);
3268 ret = 1;
3270 else if (src_devicep == NULL)
3271 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3272 (char *) dst + dst_off,
3273 (const char *) src + src_off,
3274 length);
3275 else if (dst_devicep == NULL)
3276 ret = src_devicep->dev2host_func (src_devicep->target_id,
3277 (char *) dst + dst_off,
3278 (const char *) src + src_off,
3279 length);
3280 else if (src_devicep == dst_devicep)
3281 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3282 (char *) dst + dst_off,
3283 (const char *) src + src_off,
3284 length);
3285 else
3286 ret = 0;
3287 return ret ? 0 : EINVAL;
3290 /* FIXME: it would be nice to have some plugin function to handle
3291 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3292 be handled in the generic recursion below, and for host-host it
3293 should be used even for any num_dims >= 2. */
3295 for (i = 1; i < num_dims; i++)
3296 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3297 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3298 return EINVAL;
3299 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3300 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3301 return EINVAL;
3302 for (j = 0; j < volume[0]; j++)
3304 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3305 (const char *) src + src_off,
3306 element_size, num_dims - 1,
3307 volume + 1, dst_offsets + 1,
3308 src_offsets + 1, dst_dimensions + 1,
3309 src_dimensions + 1, dst_devicep,
3310 src_devicep);
3311 if (ret)
3312 return ret;
3313 dst_off += dst_slice;
3314 src_off += src_slice;
3316 return 0;
3320 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3321 int num_dims, const size_t *volume,
3322 const size_t *dst_offsets,
3323 const size_t *src_offsets,
3324 const size_t *dst_dimensions,
3325 const size_t *src_dimensions,
3326 int dst_device_num, int src_device_num)
3328 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3330 if (!dst && !src)
3331 return INT_MAX;
3333 if (dst_device_num != gomp_get_num_devices ())
3335 if (dst_device_num < 0)
3336 return EINVAL;
3338 dst_devicep = resolve_device (dst_device_num);
3339 if (dst_devicep == NULL)
3340 return EINVAL;
3342 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3343 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3344 dst_devicep = NULL;
3346 if (src_device_num != num_devices_openmp)
3348 if (src_device_num < 0)
3349 return EINVAL;
3351 src_devicep = resolve_device (src_device_num);
3352 if (src_devicep == NULL)
3353 return EINVAL;
3355 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3356 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3357 src_devicep = NULL;
3360 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3361 return EINVAL;
3363 if (src_devicep)
3364 gomp_mutex_lock (&src_devicep->lock);
3365 else if (dst_devicep)
3366 gomp_mutex_lock (&dst_devicep->lock);
3367 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3368 volume, dst_offsets, src_offsets,
3369 dst_dimensions, src_dimensions,
3370 dst_devicep, src_devicep);
3371 if (src_devicep)
3372 gomp_mutex_unlock (&src_devicep->lock);
3373 else if (dst_devicep)
3374 gomp_mutex_unlock (&dst_devicep->lock);
3375 return ret;
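/* Editorial note (a worked 2-D example for the function above; d_buf and
   h_buf are hypothetical device and host buffers): to copy a 2x3 sub-block
   starting at row 1, column 2 of a 4x5 host array of doubles into the
   top-left corner of a 4x5 device array:

     size_t volume[2]      = { 2, 3 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_dims[2]    = { 4, 5 };
     size_t dst_dims[2]    = { 4, 5 };
     omp_target_memcpy_rect (d_buf, h_buf, sizeof (double), 2, volume,
                             dst_offsets, src_offsets, dst_dims, src_dims,
                             dev, omp_get_num_devices ());

   The worker above recurses over the outermost dimension, copying one
   contiguous row of 3 elements per iteration.  */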
3379 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3380 size_t size, size_t device_offset, int device_num)
3382 if (device_num == gomp_get_num_devices ())
3383 return EINVAL;
3385 if (device_num < 0)
3386 return EINVAL;
3388 struct gomp_device_descr *devicep = resolve_device (device_num);
3389 if (devicep == NULL)
3390 return EINVAL;
3392 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3393 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3394 return EINVAL;
3396 gomp_mutex_lock (&devicep->lock);
3398 struct splay_tree_s *mem_map = &devicep->mem_map;
3399 struct splay_tree_key_s cur_node;
3400 int ret = EINVAL;
3402 cur_node.host_start = (uintptr_t) host_ptr;
3403 cur_node.host_end = cur_node.host_start + size;
3404 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3405 if (n)
3407 if (n->tgt->tgt_start + n->tgt_offset
3408 == (uintptr_t) device_ptr + device_offset
3409 && n->host_start <= cur_node.host_start
3410 && n->host_end >= cur_node.host_end)
3411 ret = 0;
3413 else
3415 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3416 tgt->array = gomp_malloc (sizeof (*tgt->array));
3417 tgt->refcount = 1;
3418 tgt->tgt_start = 0;
3419 tgt->tgt_end = 0;
3420 tgt->to_free = NULL;
3421 tgt->prev = NULL;
3422 tgt->list_count = 0;
3423 tgt->device_descr = devicep;
3424 splay_tree_node array = tgt->array;
3425 splay_tree_key k = &array->key;
3426 k->host_start = cur_node.host_start;
3427 k->host_end = cur_node.host_end;
3428 k->tgt = tgt;
3429 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3430 k->refcount = REFCOUNT_INFINITY;
3431 k->dynamic_refcount = 0;
3432 k->aux = NULL;
3433 array->left = NULL;
3434 array->right = NULL;
3435 splay_tree_insert (&devicep->mem_map, array);
3436 ret = 0;
3438 gomp_mutex_unlock (&devicep->lock);
3439 return ret;
3443 omp_target_disassociate_ptr (const void *ptr, int device_num)
3445 if (device_num == gomp_get_num_devices ())
3446 return EINVAL;
3448 if (device_num < 0)
3449 return EINVAL;
3451 struct gomp_device_descr *devicep = resolve_device (device_num);
3452 if (devicep == NULL)
3453 return EINVAL;
3455 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3456 return EINVAL;
3458 gomp_mutex_lock (&devicep->lock);
3460 struct splay_tree_s *mem_map = &devicep->mem_map;
3461 struct splay_tree_key_s cur_node;
3462 int ret = EINVAL;
3464 cur_node.host_start = (uintptr_t) ptr;
3465 cur_node.host_end = cur_node.host_start;
3466 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3467 if (n
3468 && n->host_start == cur_node.host_start
3469 && n->refcount == REFCOUNT_INFINITY
3470 && n->tgt->tgt_start == 0
3471 && n->tgt->to_free == NULL
3472 && n->tgt->refcount == 1
3473 && n->tgt->list_count == 0)
3475 splay_tree_remove (&devicep->mem_map, n);
3476 gomp_unmap_tgt (n->tgt);
3477 ret = 0;
3480 gomp_mutex_unlock (&devicep->lock);
3481 return ret;
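/* Editorial note (a minimal usage sketch): omp_target_associate_ptr lets a
   user-managed device buffer stand in for a host object in later map
   lookups, and should be undone with omp_target_disassociate_ptr:

     void *d_x = omp_target_alloc (sizeof x, dev);
     if (omp_target_associate_ptr (&x, d_x, sizeof x, 0, dev) == 0)
       {
         // ... 'map(x)' now reuses d_x instead of allocating ...
         omp_target_disassociate_ptr (&x, dev);
       }
     omp_target_free (d_x, dev);

   Both functions return 0 on success and EINVAL otherwise, as the code
   above shows.  */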
3485 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3487 (void) kind;
3488 if (device_num == gomp_get_num_devices ())
3489 return gomp_pause_host ();
3490 if (device_num < 0 || device_num >= num_devices_openmp)
3491 return -1;
3492 /* Do nothing for target devices for now. */
3493 return 0;
3497 omp_pause_resource_all (omp_pause_resource_t kind)
3499 (void) kind;
3500 if (gomp_pause_host ())
3501 return -1;
3502 /* Do nothing for target devices for now. */
3503 return 0;
3506 ialias (omp_pause_resource)
3507 ialias (omp_pause_resource_all)
3509 #ifdef PLUGIN_SUPPORT
3511 /* This function tries to load a plugin for DEVICE. The name of the plugin
3512 is passed in PLUGIN_NAME.
3513 The handles of the found functions are stored in the corresponding fields
3514 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3516 static bool
3517 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3518 const char *plugin_name)
3520 const char *err = NULL, *last_missing = NULL;
3522 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3523 if (!plugin_handle)
3524 #if OFFLOAD_DEFAULTED
3525 return 0;
3526 #else
3527 goto dl_fail;
3528 #endif
3530 /* Check if all required functions are available in the plugin and store
3531 their handles. None of the symbols can legitimately be NULL,
3532 so we don't need to check dlerror all the time. */
3533 #define DLSYM(f) \
3534 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3535 goto dl_fail
3536 /* Similar, but missing functions are not an error. Return false if
3537 failed, true otherwise. */
3538 #define DLSYM_OPT(f, n) \
3539 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3540 || (last_missing = #n, 0))
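/* Editorial note: DLSYM (alloc) expands to roughly

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   i.e. every plugin entry point is the GOMP_OFFLOAD_ prefix plus the field
   name, and DLSYM_OPT additionally records the name of the first missing
   optional symbol in last_missing.  */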
3542 DLSYM (version);
3543 if (device->version_func () != GOMP_VERSION)
3545 err = "plugin version mismatch";
3546 goto fail;
3549 DLSYM (get_name);
3550 DLSYM (get_caps);
3551 DLSYM (get_type);
3552 DLSYM (get_num_devices);
3553 DLSYM (init_device);
3554 DLSYM (fini_device);
3555 DLSYM (load_image);
3556 DLSYM (unload_image);
3557 DLSYM (alloc);
3558 DLSYM (free);
3559 DLSYM (dev2host);
3560 DLSYM (host2dev);
3561 device->capabilities = device->get_caps_func ();
3562 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3564 DLSYM (run);
3565 DLSYM_OPT (async_run, async_run);
3566 DLSYM_OPT (can_run, can_run);
3567 DLSYM (dev2dev);
3569 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3571 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3572 || !DLSYM_OPT (openacc.create_thread_data,
3573 openacc_create_thread_data)
3574 || !DLSYM_OPT (openacc.destroy_thread_data,
3575 openacc_destroy_thread_data)
3576 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3577 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3578 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3579 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3580 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3581 || !DLSYM_OPT (openacc.async.queue_callback,
3582 openacc_async_queue_callback)
3583 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3584 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3585 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3586 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3588 /* Require all the OpenACC handlers if we have
3589 GOMP_OFFLOAD_CAP_OPENACC_200. */
3590 err = "plugin missing OpenACC handler function";
3591 goto fail;
3594 unsigned cuda = 0;
3595 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3596 openacc_cuda_get_current_device);
3597 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3598 openacc_cuda_get_current_context);
3599 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3600 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3601 if (cuda && cuda != 4)
3603 /* Make sure all the CUDA functions are there if any of them are. */
3604 err = "plugin missing OpenACC CUDA handler function";
3605 goto fail;
3608 #undef DLSYM
3609 #undef DLSYM_OPT
3611 return 1;
3613 dl_fail:
3614 err = dlerror ();
3615 fail:
3616 gomp_error ("while loading %s: %s", plugin_name, err);
3617 if (last_missing)
3618 gomp_error ("missing function was %s", last_missing);
3619 if (plugin_handle)
3620 dlclose (plugin_handle);
3622 return 0;
3625 /* This function finalizes all initialized devices. */
3627 static void
3628 gomp_target_fini (void)
3630 int i;
3631 for (i = 0; i < num_devices; i++)
3633 bool ret = true;
3634 struct gomp_device_descr *devicep = &devices[i];
3635 gomp_mutex_lock (&devicep->lock);
3636 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3637 ret = gomp_fini_device (devicep);
3638 gomp_mutex_unlock (&devicep->lock);
3639 if (!ret)
3640 gomp_fatal ("device finalization failed");
3644 /* This function initializes the runtime for offloading.
3645 It parses the list of offload plugins, and tries to load them.
3646 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3647 will be set, and the array DEVICES initialized, containing descriptors for
3648 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3649 followed by the others. */
3651 static void
3652 gomp_target_init (void)
3654 const char *prefix = "libgomp-plugin-";
3655 const char *suffix = SONAME_SUFFIX (1);
3656 const char *cur, *next;
3657 char *plugin_name;
3658 int i, new_num_devs;
3659 int num_devs = 0, num_devs_openmp;
3660 struct gomp_device_descr *devs = NULL;
3662 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3663 return;
3665 cur = OFFLOAD_PLUGINS;
3666 if (*cur)
3669 struct gomp_device_descr current_device;
3670 size_t prefix_len, suffix_len, cur_len;
3672 next = strchr (cur, ',');
3674 prefix_len = strlen (prefix);
3675 cur_len = next ? next - cur : strlen (cur);
3676 suffix_len = strlen (suffix);
3678 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3679 if (!plugin_name)
3681 num_devs = 0;
3682 break;
3685 memcpy (plugin_name, prefix, prefix_len);
3686 memcpy (plugin_name + prefix_len, cur, cur_len);
3687 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3689 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3691 new_num_devs = current_device.get_num_devices_func ();
3692 if (new_num_devs >= 1)
3694 /* Augment DEVICES and NUM_DEVICES. */
3696 devs = realloc (devs, (num_devs + new_num_devs)
3697 * sizeof (struct gomp_device_descr));
3698 if (!devs)
3700 num_devs = 0;
3701 free (plugin_name);
3702 break;
3705 current_device.name = current_device.get_name_func ();
3706 /* current_device.capabilities has already been set. */
3707 current_device.type = current_device.get_type_func ();
3708 current_device.mem_map.root = NULL;
3709 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3710 for (i = 0; i < new_num_devs; i++)
3712 current_device.target_id = i;
3713 devs[num_devs] = current_device;
3714 gomp_mutex_init (&devs[num_devs].lock);
3715 num_devs++;
3720 free (plugin_name);
3721 cur = next + 1;
3723 while (next);
3725 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3726 NUM_DEVICES_OPENMP. */
3727 struct gomp_device_descr *devs_s
3728 = malloc (num_devs * sizeof (struct gomp_device_descr));
3729 if (!devs_s)
3731 num_devs = 0;
3732 free (devs);
3733 devs = NULL;
3735 num_devs_openmp = 0;
3736 for (i = 0; i < num_devs; i++)
3737 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3738 devs_s[num_devs_openmp++] = devs[i];
3739 int num_devs_after_openmp = num_devs_openmp;
3740 for (i = 0; i < num_devs; i++)
3741 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3742 devs_s[num_devs_after_openmp++] = devs[i];
3743 free (devs);
3744 devs = devs_s;
3746 for (i = 0; i < num_devs; i++)
3748 /* The 'devices' array can be moved (by the realloc call) until we have
3749 found all the plugins, so registering with the OpenACC runtime (which
3750 takes a copy of the pointer argument) must be delayed until now. */
3751 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3752 goacc_register (&devs[i]);
3755 num_devices = num_devs;
3756 num_devices_openmp = num_devs_openmp;
3757 devices = devs;
3758 if (atexit (gomp_target_fini) != 0)
3759 gomp_fatal ("atexit failed");
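/* Editorial note (illustrative; exact names vary by configuration):
   OFFLOAD_PLUGINS is a configure-time comma-separated list such as
   "nvptx,gcn"; the loop above turns each element into a DSO name of the
   form

     "libgomp-plugin-" "nvptx" SONAME_SUFFIX (1)  // e.g. libgomp-plugin-nvptx.so.1

   and dlopens it via gomp_load_plugin_for_device.  */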
3762 #else /* PLUGIN_SUPPORT */
3763 /* If dlfcn.h is unavailable we always fall back to host execution.
3764 The GOMP_target* routines are just stubs in this case. */
3765 static void
3766 gomp_target_init (void)
3769 #endif /* PLUGIN_SUPPORT */