/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"
static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != GOMP_DEVICE_HOST_FALLBACK
	  && device_id != num_devices_openmp)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
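
/* Illustrative note (a sketch of caller behavior, not a normative part of
   this interface): a '#pragma omp target' without a 'device' clause enters
   resolve_device with GOMP_DEVICE_ICV and picks up the default-device-var
   ICV (settable via OMP_DEFAULT_DEVICE or omp_set_default_device).  If the
   resulting id is out of range, the outcome is either the fatal error above
   (OMP_TARGET_OFFLOAD=mandatory) or a NULL return, which callers treat as
   host fallback.  */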
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
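
/* Illustrative note (a sketch, not normative): a zero-length key, e.g. from
   a zero-length array section such as 'map(p[0:0])', has host_start ==
   host_end and as such would overlap no splay-tree entry.  gomp_map_lookup
   therefore also probes [start, start + 1) and [start - 1, start), so the
   key matches a mapping that contains START or ends exactly at it;
   gomp_map_0len_lookup only probes [start, start + 1).  */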
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
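
/* Illustrative example (hypothetical numbers): for three to-device regions
   at tgt_start-relative offsets [0, 100), [104, 200) and [8000, 8100), the
   first two end up in one chunk (the 4-byte alignment gap is below
   MAX_COALESCE_BUF_GAP and is simply copied along), while the third starts
   a new chunk.  A chunk left with use_cnt == 1 is discarded again, since a
   single region would be one host-to-device copy anyway.  */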
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  Make a
	     copy of the data into a temporary buffer in those cases.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free temporary buffer once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
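
/* Illustrative sketch of the EPHEMERAL contract (hypothetical caller):

     int v = 42;	// stack local, may be gone by the time AQ drains
     gomp_copy_host2dev (devicep, aq, d, &v, sizeof v, true, NULL);

   With EPHEMERAL true and AQ non-NULL, the bytes are first duplicated into
   a heap buffer that a queue callback frees after the transfer, so the copy
   does not depend on V's lifetime.  */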
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
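
/* Illustrative example (hypothetical values): for a key with refcount 1
   that is seen for the first time in REFCOUNT_SET, the decrement drops it
   to zero, so *DO_COPY and *DO_REMOVE both become true.  If a sibling
   structure-element map then decrements the same shared refcount again
   within the same set, new_encountered_refcount is false; the refcount is
   already zero, so *DO_COPY is true again (each element may need its own
   copy-out) but *DO_REMOVE stays false, giving exactly one removal.  */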
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, false, cbuf);

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, cbuf, refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, false,
				      cbuf, refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, cbuf, refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
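
/* Illustrative note (a sketch over hypothetical user code): attachment
   counters are kept per pointer-sized slot of the enclosing mapped region.
   For an OpenACC 'acc enter data attach (s.ptr)', IDX selects the slot of
   's.ptr' within the mapping of 's', and only the 0 -> 1 transition
   rewrites the device copy of the pointer with the device address of its
   (already mapped) target; further attaches just bump the counter.  */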
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
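
/* Illustrative summary of the OFFSET_* encodings used above (a sketch
   derived from the cases, not additional API): OFFSET_INLINED means
   hostaddrs[i] itself is the value to pass (e.g. firstprivate scalars or
   already-resolved use_device pointers), OFFSET_POINTER yields 0 (the
   pointer is fixed up on the device separately), and OFFSET_STRUCT derives
   the address from the following list entry, offset by the distance between
   the two host addresses.  Anything else is a plain offset into this TGT's
   freshly allocated device block.  */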
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind,
							  kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     a statically assigned pointer, but then it should point
		     to the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier or if a
		 null pointer was there before and a nonnull one has now been
		 found; that permits transparent re-mapping for Fortran array
		 descriptors which were previously mapped unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, NULL,
				  refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
					  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;
	    size_t j;
	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
						       - k->host_start),
				      sizes[j], cbufp);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily
		   test 'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp, refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start
					    - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i], sizes[i],
					   cbufp);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, false, cbufp,
				      refcount_set);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target
		       address of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		k->refcount = 0;
		k->dynamic_refcount = 0;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (openmp_p)
		      {
			k->refcount = REFCOUNT_STRUCTELEM;
			if (field_tgt_structelem_first == NULL)
			  {
			    /* Set to first structure element of sequence.  */
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
			    field_tgt_structelem_first = k;
			  }
			else
			  /* Point to refcount of leading element, but do not
			     increment again.  */
			  k->structelem_refcount_ptr
			    = &field_tgt_structelem_first->structelem_refcount;

			if (i == field_tgt_clear)
			  {
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
			    field_tgt_structelem_first = NULL;
			  }
		      }
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		/* First increment, from 0 to 1.  gomp_increment_refcount
		   encapsulates the different increment cases, so use this
		   instead of directly setting 1 during initialization.  */
		gomp_increment_refcount (k, refcount_set);

		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    tgt->list[i].has_null_ptr_assoc = false;

		    for (j = i + 1; j < mapnum; j++)
		      {
			int ptr_kind = (get_kind (short_mapkind, kinds, j)
					& typemask);
			if (!GOMP_MAP_POINTER_P (ptr_kind)
			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
			  break;
			else if ((uintptr_t) hostaddrs[j] < k->host_start
				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				     > k->host_end))
			  break;
			else
			  {
			    tgt->list[j].key = k;
			    tgt->list[j].copy_from = false;
			    tgt->list[j].always_copy_from = false;
			    tgt->list[j].is_attach = false;
			    tgt->list[i].has_null_ptr_assoc
			      |= !(*(void **) hostaddrs[j]);
			    /* For OpenMP, the use of refcount_sets causes
			       errors if we set k->refcount = 1 above but also
			       increment it again here, for decrementing will
			       not properly match, since we decrement only once
			       for each key's refcount.  Therefore avoid this
			       increment for OpenMP constructs.  */
			    if (!openmp_p)
			      gomp_increment_refcount (k, refcount_set);
			    gomp_map_pointer (tgt, aq,
					      (uintptr_t) *(void **) hostaddrs[j],
					      k->tgt_offset
					      + ((uintptr_t) hostaddrs[j]
						 - k->host_start),
					      sizes[j], cbufp);
			  }
		      }
		    i = j - 1;
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), false, cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), true, NULL);
		  }
		array++;
	      }
	  }
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      true, cbufp);
	}
    }

  if (cbufp)
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!aq);

      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start,
			    true, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, htab_t *refcount_set,
	       enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}
attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
static void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}
1902 static void
1903 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1904 size_t *sizes, void *kinds, bool short_mapkind)
1906 size_t i;
1907 struct splay_tree_key_s cur_node;
1908 const int typemask = short_mapkind ? 0xff : 0x7;
1910 if (!devicep)
1911 return;
1913 if (mapnum == 0)
1914 return;
1916 gomp_mutex_lock (&devicep->lock);
1917 if (devicep->state == GOMP_DEVICE_FINALIZED)
1919 gomp_mutex_unlock (&devicep->lock);
1920 return;
1923 for (i = 0; i < mapnum; i++)
1924 if (sizes[i])
1926 cur_node.host_start = (uintptr_t) hostaddrs[i];
1927 cur_node.host_end = cur_node.host_start + sizes[i];
1928 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1929 if (n)
1931 int kind = get_kind (short_mapkind, kinds, i);
1932 if (n->host_start > cur_node.host_start
1933 || n->host_end < cur_node.host_end)
1935 gomp_mutex_unlock (&devicep->lock);
1936 gomp_fatal ("Trying to update [%p..%p) object when "
1937 "only [%p..%p) is mapped",
1938 (void *) cur_node.host_start,
1939 (void *) cur_node.host_end,
1940 (void *) n->host_start,
1941 (void *) n->host_end);
1945 void *hostaddr = (void *) cur_node.host_start;
1946 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1947 + cur_node.host_start - n->host_start);
1948 size_t size = cur_node.host_end - cur_node.host_start;
1950 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1951 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1952 false, NULL);
1953 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1954 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1957 gomp_mutex_unlock (&devicep->lock);
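/* gomp_update is the engine behind '#pragma omp target update'.  A minimal
   user-level sketch (illustrative only, not part of this file); the two
   'update' directives below are lowered by the compiler into
   GOMP_target_update{,_ext} calls that land here.  */
#if 0
void
update_example (float *x, float *y, int n, float a)
{
  #pragma omp target data map(to: x[0:n]) map(tofrom: y[0:n])
  {
    x[0] = 42.0f;                          /* Host-side change...  */
    #pragma omp target update to(x[0:n])   /* ...pushed to the device.  */

    #pragma omp target
    for (int i = 0; i < n; i++)
      y[i] += a * x[i];

    #pragma omp target update from(y[0:n]) /* Pull results back early.  */
  }
}
#endif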
1960 /* Load the image pointed to by TARGET_DATA onto the device specified by
1961 DEVICEP, and insert into the splay tree the mappings between addresses
1962 from HOST_TABLE and from the loaded target image. We rely on the host
1963 and device compilers emitting variables and functions in the same order. */
1965 static void
1966 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1967 const void *host_table, const void *target_data,
1968 bool is_register_lock)
1970 void **host_func_table = ((void ***) host_table)[0];
1971 void **host_funcs_end = ((void ***) host_table)[1];
1972 void **host_var_table = ((void ***) host_table)[2];
1973 void **host_vars_end = ((void ***) host_table)[3];
1975 /* The func table contains only addresses, the var table contains addresses
1976 and corresponding sizes. */
1977 int num_funcs = host_funcs_end - host_func_table;
1978 int num_vars = (host_vars_end - host_var_table) / 2;
1980 /* Currently the only 'other' entry is 'device_num'. */
1981 int num_others = 1;
1983 /* Load image to device and get target addresses for the image. */
1984 struct addr_pair *target_table = NULL;
1985 int i, num_target_entries;
1987 num_target_entries
1988 = devicep->load_image_func (devicep->target_id, version,
1989 target_data, &target_table);
1991 if (num_target_entries != num_funcs + num_vars
1992 /* Others (device_num) are included as trailing entries in the pair list. */
1993 && num_target_entries != num_funcs + num_vars + num_others)
1995 gomp_mutex_unlock (&devicep->lock);
1996 if (is_register_lock)
1997 gomp_mutex_unlock (&register_lock);
1998 gomp_fatal ("Cannot map target functions or variables"
1999 " (expected %u, have %u)", num_funcs + num_vars,
2000 num_target_entries);
2003 /* Insert host-target address mapping into splay tree. */
2004 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2005 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2006 tgt->refcount = REFCOUNT_INFINITY;
2007 tgt->tgt_start = 0;
2008 tgt->tgt_end = 0;
2009 tgt->to_free = NULL;
2010 tgt->prev = NULL;
2011 tgt->list_count = 0;
2012 tgt->device_descr = devicep;
2013 splay_tree_node array = tgt->array;
2015 for (i = 0; i < num_funcs; i++)
2017 splay_tree_key k = &array->key;
2018 k->host_start = (uintptr_t) host_func_table[i];
2019 k->host_end = k->host_start + 1;
2020 k->tgt = tgt;
2021 k->tgt_offset = target_table[i].start;
2022 k->refcount = REFCOUNT_INFINITY;
2023 k->dynamic_refcount = 0;
2024 k->aux = NULL;
2025 array->left = NULL;
2026 array->right = NULL;
2027 splay_tree_insert (&devicep->mem_map, array);
2028 array++;
2031 /* Most significant bit of the size in host and target tables marks
2032 "omp declare target link" variables. */
2033 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2034 const uintptr_t size_mask = ~link_bit;
2036 for (i = 0; i < num_vars; i++)
2038 struct addr_pair *target_var = &target_table[num_funcs + i];
2039 uintptr_t target_size = target_var->end - target_var->start;
2040 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2042 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2044 gomp_mutex_unlock (&devicep->lock);
2045 if (is_register_lock)
2046 gomp_mutex_unlock (&register_lock);
2047 gomp_fatal ("Cannot map target variables (size mismatch)");
2050 splay_tree_key k = &array->key;
2051 k->host_start = (uintptr_t) host_var_table[i * 2];
2052 k->host_end
2053 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2054 k->tgt = tgt;
2055 k->tgt_offset = target_var->start;
2056 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2057 k->dynamic_refcount = 0;
2058 k->aux = NULL;
2059 array->left = NULL;
2060 array->right = NULL;
2061 splay_tree_insert (&devicep->mem_map, array);
2062 array++;
2065 /* The last entry is for the on-device 'device_num' variable. Tolerate the
2066 case where the plugin does not return this entry. */
2067 if (num_funcs + num_vars < num_target_entries)
2069 struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2070 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2071 was found in this image. */
2072 if (device_num_var->start != 0)
2074 /* The index of the devicep within devices[] is regarded as its
2075 'device number', which is different from the per-device type
2076 devicep->target_id. */
2077 int device_num_val = (int) (devicep - &devices[0]);
2078 if (device_num_var->end - device_num_var->start != sizeof (int))
2080 gomp_mutex_unlock (&devicep->lock);
2081 if (is_register_lock)
2082 gomp_mutex_unlock (&register_lock);
2083 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2084 "format");
2087 /* Copy the device_num value to its location in device memory, thereby
2088 putting the device number into effect. */
2089 gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2090 &device_num_val, sizeof (int), false, NULL);
2094 free (target_table);
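/* The HOST_TABLE decoded above is four pointers:
   { funcs_start, funcs_end, vars_start, vars_end }.  Function entries are
   plain addresses; each variable takes two slots (address, size), with the
   size's most significant bit marking "declare target link" variables.
   A hypothetical layout for two functions and two variables, for
   illustration only:  */
#if 0
extern void fn0 (void), fn1 (void);
extern int var0;
extern char var1[64];

static void *host_funcs[] = { (void *) fn0, (void *) fn1 };
static void *host_vars[] = { &var0, (void *) sizeof (var0),
                             var1, (void *) sizeof (var1) };
/* With this layout: num_funcs = host_funcs_end - host_func_table = 2, and
   num_vars = (host_vars_end - host_var_table) / 2 = 2.  */
static const void *host_table[] = { &host_funcs[0], &host_funcs[2],
                                    &host_vars[0], &host_vars[4] };
#endif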
2097 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
2098 The device must be locked. */
2100 static void
2101 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2102 unsigned version,
2103 const void *host_table, const void *target_data)
2105 void **host_func_table = ((void ***) host_table)[0];
2106 void **host_funcs_end = ((void ***) host_table)[1];
2107 void **host_var_table = ((void ***) host_table)[2];
2108 void **host_vars_end = ((void ***) host_table)[3];
2110 /* The func table contains only addresses, the var table contains addresses
2111 and corresponding sizes. */
2112 int num_funcs = host_funcs_end - host_func_table;
2113 int num_vars = (host_vars_end - host_var_table) / 2;
2115 struct splay_tree_key_s k;
2116 splay_tree_key node = NULL;
2118 /* Find the mapping at the start of the node array. */
2119 if (num_funcs || num_vars)
2121 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2122 : (uintptr_t) host_var_table[0]);
2123 k.host_end = k.host_start + 1;
2124 node = splay_tree_lookup (&devicep->mem_map, &k);
2127 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2129 gomp_mutex_unlock (&devicep->lock);
2130 gomp_fatal ("image unload failed");
2133 /* Remove mappings from splay tree. */
2134 int i;
2135 for (i = 0; i < num_funcs; i++)
2137 k.host_start = (uintptr_t) host_func_table[i];
2138 k.host_end = k.host_start + 1;
2139 splay_tree_remove (&devicep->mem_map, &k);
2142 /* Most significant bit of the size in host and target tables marks
2143 "omp declare target link" variables. */
2144 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2145 const uintptr_t size_mask = ~link_bit;
2146 bool is_tgt_unmapped = false;
2148 for (i = 0; i < num_vars; i++)
2150 k.host_start = (uintptr_t) host_var_table[i * 2];
2151 k.host_end
2152 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2154 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2155 splay_tree_remove (&devicep->mem_map, &k);
2156 else
2158 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2159 is_tgt_unmapped = gomp_remove_var (devicep, n);
2163 if (node && !is_tgt_unmapped)
2165 free (node->tgt);
2166 free (node);
2170 /* This function should be called from every offload image when it is
2171 loaded. It gets the descriptor of the host func and var tables HOST_TABLE,
2172 the TYPE of the target, and the TARGET_DATA needed by the target plugin. */
2174 void
2175 GOMP_offload_register_ver (unsigned version, const void *host_table,
2176 int target_type, const void *target_data)
2178 int i;
2180 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2181 gomp_fatal ("Library too old for offload (version %u < %u)",
2182 GOMP_VERSION, GOMP_VERSION_LIB (version));
2184 gomp_mutex_lock (&register_lock);
2186 /* Load image to all initialized devices. */
2187 for (i = 0; i < num_devices; i++)
2189 struct gomp_device_descr *devicep = &devices[i];
2190 gomp_mutex_lock (&devicep->lock);
2191 if (devicep->type == target_type
2192 && devicep->state == GOMP_DEVICE_INITIALIZED)
2193 gomp_load_image_to_device (devicep, version,
2194 host_table, target_data, true);
2195 gomp_mutex_unlock (&devicep->lock);
2198 /* Insert image to array of pending images. */
2199 offload_images
2200 = gomp_realloc_unlock (offload_images,
2201 (num_offload_images + 1)
2202 * sizeof (struct offload_image_descr));
2203 offload_images[num_offload_images].version = version;
2204 offload_images[num_offload_images].type = target_type;
2205 offload_images[num_offload_images].host_table = host_table;
2206 offload_images[num_offload_images].target_data = target_data;
2208 num_offload_images++;
2209 gomp_mutex_unlock (&register_lock);
2212 void
2213 GOMP_offload_register (const void *host_table, int target_type,
2214 const void *target_data)
2216 GOMP_offload_register_ver (0, host_table, target_type, target_data);
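/* GOMP_offload_register{,_ver} is normally invoked from a constructor that
   the offloading compiler emits into the host binary.  A sketch that is
   morally equivalent (illustrative only; the two __offload_* symbol names
   are hypothetical, version packing per gomp-constants.h):  */
#if 0
extern const void *__offload_host_table;
extern const void *__offload_target_data;

__attribute__((constructor))
static void
register_offload_image (void)
{
  GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION, 0),
                             &__offload_host_table,
                             GOMP_DEVICE_NVIDIA_PTX,
                             &__offload_target_data);
}
#endif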
2219 /* This function should be called from every offload image when it is
2220 unloaded. It gets the descriptor of the host func and var tables HOST_TABLE,
2221 the TYPE of the target, and the TARGET_DATA needed by the target plugin. */
2223 void
2224 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2225 int target_type, const void *target_data)
2227 int i;
2229 gomp_mutex_lock (&register_lock);
2231 /* Unload image from all initialized devices. */
2232 for (i = 0; i < num_devices; i++)
2234 struct gomp_device_descr *devicep = &devices[i];
2235 gomp_mutex_lock (&devicep->lock);
2236 if (devicep->type == target_type
2237 && devicep->state == GOMP_DEVICE_INITIALIZED)
2238 gomp_unload_image_from_device (devicep, version,
2239 host_table, target_data);
2240 gomp_mutex_unlock (&devicep->lock);
2243 /* Remove image from array of pending images. */
2244 for (i = 0; i < num_offload_images; i++)
2245 if (offload_images[i].target_data == target_data)
2247 offload_images[i] = offload_images[--num_offload_images];
2248 break;
2251 gomp_mutex_unlock (&register_lock);
2254 void
2255 GOMP_offload_unregister (const void *host_table, int target_type,
2256 const void *target_data)
2258 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2261 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2262 must be locked on entry, and remains locked on return. */
2264 attribute_hidden void
2265 gomp_init_device (struct gomp_device_descr *devicep)
2267 int i;
2268 if (!devicep->init_device_func (devicep->target_id))
2270 gomp_mutex_unlock (&devicep->lock);
2271 gomp_fatal ("device initialization failed");
2274 /* Load to the device all images registered so far. */
2275 for (i = 0; i < num_offload_images; i++)
2277 struct offload_image_descr *image = &offload_images[i];
2278 if (image->type == devicep->type)
2279 gomp_load_image_to_device (devicep, image->version,
2280 image->host_table, image->target_data,
2281 false);
2284 /* Initialize OpenACC asynchronous queues. */
2285 goacc_init_asyncqueues (devicep);
2287 devicep->state = GOMP_DEVICE_INITIALIZED;
2290 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2291 must be locked on entry, and remains locked on return. */
2293 attribute_hidden bool
2294 gomp_fini_device (struct gomp_device_descr *devicep)
2296 bool ret = goacc_fini_asyncqueues (devicep);
2297 ret &= devicep->fini_device_func (devicep->target_id);
2298 devicep->state = GOMP_DEVICE_FINALIZED;
2299 return ret;
2302 attribute_hidden void
2303 gomp_unload_device (struct gomp_device_descr *devicep)
2305 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2307 unsigned i;
2309 /* Unload from the device all images currently registered. */
2310 for (i = 0; i < num_offload_images; i++)
2312 struct offload_image_descr *image = &offload_images[i];
2313 if (image->type == devicep->type)
2314 gomp_unload_image_from_device (devicep, image->version,
2315 image->host_table,
2316 image->target_data);
2321 /* Host fallback for GOMP_target{,_ext} routines. */
2323 static void
2324 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2325 struct gomp_device_descr *devicep)
2327 struct gomp_thread old_thr, *thr = gomp_thread ();
2329 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2330 && devicep != NULL)
2331 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2332 "be used for offloading");
2334 old_thr = *thr;
2335 memset (thr, '\0', sizeof (*thr));
2336 if (gomp_places_list)
2338 thr->place = old_thr.place;
2339 thr->ts.place_partition_len = gomp_places_list_len;
2341 fn (hostaddrs);
2342 gomp_free_thread (thr);
2343 *thr = old_thr;
2346 /* Calculate alignment and size requirements of a private copy of data shared
2347 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2349 static inline void
2350 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2351 unsigned short *kinds, size_t *tgt_align,
2352 size_t *tgt_size)
2354 size_t i;
2355 for (i = 0; i < mapnum; i++)
2356 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2358 size_t align = (size_t) 1 << (kinds[i] >> 8);
2359 if (*tgt_align < align)
2360 *tgt_align = align;
2361 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2362 *tgt_size += sizes[i];
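/* The rounding above is the usual power-of-two align-up idiom.  A
   self-contained restatement (illustrative sketch, not part of libgomp):  */
#if 0
static size_t
align_up (size_t size, size_t align)  /* ALIGN must be a power of two.  */
{
  return (size + align - 1) & ~(align - 1);
}
/* align_up (13, 8) == 16; align_up (16, 8) == 16.  Applied per
   firstprivate entry, this accumulates the total block size in *TGT_SIZE
   and the strictest alignment in *TGT_ALIGN.  */
#endif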
2366 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2368 static inline void
2369 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2370 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2371 size_t tgt_size)
2373 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2374 if (al)
2375 tgt += tgt_align - al;
2376 tgt_size = 0;
2377 size_t i;
2378 for (i = 0; i < mapnum; i++)
2379 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2381 size_t align = (size_t) 1 << (kinds[i] >> 8);
2382 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2383 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2384 hostaddrs[i] = tgt + tgt_size;
2385 tgt_size = tgt_size + sizes[i];
2389 /* Helper function of GOMP_target{,_ext} routines. */
2391 static void *
2392 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2393 void (*host_fn) (void *))
2395 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2396 return (void *) host_fn;
2397 else
2399 gomp_mutex_lock (&devicep->lock);
2400 if (devicep->state == GOMP_DEVICE_FINALIZED)
2402 gomp_mutex_unlock (&devicep->lock);
2403 return NULL;
2406 struct splay_tree_key_s k;
2407 k.host_start = (uintptr_t) host_fn;
2408 k.host_end = k.host_start + 1;
2409 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2410 gomp_mutex_unlock (&devicep->lock);
2411 if (tgt_fn == NULL)
2412 return NULL;
2414 return (void *) tgt_fn->tgt_offset;
2418 /* Called when encountering a target directive. If DEVICE
2419 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2420 GOMP_DEVICE_HOST_FALLBACK (or any value
2421 larger than last available hw device), use host fallback.
2422 FN is address of host code, UNUSED is part of the current ABI, but
2423 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2424 with MAPNUM entries, with addresses of the host objects,
2425 sizes of the host objects (resp. for pointer kind pointer bias
2426 and assumed sizeof (void *) size) and kinds. */
2428 void
2429 GOMP_target (int device, void (*fn) (void *), const void *unused,
2430 size_t mapnum, void **hostaddrs, size_t *sizes,
2431 unsigned char *kinds)
2433 struct gomp_device_descr *devicep = resolve_device (device);
2435 void *fn_addr;
2436 if (devicep == NULL
2437 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2438 /* All shared memory devices should use the GOMP_target_ext function. */
2439 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2440 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2441 return gomp_target_fallback (fn, hostaddrs, devicep);
2443 htab_t refcount_set = htab_create (mapnum);
2444 struct target_mem_desc *tgt_vars
2445 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2446 &refcount_set, GOMP_MAP_VARS_TARGET);
2447 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2448 NULL);
2449 htab_clear (refcount_set);
2450 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2451 htab_free (refcount_set);
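/* Source-level equivalent of a GOMP_target call, for illustration only
   (GCC 4.9-era '#pragma omp target'; newer compilers emit GOMP_target_ext
   instead):  */
#if 0
int
vec_sum (int *a, int n)
{
  int sum = 0;
  /* Lowered to GOMP_target with hostaddrs/sizes/kinds describing the two
     map clauses, and FN set to the outlined region.  */
  #pragma omp target map(to: a[0:n]) map(tofrom: sum)
  for (int i = 0; i < n; i++)
    sum += a[i];
  return sum;
}
#endif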
2454 static inline unsigned int
2455 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2457 /* If we cannot run asynchronously, simply ignore nowait. */
2458 if (devicep != NULL && devicep->async_run_func == NULL)
2459 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2461 return flags;
2464 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2465 and several arguments have been added:
2466 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2467 DEPEND is array of dependencies, see GOMP_task for details.
2469 ARGS is a pointer to an array consisting of a variable number of both
2470 device-independent and device-specific arguments; each argument can take
2471 one or two elements, where the first specifies for which device it is
2472 intended, the type, and optionally also the value. If the value is not
2473 present in the first element, the whole second element is the actual
2474 value. The last element of the array is a single NULL. The
2475 device-independent arguments include, for example, NUM_TEAMS and THREAD_LIMIT.
2477 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2478 that value, or 1 if the teams construct is not present, or 0 if the
2479 teams construct has no num_teams clause and so the choice is
2480 implementation defined, or -1 if it can't be determined on the host
2481 what value GOMP_teams will have on the device.
2482 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2483 body with that value, or 0 if the teams construct has no thread_limit
2484 clause or the teams construct is not present, or -1 if it can't be
2485 determined on the host what value GOMP_teams will have on the device. */
2487 void
2488 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2489 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2490 unsigned int flags, void **depend, void **args)
2492 struct gomp_device_descr *devicep = resolve_device (device);
2493 size_t tgt_align = 0, tgt_size = 0;
2494 bool fpc_done = false;
2496 flags = clear_unsupported_flags (devicep, flags);
2498 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2500 struct gomp_thread *thr = gomp_thread ();
2501 /* Create a team if we don't have any around, as nowait
2502 target tasks make sense to run asynchronously even when
2503 outside of any parallel. */
2504 if (__builtin_expect (thr->ts.team == NULL, 0))
2506 struct gomp_team *team = gomp_new_team (1);
2507 struct gomp_task *task = thr->task;
2508 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2509 team->prev_ts = thr->ts;
2510 thr->ts.team = team;
2511 thr->ts.team_id = 0;
2512 thr->ts.work_share = &team->work_shares[0];
2513 thr->ts.last_work_share = NULL;
2514 #ifdef HAVE_SYNC_BUILTINS
2515 thr->ts.single_count = 0;
2516 #endif
2517 thr->ts.static_trip = 0;
2518 thr->task = &team->implicit_task[0];
2519 gomp_init_task (thr->task, NULL, icv);
2520 if (task)
2522 thr->task = task;
2523 gomp_end_task ();
2524 free (task);
2525 thr->task = &team->implicit_task[0];
2527 else
2528 pthread_setspecific (gomp_thread_destructor, thr);
2530 if (thr->ts.team
2531 && !thr->task->final_task)
2533 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2534 sizes, kinds, flags, depend, args,
2535 GOMP_TARGET_TASK_BEFORE_MAP);
2536 return;
2540 /* If there are depend clauses, but nowait is not present
2541 (or we are in a final task), block the parent task until the
2542 dependencies are resolved and then just continue with the rest
2543 of the function as if it is a merged task. */
2544 if (depend != NULL)
2546 struct gomp_thread *thr = gomp_thread ();
2547 if (thr->task && thr->task->depend_hash)
2549 /* If we might need to wait, copy firstprivate now. */
2550 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2551 &tgt_align, &tgt_size);
2552 if (tgt_align)
2554 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2555 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2556 tgt_align, tgt_size);
2558 fpc_done = true;
2559 gomp_task_maybe_wait_for_dependencies (depend);
2563 void *fn_addr;
2564 if (devicep == NULL
2565 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2566 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2567 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2569 if (!fpc_done)
2571 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2572 &tgt_align, &tgt_size);
2573 if (tgt_align)
2575 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2576 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2577 tgt_align, tgt_size);
2580 gomp_target_fallback (fn, hostaddrs, devicep);
2581 return;
2584 struct target_mem_desc *tgt_vars;
2585 htab_t refcount_set = NULL;
2587 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2589 if (!fpc_done)
2591 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2592 &tgt_align, &tgt_size);
2593 if (tgt_align)
2595 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2596 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2597 tgt_align, tgt_size);
2600 tgt_vars = NULL;
2602 else
2604 refcount_set = htab_create (mapnum);
2605 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2606 true, &refcount_set, GOMP_MAP_VARS_TARGET);
2608 devicep->run_func (devicep->target_id, fn_addr,
2609 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2610 args);
2611 if (tgt_vars)
2613 htab_clear (refcount_set);
2614 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2616 if (refcount_set)
2617 htab_free (refcount_set);
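/* The nowait/depend path above in user-level form (illustrative sketch,
   not part of this file): each 'target nowait' becomes a target task
   created via gomp_create_target_task, ordered by its depend clauses.  */
#if 0
void
async_example (float *a, float *b, int n)
{
  #pragma omp parallel
  #pragma omp single
  {
    #pragma omp target nowait depend(out: a[0:n]) map(tofrom: a[0:n])
    for (int i = 0; i < n; i++)
      a[i] *= 2.0f;

    #pragma omp target nowait depend(in: a[0:n]) map(to: a[0:n]) \
        map(tofrom: b[0:n])
    for (int i = 0; i < n; i++)
      b[i] += a[i];

    #pragma omp taskwait
  }
}
#endif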
2620 /* Host fallback for GOMP_target_data{,_ext} routines. */
2622 static void
2623 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2625 struct gomp_task_icv *icv = gomp_icv (false);
2627 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2628 && devicep != NULL)
2629 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2630 "be used for offloading");
2632 if (icv->target_data)
2634 /* Even when doing a host fallback, if there are any active
2635 #pragma omp target data constructs, we need to remember the
2636 new #pragma omp target data, otherwise GOMP_target_end_data
2637 would get out of sync. */
2638 struct target_mem_desc *tgt
2639 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2640 NULL, GOMP_MAP_VARS_DATA);
2641 tgt->prev = icv->target_data;
2642 icv->target_data = tgt;
2646 void
2647 GOMP_target_data (int device, const void *unused, size_t mapnum,
2648 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2650 struct gomp_device_descr *devicep = resolve_device (device);
2652 if (devicep == NULL
2653 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2654 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2655 return gomp_target_data_fallback (devicep);
2657 struct target_mem_desc *tgt
2658 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2659 NULL, GOMP_MAP_VARS_DATA);
2660 struct gomp_task_icv *icv = gomp_icv (true);
2661 tgt->prev = icv->target_data;
2662 icv->target_data = tgt;
2665 void
2666 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2667 size_t *sizes, unsigned short *kinds)
2669 struct gomp_device_descr *devicep = resolve_device (device);
2671 if (devicep == NULL
2672 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2673 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2674 return gomp_target_data_fallback (devicep);
2676 struct target_mem_desc *tgt
2677 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2678 NULL, GOMP_MAP_VARS_DATA);
2679 struct gomp_task_icv *icv = gomp_icv (true);
2680 tgt->prev = icv->target_data;
2681 icv->target_data = tgt;
2684 void
2685 GOMP_target_end_data (void)
2687 struct gomp_task_icv *icv = gomp_icv (false);
2688 if (icv->target_data)
2690 struct target_mem_desc *tgt = icv->target_data;
2691 icv->target_data = tgt->prev;
2692 gomp_unmap_vars (tgt, true, NULL);
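/* GOMP_target_data{,_ext} and GOMP_target_end_data bracket a structured
   data region; a minimal source-level sketch (illustrative only):  */
#if 0
void
data_region_example (double *p, int n)
{
  #pragma omp target data map(tofrom: p[0:n])  /* GOMP_target_data_ext  */
  {
    #pragma omp target
    for (int i = 0; i < n; i++)
      p[i] *= 2.0;
  }                                            /* GOMP_target_end_data  */
}
#endif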
2696 void
2697 GOMP_target_update (int device, const void *unused, size_t mapnum,
2698 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2700 struct gomp_device_descr *devicep = resolve_device (device);
2702 if (devicep == NULL
2703 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2704 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2705 return;
2707 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2710 void
2711 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2712 size_t *sizes, unsigned short *kinds,
2713 unsigned int flags, void **depend)
2715 struct gomp_device_descr *devicep = resolve_device (device);
2717 /* If there are depend clauses, but nowait is not present,
2718 block the parent task until the dependencies are resolved
2719 and then just continue with the rest of the function as if it
2720 is a merged task. Until we are able to schedule task during
2721 variable mapping or unmapping, ignore nowait if depend clauses
2722 are not present. */
2723 if (depend != NULL)
2725 struct gomp_thread *thr = gomp_thread ();
2726 if (thr->task && thr->task->depend_hash)
2728 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2729 && thr->ts.team
2730 && !thr->task->final_task)
2732 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2733 mapnum, hostaddrs, sizes, kinds,
2734 flags | GOMP_TARGET_FLAG_UPDATE,
2735 depend, NULL, GOMP_TARGET_TASK_DATA))
2736 return;
2738 else
2740 struct gomp_team *team = thr->ts.team;
2741 /* If parallel or taskgroup has been cancelled, don't start new
2742 tasks. */
2743 if (__builtin_expect (gomp_cancel_var, 0) && team)
2745 if (gomp_team_barrier_cancelled (&team->barrier))
2746 return;
2747 if (thr->task->taskgroup)
2749 if (thr->task->taskgroup->cancelled)
2750 return;
2751 if (thr->task->taskgroup->workshare
2752 && thr->task->taskgroup->prev
2753 && thr->task->taskgroup->prev->cancelled)
2754 return;
2758 gomp_task_maybe_wait_for_dependencies (depend);
2763 if (devicep == NULL
2764 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2765 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2766 return;
2768 struct gomp_thread *thr = gomp_thread ();
2769 struct gomp_team *team = thr->ts.team;
2770 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2771 if (__builtin_expect (gomp_cancel_var, 0) && team)
2773 if (gomp_team_barrier_cancelled (&team->barrier))
2774 return;
2775 if (thr->task->taskgroup)
2777 if (thr->task->taskgroup->cancelled)
2778 return;
2779 if (thr->task->taskgroup->workshare
2780 && thr->task->taskgroup->prev
2781 && thr->task->taskgroup->prev->cancelled)
2782 return;
2786 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2789 static void
2790 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2791 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2792 htab_t *refcount_set)
2794 const int typemask = 0xff;
2795 size_t i;
2796 gomp_mutex_lock (&devicep->lock);
2797 if (devicep->state == GOMP_DEVICE_FINALIZED)
2799 gomp_mutex_unlock (&devicep->lock);
2800 return;
2803 for (i = 0; i < mapnum; i++)
2804 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2806 struct splay_tree_key_s cur_node;
2807 cur_node.host_start = (uintptr_t) hostaddrs[i];
2808 cur_node.host_end = cur_node.host_start + sizeof (void *);
2809 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2811 if (n)
2812 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2813 false, NULL);
2816 int nrmvars = 0;
2817 splay_tree_key remove_vars[mapnum];
2819 for (i = 0; i < mapnum; i++)
2821 struct splay_tree_key_s cur_node;
2822 unsigned char kind = kinds[i] & typemask;
2823 switch (kind)
2825 case GOMP_MAP_FROM:
2826 case GOMP_MAP_ALWAYS_FROM:
2827 case GOMP_MAP_DELETE:
2828 case GOMP_MAP_RELEASE:
2829 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2830 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2831 cur_node.host_start = (uintptr_t) hostaddrs[i];
2832 cur_node.host_end = cur_node.host_start + sizes[i];
2833 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2834 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2835 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2836 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2837 if (!k)
2838 continue;
2840 bool delete_p = (kind == GOMP_MAP_DELETE
2841 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2842 bool do_copy, do_remove;
2843 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2844 &do_remove);
2846 if ((kind == GOMP_MAP_FROM && do_copy)
2847 || kind == GOMP_MAP_ALWAYS_FROM)
2848 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2849 (void *) (k->tgt->tgt_start + k->tgt_offset
2850 + cur_node.host_start
2851 - k->host_start),
2852 cur_node.host_end - cur_node.host_start);
2854 /* Lists of structure elements are removed altogether at once, which
2855 may cause immediate deallocation of the target_mem_desc, causing
2856 errors if we still have following element siblings to copy back.
2857 While we're at it, it also seems more disciplined to simply
2858 queue all removals together for processing below.
2860 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
2861 not have this problem, since it maintains an additional
2862 tgt->refcount = 1 reference to the target_mem_desc to start with.
2863 */
2864 if (do_remove)
2865 remove_vars[nrmvars++] = k;
2866 break;
2868 case GOMP_MAP_DETACH:
2869 break;
2870 default:
2871 gomp_mutex_unlock (&devicep->lock);
2872 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2873 kind);
2877 for (int i = 0; i < nrmvars; i++)
2878 gomp_remove_var (devicep, remove_vars[i]);
2880 gomp_mutex_unlock (&devicep->lock);
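/* gomp_exit_data in user-level form (illustrative sketch, not part of
   this file): each 'enter data' bumps the dynamic reference count,
   'release' drops it, and 'from' copies back and unmaps once the count
   reaches zero.  */
#if 0
void
enter_exit_example (int *buf, int n)
{
  #pragma omp target enter data map(to: buf[0:n])      /* refcount 1 */
  #pragma omp target enter data map(to: buf[0:n])      /* refcount 2 */
  #pragma omp target exit data map(release: buf[0:n])  /* refcount 1 */
  #pragma omp target exit data map(from: buf[0:n])     /* copy back, unmap */
}
#endif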
2883 void
2884 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2885 size_t *sizes, unsigned short *kinds,
2886 unsigned int flags, void **depend)
2888 struct gomp_device_descr *devicep = resolve_device (device);
2890 /* If there are depend clauses, but nowait is not present,
2891 block the parent task until the dependencies are resolved
2892 and then just continue with the rest of the function as if it
2893 is a merged task. Until we are able to schedule task during
2894 variable mapping or unmapping, ignore nowait if depend clauses
2895 are not present. */
2896 if (depend != NULL)
2898 struct gomp_thread *thr = gomp_thread ();
2899 if (thr->task && thr->task->depend_hash)
2901 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2902 && thr->ts.team
2903 && !thr->task->final_task)
2905 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2906 mapnum, hostaddrs, sizes, kinds,
2907 flags, depend, NULL,
2908 GOMP_TARGET_TASK_DATA))
2909 return;
2911 else
2913 struct gomp_team *team = thr->ts.team;
2914 /* If parallel or taskgroup has been cancelled, don't start new
2915 tasks. */
2916 if (__builtin_expect (gomp_cancel_var, 0) && team)
2918 if (gomp_team_barrier_cancelled (&team->barrier))
2919 return;
2920 if (thr->task->taskgroup)
2922 if (thr->task->taskgroup->cancelled)
2923 return;
2924 if (thr->task->taskgroup->workshare
2925 && thr->task->taskgroup->prev
2926 && thr->task->taskgroup->prev->cancelled)
2927 return;
2931 gomp_task_maybe_wait_for_dependencies (depend);
2936 if (devicep == NULL
2937 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2938 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2939 return;
2941 struct gomp_thread *thr = gomp_thread ();
2942 struct gomp_team *team = thr->ts.team;
2943 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2944 if (__builtin_expect (gomp_cancel_var, 0) && team)
2946 if (gomp_team_barrier_cancelled (&team->barrier))
2947 return;
2948 if (thr->task->taskgroup)
2950 if (thr->task->taskgroup->cancelled)
2951 return;
2952 if (thr->task->taskgroup->workshare
2953 && thr->task->taskgroup->prev
2954 && thr->task->taskgroup->prev->cancelled)
2955 return;
2959 htab_t refcount_set = htab_create (mapnum);
2961 /* The variables are mapped separately such that they can be released
2962 independently. */
2963 size_t i, j;
2964 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2965 for (i = 0; i < mapnum; i++)
2966 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2968 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2969 &kinds[i], true, &refcount_set,
2970 GOMP_MAP_VARS_ENTER_DATA);
2971 i += sizes[i];
2973 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2975 for (j = i + 1; j < mapnum; j++)
2976 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
2977 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
2978 break;
2979 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
2980 &kinds[i], true, &refcount_set,
2981 GOMP_MAP_VARS_ENTER_DATA);
2982 i += j - i - 1;
2984 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
2986 /* An attach operation must be processed together with the mapped
2987 base-pointer list item. */
2988 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2989 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
2990 i += 1;
2992 else
2993 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2994 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
2995 else
2996 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
2997 htab_free (refcount_set);
3000 bool
3001 gomp_target_task_fn (void *data)
3003 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3004 struct gomp_device_descr *devicep = ttask->devicep;
3006 if (ttask->fn != NULL)
3008 void *fn_addr;
3009 if (devicep == NULL
3010 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3011 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3012 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3014 ttask->state = GOMP_TARGET_TASK_FALLBACK;
3015 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
3016 return false;
3019 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3021 if (ttask->tgt)
3022 gomp_unmap_vars (ttask->tgt, true, NULL);
3023 return false;
3026 void *actual_arguments;
3027 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3029 ttask->tgt = NULL;
3030 actual_arguments = ttask->hostaddrs;
3032 else
3034 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3035 NULL, ttask->sizes, ttask->kinds, true,
3036 NULL, GOMP_MAP_VARS_TARGET);
3037 actual_arguments = (void *) ttask->tgt->tgt_start;
3039 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3041 assert (devicep->async_run_func);
3042 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3043 ttask->args, (void *) ttask);
3044 return true;
3046 else if (devicep == NULL
3047 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3048 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3049 return false;
3051 size_t i;
3052 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3053 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3054 ttask->kinds, true);
3055 else
3057 htab_t refcount_set = htab_create (ttask->mapnum);
3058 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3059 for (i = 0; i < ttask->mapnum; i++)
3060 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3062 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3063 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3064 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3065 i += ttask->sizes[i];
3067 else
3068 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3069 &ttask->kinds[i], true, &refcount_set,
3070 GOMP_MAP_VARS_ENTER_DATA);
3071 else
3072 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3073 ttask->kinds, &refcount_set);
3074 htab_free (refcount_set);
3076 return false;
3079 void
3080 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3082 if (thread_limit)
3084 struct gomp_task_icv *icv = gomp_icv (true);
3085 icv->thread_limit_var
3086 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3088 (void) num_teams;
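/* GOMP_teams is called from within the target region body; a minimal
   source-level sketch (illustrative only, not part of this file):  */
#if 0
void
teams_example (void)
{
  /* The compiler emits GOMP_teams (8, 64) at the start of the region.  */
  #pragma omp target teams num_teams(8) thread_limit(64)
  {
    /* Team-distributed work goes here.  */
  }
}
#endif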
3091 void *
3092 omp_target_alloc (size_t size, int device_num)
3094 if (device_num == gomp_get_num_devices ())
3095 return malloc (size);
3097 if (device_num < 0)
3098 return NULL;
3100 struct gomp_device_descr *devicep = resolve_device (device_num);
3101 if (devicep == NULL)
3102 return NULL;
3104 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3105 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3106 return malloc (size);
3108 gomp_mutex_lock (&devicep->lock);
3109 void *ret = devicep->alloc_func (devicep->target_id, size);
3110 gomp_mutex_unlock (&devicep->lock);
3111 return ret;
3114 void
3115 omp_target_free (void *device_ptr, int device_num)
3117 if (device_ptr == NULL)
3118 return;
3120 if (device_num == gomp_get_num_devices ())
3122 free (device_ptr);
3123 return;
3126 if (device_num < 0)
3127 return;
3129 struct gomp_device_descr *devicep = resolve_device (device_num);
3130 if (devicep == NULL)
3131 return;
3133 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3134 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3136 free (device_ptr);
3137 return;
3140 gomp_mutex_lock (&devicep->lock);
3141 gomp_free_device_memory (devicep, device_ptr);
3142 gomp_mutex_unlock (&devicep->lock);
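/* Typical pairing of the two allocation entry points above with
   omp_target_memcpy (defined below); illustrative sketch only:  */
#if 0
#include <omp.h>
#include <stdlib.h>

int
device_buffer_example (size_t n)
{
  int dev = omp_get_default_device ();
  int host = omp_get_initial_device ();  /* The host device number.  */

  double *dbuf = omp_target_alloc (n * sizeof (double), dev);
  if (dbuf == NULL)
    return -1;

  double *hbuf = calloc (n, sizeof (double));
  /* Host -> device copy; returns 0 on success, EINVAL on failure.  */
  int err = hbuf
            ? omp_target_memcpy (dbuf, hbuf, n * sizeof (double),
                                 0 /* dst_offset */, 0 /* src_offset */,
                                 dev, host)
            : -1;
  free (hbuf);
  omp_target_free (dbuf, dev);
  return err;
}
#endif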
3145 int
3146 omp_target_is_present (const void *ptr, int device_num)
3148 if (ptr == NULL)
3149 return 1;
3151 if (device_num == gomp_get_num_devices ())
3152 return 1;
3154 if (device_num < 0)
3155 return 0;
3157 struct gomp_device_descr *devicep = resolve_device (device_num);
3158 if (devicep == NULL)
3159 return 0;
3161 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3162 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3163 return 1;
3165 gomp_mutex_lock (&devicep->lock);
3166 struct splay_tree_s *mem_map = &devicep->mem_map;
3167 struct splay_tree_key_s cur_node;
3169 cur_node.host_start = (uintptr_t) ptr;
3170 cur_node.host_end = cur_node.host_start;
3171 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3172 int ret = n != NULL;
3173 gomp_mutex_unlock (&devicep->lock);
3174 return ret;
3177 int
3178 omp_target_memcpy (void *dst, const void *src, size_t length,
3179 size_t dst_offset, size_t src_offset, int dst_device_num,
3180 int src_device_num)
3182 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3183 bool ret;
3185 if (dst_device_num != gomp_get_num_devices ())
3187 if (dst_device_num < 0)
3188 return EINVAL;
3190 dst_devicep = resolve_device (dst_device_num);
3191 if (dst_devicep == NULL)
3192 return EINVAL;
3194 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3195 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3196 dst_devicep = NULL;
3198 if (src_device_num != num_devices_openmp)
3200 if (src_device_num < 0)
3201 return EINVAL;
3203 src_devicep = resolve_device (src_device_num);
3204 if (src_devicep == NULL)
3205 return EINVAL;
3207 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3208 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3209 src_devicep = NULL;
3211 if (src_devicep == NULL && dst_devicep == NULL)
3213 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3214 return 0;
3216 if (src_devicep == NULL)
3218 gomp_mutex_lock (&dst_devicep->lock);
3219 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3220 (char *) dst + dst_offset,
3221 (char *) src + src_offset, length);
3222 gomp_mutex_unlock (&dst_devicep->lock);
3223 return (ret ? 0 : EINVAL);
3225 if (dst_devicep == NULL)
3227 gomp_mutex_lock (&src_devicep->lock);
3228 ret = src_devicep->dev2host_func (src_devicep->target_id,
3229 (char *) dst + dst_offset,
3230 (char *) src + src_offset, length);
3231 gomp_mutex_unlock (&src_devicep->lock);
3232 return (ret ? 0 : EINVAL);
3234 if (src_devicep == dst_devicep)
3236 gomp_mutex_lock (&src_devicep->lock);
3237 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3238 (char *) dst + dst_offset,
3239 (char *) src + src_offset, length);
3240 gomp_mutex_unlock (&src_devicep->lock);
3241 return (ret ? 0 : EINVAL);
3243 return EINVAL;
3246 static int
3247 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3248 int num_dims, const size_t *volume,
3249 const size_t *dst_offsets,
3250 const size_t *src_offsets,
3251 const size_t *dst_dimensions,
3252 const size_t *src_dimensions,
3253 struct gomp_device_descr *dst_devicep,
3254 struct gomp_device_descr *src_devicep)
3256 size_t dst_slice = element_size;
3257 size_t src_slice = element_size;
3258 size_t j, dst_off, src_off, length;
3259 int i, ret;
3261 if (num_dims == 1)
3263 if (__builtin_mul_overflow (element_size, volume[0], &length)
3264 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3265 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3266 return EINVAL;
3267 if (dst_devicep == NULL && src_devicep == NULL)
3269 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3270 length);
3271 ret = 1;
3273 else if (src_devicep == NULL)
3274 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3275 (char *) dst + dst_off,
3276 (const char *) src + src_off,
3277 length);
3278 else if (dst_devicep == NULL)
3279 ret = src_devicep->dev2host_func (src_devicep->target_id,
3280 (char *) dst + dst_off,
3281 (const char *) src + src_off,
3282 length);
3283 else if (src_devicep == dst_devicep)
3284 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3285 (char *) dst + dst_off,
3286 (const char *) src + src_off,
3287 length);
3288 else
3289 ret = 0;
3290 return ret ? 0 : EINVAL;
3293 /* FIXME: it would be nice to have some plugin function to handle
3294 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3295 be handled in the generic recursion below, and for host-host it
3296 should be used even for any num_dims >= 2. */
3298 for (i = 1; i < num_dims; i++)
3299 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3300 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3301 return EINVAL;
3302 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3303 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3304 return EINVAL;
3305 for (j = 0; j < volume[0]; j++)
3307 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3308 (const char *) src + src_off,
3309 element_size, num_dims - 1,
3310 volume + 1, dst_offsets + 1,
3311 src_offsets + 1, dst_dimensions + 1,
3312 src_dimensions + 1, dst_devicep,
3313 src_devicep);
3314 if (ret)
3315 return ret;
3316 dst_off += dst_slice;
3317 src_off += src_slice;
3319 return 0;
3322 int
3323 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3324 int num_dims, const size_t *volume,
3325 const size_t *dst_offsets,
3326 const size_t *src_offsets,
3327 const size_t *dst_dimensions,
3328 const size_t *src_dimensions,
3329 int dst_device_num, int src_device_num)
3331 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3333 if (!dst && !src)
3334 return INT_MAX;
3336 if (dst_device_num != gomp_get_num_devices ())
3338 if (dst_device_num < 0)
3339 return EINVAL;
3341 dst_devicep = resolve_device (dst_device_num);
3342 if (dst_devicep == NULL)
3343 return EINVAL;
3345 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3346 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3347 dst_devicep = NULL;
3349 if (src_device_num != num_devices_openmp)
3351 if (src_device_num < 0)
3352 return EINVAL;
3354 src_devicep = resolve_device (src_device_num);
3355 if (src_devicep == NULL)
3356 return EINVAL;
3358 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3359 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3360 src_devicep = NULL;
3363 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3364 return EINVAL;
3366 if (src_devicep)
3367 gomp_mutex_lock (&src_devicep->lock);
3368 else if (dst_devicep)
3369 gomp_mutex_lock (&dst_devicep->lock);
3370 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3371 volume, dst_offsets, src_offsets,
3372 dst_dimensions, src_dimensions,
3373 dst_devicep, src_devicep);
3374 if (src_devicep)
3375 gomp_mutex_unlock (&src_devicep->lock);
3376 else if (dst_devicep)
3377 gomp_mutex_unlock (&dst_devicep->lock);
3378 return ret;
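/* Parameter layout for omp_target_memcpy_rect, shown for a 2-D copy
   (illustrative sketch; 'dev_matrix' is assumed to be a device pointer to
   a 4x5 array of double, e.g. obtained from omp_target_alloc):  */
#if 0
#include <omp.h>

double host_block[2][3];

int
rect_example (void *dev_matrix, int dev)
{
  size_t volume[2]         = { 2, 3 };  /* rows, cols to copy */
  size_t dst_offsets[2]    = { 0, 0 };
  size_t src_offsets[2]    = { 1, 2 };  /* start at row 1, column 2 */
  size_t dst_dimensions[2] = { 2, 3 };
  size_t src_dimensions[2] = { 4, 5 };
  /* Copies device rows 1-2, columns 2-4 into host_block; returns 0 on
     success.  */
  return omp_target_memcpy_rect (host_block, dev_matrix, sizeof (double),
                                 2, volume, dst_offsets, src_offsets,
                                 dst_dimensions, src_dimensions,
                                 omp_get_initial_device (), dev);
}
#endif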
3381 int
3382 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3383 size_t size, size_t device_offset, int device_num)
3385 if (device_num == gomp_get_num_devices ())
3386 return EINVAL;
3388 if (device_num < 0)
3389 return EINVAL;
3391 struct gomp_device_descr *devicep = resolve_device (device_num);
3392 if (devicep == NULL)
3393 return EINVAL;
3395 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3396 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3397 return EINVAL;
3399 gomp_mutex_lock (&devicep->lock);
3401 struct splay_tree_s *mem_map = &devicep->mem_map;
3402 struct splay_tree_key_s cur_node;
3403 int ret = EINVAL;
3405 cur_node.host_start = (uintptr_t) host_ptr;
3406 cur_node.host_end = cur_node.host_start + size;
3407 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3408 if (n)
3410 if (n->tgt->tgt_start + n->tgt_offset
3411 == (uintptr_t) device_ptr + device_offset
3412 && n->host_start <= cur_node.host_start
3413 && n->host_end >= cur_node.host_end)
3414 ret = 0;
3416 else
3418 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3419 tgt->array = gomp_malloc (sizeof (*tgt->array));
3420 tgt->refcount = 1;
3421 tgt->tgt_start = 0;
3422 tgt->tgt_end = 0;
3423 tgt->to_free = NULL;
3424 tgt->prev = NULL;
3425 tgt->list_count = 0;
3426 tgt->device_descr = devicep;
3427 splay_tree_node array = tgt->array;
3428 splay_tree_key k = &array->key;
3429 k->host_start = cur_node.host_start;
3430 k->host_end = cur_node.host_end;
3431 k->tgt = tgt;
3432 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3433 k->refcount = REFCOUNT_INFINITY;
3434 k->dynamic_refcount = 0;
3435 k->aux = NULL;
3436 array->left = NULL;
3437 array->right = NULL;
3438 splay_tree_insert (&devicep->mem_map, array);
3439 ret = 0;
3441 gomp_mutex_unlock (&devicep->lock);
3442 return ret;
3445 int
3446 omp_target_disassociate_ptr (const void *ptr, int device_num)
3448 if (device_num == gomp_get_num_devices ())
3449 return EINVAL;
3451 if (device_num < 0)
3452 return EINVAL;
3454 struct gomp_device_descr *devicep = resolve_device (device_num);
3455 if (devicep == NULL)
3456 return EINVAL;
3458 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3459 return EINVAL;
3461 gomp_mutex_lock (&devicep->lock);
3463 struct splay_tree_s *mem_map = &devicep->mem_map;
3464 struct splay_tree_key_s cur_node;
3465 int ret = EINVAL;
3467 cur_node.host_start = (uintptr_t) ptr;
3468 cur_node.host_end = cur_node.host_start;
3469 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3470 if (n
3471 && n->host_start == cur_node.host_start
3472 && n->refcount == REFCOUNT_INFINITY
3473 && n->tgt->tgt_start == 0
3474 && n->tgt->to_free == NULL
3475 && n->tgt->refcount == 1
3476 && n->tgt->list_count == 0)
3478 splay_tree_remove (&devicep->mem_map, n);
3479 gomp_unmap_tgt (n->tgt);
3480 ret = 0;
3483 gomp_mutex_unlock (&devicep->lock);
3484 return ret;
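/* Typical pairing of the associate/disassociate entry points above
   (illustrative sketch, not part of this file):  */
#if 0
#include <omp.h>

int
associate_example (int *host_buf, size_t n, int dev)
{
  void *dev_buf = omp_target_alloc (n * sizeof (int), dev);
  if (dev_buf == NULL)
    return -1;
  /* Make host_buf[0..n) an already-present alias of dev_buf: lookups hit
     the splay-tree entry inserted above (refcount REFCOUNT_INFINITY, so
     mapping it moves no data; copies are the programmer's job, e.g. via
     omp_target_memcpy).  */
  int err = omp_target_associate_ptr (host_buf, dev_buf, n * sizeof (int),
                                      0 /* device_offset */, dev);
  if (err == 0)
    err = omp_target_disassociate_ptr (host_buf, dev);
  omp_target_free (dev_buf, dev);
  return err;
}
#endif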
3487 int
3488 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3490 (void) kind;
3491 if (device_num == gomp_get_num_devices ())
3492 return gomp_pause_host ();
3493 if (device_num < 0 || device_num >= num_devices_openmp)
3494 return -1;
3495 /* Do nothing for target devices for now. */
3496 return 0;
3499 int
3500 omp_pause_resource_all (omp_pause_resource_t kind)
3502 (void) kind;
3503 if (gomp_pause_host ())
3504 return -1;
3505 /* Do nothing for target devices for now. */
3506 return 0;
3509 ialias (omp_pause_resource)
3510 ialias (omp_pause_resource_all)
3512 #ifdef PLUGIN_SUPPORT
3514 /* This function tries to load a plugin for DEVICE. The name of the plugin
3515 is passed in PLUGIN_NAME.
3516 The handles of the found functions are stored in the corresponding fields
3517 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3519 static bool
3520 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3521 const char *plugin_name)
3523 const char *err = NULL, *last_missing = NULL;
3525 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3526 if (!plugin_handle)
3527 #if OFFLOAD_DEFAULTED
3528 return 0;
3529 #else
3530 goto dl_fail;
3531 #endif
3533 /* Check if all required functions are available in the plugin and store
3534 their handles. None of the symbols can legitimately be NULL,
3535 so we don't need to check dlerror all the time. */
3536 #define DLSYM(f) \
3537 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3538 goto dl_fail
3539 /* Similar, but missing functions are not an error. Return false if
3540 failed, true otherwise. */
3541 #define DLSYM_OPT(f, n) \
3542 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3543 || (last_missing = #n, 0))
3545 DLSYM (version);
3546 if (device->version_func () != GOMP_VERSION)
3548 err = "plugin version mismatch";
3549 goto fail;
3552 DLSYM (get_name);
3553 DLSYM (get_caps);
3554 DLSYM (get_type);
3555 DLSYM (get_num_devices);
3556 DLSYM (init_device);
3557 DLSYM (fini_device);
3558 DLSYM (load_image);
3559 DLSYM (unload_image);
3560 DLSYM (alloc);
3561 DLSYM (free);
3562 DLSYM (dev2host);
3563 DLSYM (host2dev);
3564 device->capabilities = device->get_caps_func ();
3565 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3567 DLSYM (run);
3568 DLSYM_OPT (async_run, async_run);
3569 DLSYM_OPT (can_run, can_run);
3570 DLSYM (dev2dev);
3572 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3574 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3575 || !DLSYM_OPT (openacc.create_thread_data,
3576 openacc_create_thread_data)
3577 || !DLSYM_OPT (openacc.destroy_thread_data,
3578 openacc_destroy_thread_data)
3579 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3580 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3581 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3582 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3583 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3584 || !DLSYM_OPT (openacc.async.queue_callback,
3585 openacc_async_queue_callback)
3586 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3587 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3588 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3589 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3591 /* Require all the OpenACC handlers if we have
3592 GOMP_OFFLOAD_CAP_OPENACC_200. */
3593 err = "plugin missing OpenACC handler function";
3594 goto fail;
3597 unsigned cuda = 0;
3598 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3599 openacc_cuda_get_current_device);
3600 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3601 openacc_cuda_get_current_context);
3602 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3603 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3604 if (cuda && cuda != 4)
3606 /* Make sure all the CUDA functions are there if any of them are. */
3607 err = "plugin missing OpenACC CUDA handler function";
3608 goto fail;
3611 #undef DLSYM
3612 #undef DLSYM_OPT
3614 return 1;
3616 dl_fail:
3617 err = dlerror ();
3618 fail:
3619 gomp_error ("while loading %s: %s", plugin_name, err);
3620 if (last_missing)
3621 gomp_error ("missing function was %s", last_missing);
3622 if (plugin_handle)
3623 dlclose (plugin_handle);
3625 return 0;
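/* A plugin is simply a shared object exporting "GOMP_OFFLOAD_<name>"
   symbols for the DLSYM machinery above.  A stub sketch, illustrative
   only: the return types below are inferred from the call sites in this
   file and are an assumption; see libgomp-plugin.h for the authoritative
   interface.  */
#if 0
unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; }
const char *GOMP_OFFLOAD_get_name (void) { return "example"; }
unsigned GOMP_OFFLOAD_get_caps (void) { return GOMP_OFFLOAD_CAP_OPENMP_400; }
int GOMP_OFFLOAD_get_type (void) { return 0; /* enum offload_target_type */ }
int GOMP_OFFLOAD_get_num_devices (void) { return 1; }
/* ...plus init_device, fini_device, load_image, unload_image, alloc, free,
   dev2host, host2dev and, for OpenMP-400 capable plugins, run (with
   optional async_run/can_run and dev2dev), as resolved above.  */
#endif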
3628 /* This function finalizes all initialized devices. */
3630 static void
3631 gomp_target_fini (void)
3633 int i;
3634 for (i = 0; i < num_devices; i++)
3636 bool ret = true;
3637 struct gomp_device_descr *devicep = &devices[i];
3638 gomp_mutex_lock (&devicep->lock);
3639 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3640 ret = gomp_fini_device (devicep);
3641 gomp_mutex_unlock (&devicep->lock);
3642 if (!ret)
3643 gomp_fatal ("device finalization failed");
3647 /* This function initializes the runtime for offloading.
3648 It parses the list of offload plugins and tries to load them.
3649 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3650 will be set, and the array DEVICES initialized, containing descriptors for
3651 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3652 followed by the others. */
3654 static void
3655 gomp_target_init (void)
3657 const char *prefix = "libgomp-plugin-";
3658 const char *suffix = SONAME_SUFFIX (1);
3659 const char *cur, *next;
3660 char *plugin_name;
3661 int i, new_num_devs;
3662 int num_devs = 0, num_devs_openmp;
3663 struct gomp_device_descr *devs = NULL;
3665 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3666 return;
3668 cur = OFFLOAD_PLUGINS;
3669 if (*cur)
3670 do
3672 struct gomp_device_descr current_device;
3673 size_t prefix_len, suffix_len, cur_len;
3675 next = strchr (cur, ',');
3677 prefix_len = strlen (prefix);
3678 cur_len = next ? next - cur : strlen (cur);
3679 suffix_len = strlen (suffix);
3681 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3682 if (!plugin_name)
3684 num_devs = 0;
3685 break;
3688 memcpy (plugin_name, prefix, prefix_len);
3689 memcpy (plugin_name + prefix_len, cur, cur_len);
3690 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
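/* A one-call equivalent of the three memcpy's above (illustrative sketch;
   would need <stdio.h>): with OFFLOAD_PLUGINS == "nvptx,gcn" and a ".so.1"
   suffix this yields "libgomp-plugin-nvptx.so.1", then
   "libgomp-plugin-gcn.so.1".  */
#if 0
snprintf (plugin_name, prefix_len + cur_len + suffix_len + 1, "%s%.*s%s",
          prefix, (int) cur_len, cur, suffix);
#endif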
3692 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3694 new_num_devs = current_device.get_num_devices_func ();
3695 if (new_num_devs >= 1)
3697 /* Augment DEVICES and NUM_DEVICES. */
3699 devs = realloc (devs, (num_devs + new_num_devs)
3700 * sizeof (struct gomp_device_descr));
3701 if (!devs)
3703 num_devs = 0;
3704 free (plugin_name);
3705 break;
3708 current_device.name = current_device.get_name_func ();
3709 /* current_device.capabilities has already been set. */
3710 current_device.type = current_device.get_type_func ();
3711 current_device.mem_map.root = NULL;
3712 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3713 for (i = 0; i < new_num_devs; i++)
3715 current_device.target_id = i;
3716 devs[num_devs] = current_device;
3717 gomp_mutex_init (&devs[num_devs].lock);
3718 num_devs++;
3723 free (plugin_name);
3724 cur = next + 1;
3726 while (next);
3728 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3729 NUM_DEVICES_OPENMP. */
3730 struct gomp_device_descr *devs_s
3731 = malloc (num_devs * sizeof (struct gomp_device_descr));
3732 if (!devs_s)
3734 num_devs = 0;
3735 free (devs);
3736 devs = NULL;
3738 num_devs_openmp = 0;
3739 for (i = 0; i < num_devs; i++)
3740 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3741 devs_s[num_devs_openmp++] = devs[i];
3742 int num_devs_after_openmp = num_devs_openmp;
3743 for (i = 0; i < num_devs; i++)
3744 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3745 devs_s[num_devs_after_openmp++] = devs[i];
3746 free (devs);
3747 devs = devs_s;
3749 for (i = 0; i < num_devs; i++)
3751 /* The 'devices' array can be moved (by the realloc call) until we have
3752 found all the plugins, so registering with the OpenACC runtime (which
3753 takes a copy of the pointer argument) must be delayed until now. */
3754 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3755 goacc_register (&devs[i]);
3758 num_devices = num_devs;
3759 num_devices_openmp = num_devs_openmp;
3760 devices = devs;
3761 if (atexit (gomp_target_fini) != 0)
3762 gomp_fatal ("atexit failed");
3765 #else /* PLUGIN_SUPPORT */
3766 /* If dlfcn.h is unavailable we always fall back to host execution.
3767 GOMP_target* routines are just stubs in this case. */
3768 static void
3769 gomp_target_init (void)
3772 #endif /* PLUGIN_SUPPORT */