/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
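/* For a zero-length key [p, p), gomp_map_lookup above tries [p, p + 1)
   first (a mapping containing p), then [p - 1, p) (a mapping ending exactly
   at p, e.g. a pointer just past the end of an array section), and finally
   the zero-length range itself.  */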
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
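/* With the defaults above, e.g. two 1 KiB to-device regions separated by a
   2 KiB gap of padding end up in one chunk and thus one host-to-device
   transfer (including the padding), whereas a gap of 8 KiB, or any region
   larger than 32 KiB, results in separate transfers.  */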
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
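/* E.g. adding regions at offsets [0, 1024), [1056, 2080) and [16384, 17408)
   in that order yields the chunks [0, 2080) and [16384, 17408): the second
   region is within MAX_COALESCE_BUF_GAP of the first, so it extends the
   first chunk (use_cnt becomes 2), while the third starts a new chunk.  */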
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
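/* In both encodings the low bits of the kind (masked by typemask, 0xff
   resp. 0x7) give the map kind and the remaining high bits (kind >> rshift)
   the log2 of the required alignment; e.g. a short-mapkind entry for a
   double carries 3 in the high bits, i.e. 8-byte alignment.  */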
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
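/* E.g. for an array section p[10:20], the pointer's value plus the BIAS of
   10 elements falls inside the mapped section [&p[10], &p[30]), which the
   lookup above finds; the bias is then subtracted again so that the pointer
   stored on the device still makes p[10] address the first mapped
   element.  */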
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
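/* Entries without a splay tree key fall into the cases above: OFFSET_INLINED
   values (e.g. firstprivate ints) are passed through verbatim, OFFSET_STRUCT
   entries take their device address from the first field of the same
   structure at index i + 1, and the default case is an offset into the
   freshly allocated [tgt_start, tgt_end) block.  */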
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n == NULL)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      cur_node.host_start -= n->host_start;
	      hostaddrs[i]
		= (void *) (n->tgt->tgt_start + n->tgt_offset
			    + cur_node.host_start);
	      tgt->list[i].offset = ~(uintptr_t) 0;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n == NULL)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    cur_node.host_start -= n->host_start;
		    hostaddrs[i]
		      = (void *) (n->tgt->tgt_start + n->tgt_offset
				  + cur_node.host_start);
		    tgt->list[i].offset = ~(uintptr_t) 0;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}
attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}

static void
gomp_unref_tgt (void *ptr)
{
  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);
}
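/* A target_mem_desc is thus freed when its last reference goes away: each
   splay tree key created for it holds one reference (dropped in
   gomp_remove_var) and gomp_map_vars itself adds one more, dropped via
   gomp_unref_tgt, possibly deferred to an async queue callback.  */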
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compiler
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
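  /* E.g. on a 64-bit host a "declare target link" variable of 16 bytes is
     recorded in the table with size 0x8000000000000010; masking with
     size_mask recovers the real 16-byte size, while the set bit selects
     REFCOUNT_LINK below.  */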
  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
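/* E.g. with a 4-byte int already placed (*tgt_size == 4) followed by a
   firstprivate double, align is 8, so *tgt_size is first rounded up to 8 and
   then advanced to 16; the block allocated from this size is over-allocated
   by tgt_align - 1 bytes so that any start address can be aligned.  */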
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
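/* Host functions are registered with the one-byte range
   [host_fn, host_fn + 1) in gomp_load_image_to_device, so the lookup with
   k.host_end = k.host_start + 1 above finds exactly that entry, and its
   tgt_offset holds the device-side function address rather than an
   offset.  */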
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */
1765 void
1766 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1767 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1768 unsigned int flags, void **depend, void **args)
1770 struct gomp_device_descr *devicep = resolve_device (device);
1771 size_t tgt_align = 0, tgt_size = 0;
1772 bool fpc_done = false;
1774 if (flags & GOMP_TARGET_FLAG_NOWAIT)
1776 struct gomp_thread *thr = gomp_thread ();
1777 /* Create a team if we don't have any around, as nowait
1778 target tasks make sense to run asynchronously even when
1779 outside of any parallel. */
1780 if (__builtin_expect (thr->ts.team == NULL, 0))
1782 struct gomp_team *team = gomp_new_team (1);
1783 struct gomp_task *task = thr->task;
1784 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1785 team->prev_ts = thr->ts;
1786 thr->ts.team = team;
1787 thr->ts.team_id = 0;
1788 thr->ts.work_share = &team->work_shares[0];
1789 thr->ts.last_work_share = NULL;
1790 #ifdef HAVE_SYNC_BUILTINS
1791 thr->ts.single_count = 0;
1792 #endif
1793 thr->ts.static_trip = 0;
1794 thr->task = &team->implicit_task[0];
1795 gomp_init_task (thr->task, NULL, icv);
1796 if (task)
1798 thr->task = task;
1799 gomp_end_task ();
1800 free (task);
1801 thr->task = &team->implicit_task[0];
1803 else
1804 pthread_setspecific (gomp_thread_destructor, thr);
1806 if (thr->ts.team
1807 && !thr->task->final_task)
1809 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1810 sizes, kinds, flags, depend, args,
1811 GOMP_TARGET_TASK_BEFORE_MAP);
1812 return;
1816 /* If there are depend clauses, but nowait is not present
1817 (or we are in a final task), block the parent task until the
1818 dependencies are resolved and then just continue with the rest
1819 of the function as if it is a merged task. */
1820 if (depend != NULL)
1822 struct gomp_thread *thr = gomp_thread ();
1823 if (thr->task && thr->task->depend_hash)
1825 /* If we might need to wait, copy firstprivate now. */
1826 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1827 &tgt_align, &tgt_size);
1828 if (tgt_align)
1830 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1831 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1832 tgt_align, tgt_size);
1834 fpc_done = true;
1835 gomp_task_maybe_wait_for_dependencies (depend);
1839 void *fn_addr;
1840 if (devicep == NULL
1841 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1842 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1843 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1845 if (!fpc_done)
1847 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1848 &tgt_align, &tgt_size);
1849 if (tgt_align)
1851 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1852 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1853 tgt_align, tgt_size);
1856 gomp_target_fallback (fn, hostaddrs);
1857 return;
1860 struct target_mem_desc *tgt_vars;
1861 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1863 if (!fpc_done)
1865 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1866 &tgt_align, &tgt_size);
1867 if (tgt_align)
1869 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1870 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1871 tgt_align, tgt_size);
1874 tgt_vars = NULL;
1876 else
1877 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1878 true, GOMP_MAP_VARS_TARGET);
1879 devicep->run_func (devicep->target_id, fn_addr,
1880 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1881 args);
1882 if (tgt_vars)
1883 gomp_unmap_vars (tgt_vars, true);
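/* Illustrative sketch (hypothetical lowering, for exposition only): for
   "#pragma omp target nowait" with a single map(tofrom: x), a compiler
   might conceptually emit

     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (x) };
     unsigned short kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target_ext (GOMP_DEVICE_ICV, fn0, 1, hostaddrs, sizes, kinds,
                      GOMP_TARGET_FLAG_NOWAIT, NULL, NULL);

   where fn0 is the outlined target region.  */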
1886 /* Host fallback for GOMP_target_data{,_ext} routines. */
1888 static void
1889 gomp_target_data_fallback (void)
1891 struct gomp_task_icv *icv = gomp_icv (false);
1892 if (icv->target_data)
1894 /* Even when doing a host fallback, if there are any active
1895 #pragma omp target data constructs, we need to remember the
1896 new #pragma omp target data; otherwise GOMP_target_end_data
1897 would get out of sync. */
1898 struct target_mem_desc *tgt
1899 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1900 GOMP_MAP_VARS_DATA);
1901 tgt->prev = icv->target_data;
1902 icv->target_data = tgt;
1906 void
1907 GOMP_target_data (int device, const void *unused, size_t mapnum,
1908 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1910 struct gomp_device_descr *devicep = resolve_device (device);
1912 if (devicep == NULL
1913 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1914 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1915 return gomp_target_data_fallback ();
1917 struct target_mem_desc *tgt
1918 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1919 GOMP_MAP_VARS_DATA);
1920 struct gomp_task_icv *icv = gomp_icv (true);
1921 tgt->prev = icv->target_data;
1922 icv->target_data = tgt;
1925 void
1926 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1927 size_t *sizes, unsigned short *kinds)
1929 struct gomp_device_descr *devicep = resolve_device (device);
1931 if (devicep == NULL
1932 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1933 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1934 return gomp_target_data_fallback ();
1936 struct target_mem_desc *tgt
1937 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1938 GOMP_MAP_VARS_DATA);
1939 struct gomp_task_icv *icv = gomp_icv (true);
1940 tgt->prev = icv->target_data;
1941 icv->target_data = tgt;
1944 void
1945 GOMP_target_end_data (void)
1947 struct gomp_task_icv *icv = gomp_icv (false);
1948 if (icv->target_data)
1950 struct target_mem_desc *tgt = icv->target_data;
1951 icv->target_data = tgt->prev;
1952 gomp_unmap_vars (tgt, true);
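/* Illustrative sketch (hypothetical lowering, for exposition only):
   "#pragma omp target data map(tofrom: a[0:n])" conceptually brackets
   its structured block with

     GOMP_target_data_ext (dev, 1, hostaddrs, sizes, kinds);
     ... structured block ...
     GOMP_target_end_data ();

   which is why the host fallback above still pushes a target_mem_desc.  */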
1956 void
1957 GOMP_target_update (int device, const void *unused, size_t mapnum,
1958 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1960 struct gomp_device_descr *devicep = resolve_device (device);
1962 if (devicep == NULL
1963 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1964 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1965 return;
1967 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1970 void
1971 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1972 size_t *sizes, unsigned short *kinds,
1973 unsigned int flags, void **depend)
1975 struct gomp_device_descr *devicep = resolve_device (device);
1977 /* If there are depend clauses, but nowait is not present,
1978 block the parent task until the dependencies are resolved
1979 and then just continue with the rest of the function as if it
1980 were a merged task. Until we are able to schedule tasks during
1981 variable mapping or unmapping, ignore nowait if depend clauses
1982 are not present. */
1983 if (depend != NULL)
1985 struct gomp_thread *thr = gomp_thread ();
1986 if (thr->task && thr->task->depend_hash)
1988 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1989 && thr->ts.team
1990 && !thr->task->final_task)
1992 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1993 mapnum, hostaddrs, sizes, kinds,
1994 flags | GOMP_TARGET_FLAG_UPDATE,
1995 depend, NULL, GOMP_TARGET_TASK_DATA))
1996 return;
1998 else
2000 struct gomp_team *team = thr->ts.team;
2001 /* If parallel or taskgroup has been cancelled, don't start new
2002 tasks. */
2003 if (__builtin_expect (gomp_cancel_var, 0) && team)
2005 if (gomp_team_barrier_cancelled (&team->barrier))
2006 return;
2007 if (thr->task->taskgroup)
2009 if (thr->task->taskgroup->cancelled)
2010 return;
2011 if (thr->task->taskgroup->workshare
2012 && thr->task->taskgroup->prev
2013 && thr->task->taskgroup->prev->cancelled)
2014 return;
2018 gomp_task_maybe_wait_for_dependencies (depend);
2023 if (devicep == NULL
2024 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2025 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2026 return;
2028 struct gomp_thread *thr = gomp_thread ();
2029 struct gomp_team *team = thr->ts.team;
2030 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2031 if (__builtin_expect (gomp_cancel_var, 0) && team)
2033 if (gomp_team_barrier_cancelled (&team->barrier))
2034 return;
2035 if (thr->task->taskgroup)
2037 if (thr->task->taskgroup->cancelled)
2038 return;
2039 if (thr->task->taskgroup->workshare
2040 && thr->task->taskgroup->prev
2041 && thr->task->taskgroup->prev->cancelled)
2042 return;
2046 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
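/* Handle an "omp target exit data" style unmapping: decrement reference
   counts, copy back FROM/ALWAYS_FROM data, and remove mappings whose
   refcount drops to zero.  */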
2049 static void
2050 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2051 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2053 const int typemask = 0xff;
2054 size_t i;
2055 gomp_mutex_lock (&devicep->lock);
2056 if (devicep->state == GOMP_DEVICE_FINALIZED)
2058 gomp_mutex_unlock (&devicep->lock);
2059 return;
2062 for (i = 0; i < mapnum; i++)
2064 struct splay_tree_key_s cur_node;
2065 unsigned char kind = kinds[i] & typemask;
2066 switch (kind)
2068 case GOMP_MAP_FROM:
2069 case GOMP_MAP_ALWAYS_FROM:
2070 case GOMP_MAP_DELETE:
2071 case GOMP_MAP_RELEASE:
2072 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2073 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2074 cur_node.host_start = (uintptr_t) hostaddrs[i];
2075 cur_node.host_end = cur_node.host_start + sizes[i];
2076 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2077 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2078 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2079 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2080 if (!k)
2081 continue;
2083 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2084 k->refcount--;
2085 if ((kind == GOMP_MAP_DELETE
2086 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2087 && k->refcount != REFCOUNT_INFINITY)
2088 k->refcount = 0;
2090 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2091 || kind == GOMP_MAP_ALWAYS_FROM)
2092 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2093 (void *) (k->tgt->tgt_start + k->tgt_offset
2094 + cur_node.host_start
2095 - k->host_start),
2096 cur_node.host_end - cur_node.host_start);
2097 if (k->refcount == 0)
2099 splay_tree_remove (&devicep->mem_map, k);
2100 if (k->link_key)
2101 splay_tree_insert (&devicep->mem_map,
2102 (splay_tree_node) k->link_key);
2103 if (k->tgt->refcount > 1)
2104 k->tgt->refcount--;
2105 else
2106 gomp_unmap_tgt (k->tgt);
2109 break;
2110 default:
2111 gomp_mutex_unlock (&devicep->lock);
2112 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2113 kind);
2117 gomp_mutex_unlock (&devicep->lock);
2120 void
2121 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2122 size_t *sizes, unsigned short *kinds,
2123 unsigned int flags, void **depend)
2125 struct gomp_device_descr *devicep = resolve_device (device);
2127 /* If there are depend clauses, but nowait is not present,
2128 block the parent task until the dependencies are resolved
2129 and then just continue with the rest of the function as if it
2130 were a merged task. Until we are able to schedule tasks during
2131 variable mapping or unmapping, ignore nowait if depend clauses
2132 are not present. */
2133 if (depend != NULL)
2135 struct gomp_thread *thr = gomp_thread ();
2136 if (thr->task && thr->task->depend_hash)
2138 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2139 && thr->ts.team
2140 && !thr->task->final_task)
2142 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2143 mapnum, hostaddrs, sizes, kinds,
2144 flags, depend, NULL,
2145 GOMP_TARGET_TASK_DATA))
2146 return;
2148 else
2150 struct gomp_team *team = thr->ts.team;
2151 /* If parallel or taskgroup has been cancelled, don't start new
2152 tasks. */
2153 if (__builtin_expect (gomp_cancel_var, 0) && team)
2155 if (gomp_team_barrier_cancelled (&team->barrier))
2156 return;
2157 if (thr->task->taskgroup)
2159 if (thr->task->taskgroup->cancelled)
2160 return;
2161 if (thr->task->taskgroup->workshare
2162 && thr->task->taskgroup->prev
2163 && thr->task->taskgroup->prev->cancelled)
2164 return;
2168 gomp_task_maybe_wait_for_dependencies (depend);
2173 if (devicep == NULL
2174 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2175 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2176 return;
2178 struct gomp_thread *thr = gomp_thread ();
2179 struct gomp_team *team = thr->ts.team;
2180 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2181 if (__builtin_expect (gomp_cancel_var, 0) && team)
2183 if (gomp_team_barrier_cancelled (&team->barrier))
2184 return;
2185 if (thr->task->taskgroup)
2187 if (thr->task->taskgroup->cancelled)
2188 return;
2189 if (thr->task->taskgroup->workshare
2190 && thr->task->taskgroup->prev
2191 && thr->task->taskgroup->prev->cancelled)
2192 return;
2196 size_t i;
2197 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2198 for (i = 0; i < mapnum; i++)
2199 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2201 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2202 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2203 i += sizes[i];
2205 else
2206 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2207 true, GOMP_MAP_VARS_ENTER_DATA);
2208 else
2209 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
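/* Illustrative sketch (hypothetical lowering, for exposition only):
   "#pragma omp target enter data map(to: a[0:n])" conceptually becomes
   a call like

     GOMP_target_enter_exit_data (dev, 1, hostaddrs, sizes, kinds,
                                  0, NULL);

   while "#pragma omp target exit data map(from: a[0:n])" passes
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS instead.  */

/* Run a target task: depending on TTASK->fn and the device capabilities,
   either run the host fallback, map variables and fire off an async run,
   or perform update/enter/exit data operations.  Returns true if the task
   remains pending (async execution started), false if it is complete.  */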
2212 bool
2213 gomp_target_task_fn (void *data)
2215 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2216 struct gomp_device_descr *devicep = ttask->devicep;
2218 if (ttask->fn != NULL)
2220 void *fn_addr;
2221 if (devicep == NULL
2222 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2223 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2224 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2226 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2227 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2228 return false;
2231 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2233 if (ttask->tgt)
2234 gomp_unmap_vars (ttask->tgt, true);
2235 return false;
2238 void *actual_arguments;
2239 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2241 ttask->tgt = NULL;
2242 actual_arguments = ttask->hostaddrs;
2244 else
2246 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2247 NULL, ttask->sizes, ttask->kinds, true,
2248 GOMP_MAP_VARS_TARGET);
2249 actual_arguments = (void *) ttask->tgt->tgt_start;
2251 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2253 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2254 ttask->args, (void *) ttask);
2255 return true;
2257 else if (devicep == NULL
2258 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2259 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2260 return false;
2262 size_t i;
2263 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2264 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2265 ttask->kinds, true);
2266 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2267 for (i = 0; i < ttask->mapnum; i++)
2268 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2270 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2271 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2272 GOMP_MAP_VARS_ENTER_DATA);
2273 i += ttask->sizes[i];
2275 else
2276 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2277 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2278 else
2279 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2280 ttask->kinds);
2281 return false;
2284 void
2285 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2287 if (thread_limit)
2289 struct gomp_task_icv *icv = gomp_icv (true);
2290 icv->thread_limit_var
2291 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2293 (void) num_teams;
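/* Illustrative sketch (for exposition only): inside the outlined body of
   "#pragma omp target teams num_teams(4) thread_limit(8)", the compiler
   conceptually emits

     GOMP_teams (4, 8);

   at the start of the region, matching the NUM_TEAMS/THREAD_LIMIT
   semantics described in the comment before GOMP_target_ext.  */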
2296 void *
2297 omp_target_alloc (size_t size, int device_num)
2299 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2300 return malloc (size);
2302 if (device_num < 0)
2303 return NULL;
2305 struct gomp_device_descr *devicep = resolve_device (device_num);
2306 if (devicep == NULL)
2307 return NULL;
2309 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2310 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2311 return malloc (size);
2313 gomp_mutex_lock (&devicep->lock);
2314 void *ret = devicep->alloc_func (devicep->target_id, size);
2315 gomp_mutex_unlock (&devicep->lock);
2316 return ret;
2319 void
2320 omp_target_free (void *device_ptr, int device_num)
2322 if (device_ptr == NULL)
2323 return;
2325 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2327 free (device_ptr);
2328 return;
2331 if (device_num < 0)
2332 return;
2334 struct gomp_device_descr *devicep = resolve_device (device_num);
2335 if (devicep == NULL)
2336 return;
2338 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2339 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2341 free (device_ptr);
2342 return;
2345 gomp_mutex_lock (&devicep->lock);
2346 gomp_free_device_memory (devicep, device_ptr);
2347 gomp_mutex_unlock (&devicep->lock);
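/* Illustrative host-side usage (for exposition only):

     void *p = omp_target_alloc (n * sizeof (double), dev);
     if (p != NULL)
       {
         ... use p in target regions via is_device_ptr ...
         omp_target_free (p, dev);
       }

   where DEV is a valid device number.  */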
2351 omp_target_is_present (const void *ptr, int device_num)
2353 if (ptr == NULL)
2354 return 1;
2356 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2357 return 1;
2359 if (device_num < 0)
2360 return 0;
2362 struct gomp_device_descr *devicep = resolve_device (device_num);
2363 if (devicep == NULL)
2364 return 0;
2366 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2367 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2368 return 1;
2370 gomp_mutex_lock (&devicep->lock);
2371 struct splay_tree_s *mem_map = &devicep->mem_map;
2372 struct splay_tree_key_s cur_node;
2374 cur_node.host_start = (uintptr_t) ptr;
2375 cur_node.host_end = cur_node.host_start;
2376 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2377 int ret = n != NULL;
2378 gomp_mutex_unlock (&devicep->lock);
2379 return ret;
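/* Illustrative check (for exposition only): after
   "#pragma omp target enter data map(to: buf[0:n])" one would expect

     omp_target_is_present (buf, dev) != 0

   for the device DEV the data was mapped to.  */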
2383 omp_target_memcpy (void *dst, const void *src, size_t length,
2384 size_t dst_offset, size_t src_offset, int dst_device_num,
2385 int src_device_num)
2387 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2388 bool ret;
2390 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2392 if (dst_device_num < 0)
2393 return EINVAL;
2395 dst_devicep = resolve_device (dst_device_num);
2396 if (dst_devicep == NULL)
2397 return EINVAL;
2399 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2400 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2401 dst_devicep = NULL;
2403 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2405 if (src_device_num < 0)
2406 return EINVAL;
2408 src_devicep = resolve_device (src_device_num);
2409 if (src_devicep == NULL)
2410 return EINVAL;
2412 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2413 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2414 src_devicep = NULL;
2416 if (src_devicep == NULL && dst_devicep == NULL)
2418 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2419 return 0;
2421 if (src_devicep == NULL)
2423 gomp_mutex_lock (&dst_devicep->lock);
2424 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2425 (char *) dst + dst_offset,
2426 (char *) src + src_offset, length);
2427 gomp_mutex_unlock (&dst_devicep->lock);
2428 return (ret ? 0 : EINVAL);
2430 if (dst_devicep == NULL)
2432 gomp_mutex_lock (&src_devicep->lock);
2433 ret = src_devicep->dev2host_func (src_devicep->target_id,
2434 (char *) dst + dst_offset,
2435 (char *) src + src_offset, length);
2436 gomp_mutex_unlock (&src_devicep->lock);
2437 return (ret ? 0 : EINVAL);
2439 if (src_devicep == dst_devicep)
2441 gomp_mutex_lock (&src_devicep->lock);
2442 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2443 (char *) dst + dst_offset,
2444 (char *) src + src_offset, length);
2445 gomp_mutex_unlock (&src_devicep->lock);
2446 return (ret ? 0 : EINVAL);
2448 return EINVAL;
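/* Illustrative usage (for exposition only): copying N bytes from a host
   buffer into device memory obtained from omp_target_alloc:

     int err = omp_target_memcpy (dev_buf, host_buf, n, 0, 0,
                                  dev, omp_get_initial_device ());

   where DEV is a device number and the host side is addressed via
   omp_get_initial_device ().  */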
2451 static int
2452 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2453 int num_dims, const size_t *volume,
2454 const size_t *dst_offsets,
2455 const size_t *src_offsets,
2456 const size_t *dst_dimensions,
2457 const size_t *src_dimensions,
2458 struct gomp_device_descr *dst_devicep,
2459 struct gomp_device_descr *src_devicep)
2461 size_t dst_slice = element_size;
2462 size_t src_slice = element_size;
2463 size_t j, dst_off, src_off, length;
2464 int i, ret;
2466 if (num_dims == 1)
2468 if (__builtin_mul_overflow (element_size, volume[0], &length)
2469 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2470 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2471 return EINVAL;
2472 if (dst_devicep == NULL && src_devicep == NULL)
2474 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2475 length);
2476 ret = 1;
2478 else if (src_devicep == NULL)
2479 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2480 (char *) dst + dst_off,
2481 (const char *) src + src_off,
2482 length);
2483 else if (dst_devicep == NULL)
2484 ret = src_devicep->dev2host_func (src_devicep->target_id,
2485 (char *) dst + dst_off,
2486 (const char *) src + src_off,
2487 length);
2488 else if (src_devicep == dst_devicep)
2489 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2490 (char *) dst + dst_off,
2491 (const char *) src + src_off,
2492 length);
2493 else
2494 ret = 0;
2495 return ret ? 0 : EINVAL;
2498 /* FIXME: it would be nice to have some plugin function to handle
2499 num_dims == 2 and num_dims == 3 more efficiently. Larger values can
2500 be handled by the generic recursion below, and for host-to-host copies
2501 it should be used even for num_dims >= 2. */
2503 for (i = 1; i < num_dims; i++)
2504 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2505 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2506 return EINVAL;
2507 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2508 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2509 return EINVAL;
2510 for (j = 0; j < volume[0]; j++)
2512 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2513 (const char *) src + src_off,
2514 element_size, num_dims - 1,
2515 volume + 1, dst_offsets + 1,
2516 src_offsets + 1, dst_dimensions + 1,
2517 src_dimensions + 1, dst_devicep,
2518 src_devicep);
2519 if (ret)
2520 return ret;
2521 dst_off += dst_slice;
2522 src_off += src_slice;
2524 return 0;
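/* Worked example (for exposition only): a host-to-host copy with
   element_size = 8, num_dims = 2, volume = {2, 3}, dst_offsets = {1, 2}
   and dst_dimensions = {4, 5} computes dst_slice = 8 * 5 = 40 bytes and
   dst_off = 40 * 1 = 40 bytes; each 1-D recursion then copies
   8 * 3 = 24 bytes starting 8 * 2 = 16 bytes into the row, i.e. at byte
   offsets 56 and 96 of DST.  */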
2528 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2529 int num_dims, const size_t *volume,
2530 const size_t *dst_offsets,
2531 const size_t *src_offsets,
2532 const size_t *dst_dimensions,
2533 const size_t *src_dimensions,
2534 int dst_device_num, int src_device_num)
2536 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
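/* Per the OpenMP spec, a NULL DST and SRC query the maximum number of
   supported dimensions; the generic recursion places no practical bound,
   hence INT_MAX.  */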
2538 if (!dst && !src)
2539 return INT_MAX;
2541 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2543 if (dst_device_num < 0)
2544 return EINVAL;
2546 dst_devicep = resolve_device (dst_device_num);
2547 if (dst_devicep == NULL)
2548 return EINVAL;
2550 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2551 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2552 dst_devicep = NULL;
2554 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2556 if (src_device_num < 0)
2557 return EINVAL;
2559 src_devicep = resolve_device (src_device_num);
2560 if (src_devicep == NULL)
2561 return EINVAL;
2563 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2564 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2565 src_devicep = NULL;
2568 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2569 return EINVAL;
2571 if (src_devicep)
2572 gomp_mutex_lock (&src_devicep->lock);
2573 else if (dst_devicep)
2574 gomp_mutex_lock (&dst_devicep->lock);
2575 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2576 volume, dst_offsets, src_offsets,
2577 dst_dimensions, src_dimensions,
2578 dst_devicep, src_devicep);
2579 if (src_devicep)
2580 gomp_mutex_unlock (&src_devicep->lock);
2581 else if (dst_devicep)
2582 gomp_mutex_unlock (&dst_devicep->lock);
2583 return ret;
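/* Illustrative call (for exposition only), matching the worked example
   before omp_target_memcpy_rect for two host arrays:

     size_t volume[2] = { 2, 3 };
     size_t dst_off[2] = { 1, 2 }, src_off[2] = { 0, 0 };
     size_t dst_dims[2] = { 4, 5 }, src_dims[2] = { 2, 3 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2,
                                       volume, dst_off, src_off,
                                       dst_dims, src_dims,
                                       omp_get_initial_device (),
                                       omp_get_initial_device ());
*/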
2587 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2588 size_t size, size_t device_offset, int device_num)
2590 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2591 return EINVAL;
2593 if (device_num < 0)
2594 return EINVAL;
2596 struct gomp_device_descr *devicep = resolve_device (device_num);
2597 if (devicep == NULL)
2598 return EINVAL;
2600 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2601 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2602 return EINVAL;
2604 gomp_mutex_lock (&devicep->lock);
2606 struct splay_tree_s *mem_map = &devicep->mem_map;
2607 struct splay_tree_key_s cur_node;
2608 int ret = EINVAL;
2610 cur_node.host_start = (uintptr_t) host_ptr;
2611 cur_node.host_end = cur_node.host_start + size;
2612 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2613 if (n)
2615 if (n->tgt->tgt_start + n->tgt_offset
2616 == (uintptr_t) device_ptr + device_offset
2617 && n->host_start <= cur_node.host_start
2618 && n->host_end >= cur_node.host_end)
2619 ret = 0;
2621 else
2623 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2624 tgt->array = gomp_malloc (sizeof (*tgt->array));
2625 tgt->refcount = 1;
2626 tgt->tgt_start = 0;
2627 tgt->tgt_end = 0;
2628 tgt->to_free = NULL;
2629 tgt->prev = NULL;
2630 tgt->list_count = 0;
2631 tgt->device_descr = devicep;
2632 splay_tree_node array = tgt->array;
2633 splay_tree_key k = &array->key;
2634 k->host_start = cur_node.host_start;
2635 k->host_end = cur_node.host_end;
2636 k->tgt = tgt;
2637 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2638 k->refcount = REFCOUNT_INFINITY;
2639 array->left = NULL;
2640 array->right = NULL;
2641 splay_tree_insert (&devicep->mem_map, array);
2642 ret = 0;
2644 gomp_mutex_unlock (&devicep->lock);
2645 return ret;
2649 omp_target_disassociate_ptr (const void *ptr, int device_num)
2651 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2652 return EINVAL;
2654 if (device_num < 0)
2655 return EINVAL;
2657 struct gomp_device_descr *devicep = resolve_device (device_num);
2658 if (devicep == NULL)
2659 return EINVAL;
2661 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2662 return EINVAL;
2664 gomp_mutex_lock (&devicep->lock);
2666 struct splay_tree_s *mem_map = &devicep->mem_map;
2667 struct splay_tree_key_s cur_node;
2668 int ret = EINVAL;
2670 cur_node.host_start = (uintptr_t) ptr;
2671 cur_node.host_end = cur_node.host_start;
2672 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2673 if (n
2674 && n->host_start == cur_node.host_start
2675 && n->refcount == REFCOUNT_INFINITY
2676 && n->tgt->tgt_start == 0
2677 && n->tgt->to_free == NULL
2678 && n->tgt->refcount == 1
2679 && n->tgt->list_count == 0)
2681 splay_tree_remove (&devicep->mem_map, n);
2682 gomp_unmap_tgt (n->tgt);
2683 ret = 0;
2686 gomp_mutex_unlock (&devicep->lock);
2687 return ret;
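/* Illustrative pairing (for exposition only): device memory explicitly
   associated with a host buffer should be detached before it is freed:

     omp_target_associate_ptr (host_buf, dev_buf, n, 0, dev);
     ... target regions may now use host_buf without remapping ...
     omp_target_disassociate_ptr (host_buf, dev);
     omp_target_free (dev_buf, dev);
*/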
2691 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2693 (void) kind;
2694 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2695 return gomp_pause_host ();
2696 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2697 return -1;
2698 /* Do nothing for target devices for now. */
2699 return 0;
2703 omp_pause_resource_all (omp_pause_resource_t kind)
2705 (void) kind;
2706 if (gomp_pause_host ())
2707 return -1;
2708 /* Do nothing for target devices for now. */
2709 return 0;
2712 ialias (omp_pause_resource)
2713 ialias (omp_pause_resource_all)
2715 #ifdef PLUGIN_SUPPORT
2717 /* This function tries to load a plugin for DEVICE. The name of the
2718 plugin is passed in PLUGIN_NAME.
2719 The handles of the found functions are stored in the corresponding fields
2720 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2722 static bool
2723 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2724 const char *plugin_name)
2726 const char *err = NULL, *last_missing = NULL;
2728 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2729 if (!plugin_handle)
2730 goto dl_fail;
2732 /* Check if all required functions are available in the plugin and store
2733 their handles. None of the symbols can legitimately be NULL,
2734 so we don't need to check dlerror all the time. */
2735 #define DLSYM(f) \
2736 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2737 goto dl_fail
2738 /* Similar, but a missing function is not an error. The macro
2739 evaluates to false if the lookup failed, true otherwise. */
2740 #define DLSYM_OPT(f, n) \
2741 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2742 || (last_missing = #n, 0))
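/* For instance, DLSYM (alloc) expands to
     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;
   so a plugin lacking a mandatory entry point fails the load.  */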
2744 DLSYM (version);
2745 if (device->version_func () != GOMP_VERSION)
2747 err = "plugin version mismatch";
2748 goto fail;
2751 DLSYM (get_name);
2752 DLSYM (get_caps);
2753 DLSYM (get_type);
2754 DLSYM (get_num_devices);
2755 DLSYM (init_device);
2756 DLSYM (fini_device);
2757 DLSYM (load_image);
2758 DLSYM (unload_image);
2759 DLSYM (alloc);
2760 DLSYM (free);
2761 DLSYM (dev2host);
2762 DLSYM (host2dev);
2763 device->capabilities = device->get_caps_func ();
2764 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2766 DLSYM (run);
2767 DLSYM (async_run);
2768 DLSYM_OPT (can_run, can_run);
2769 DLSYM (dev2dev);
2771 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2773 if (!DLSYM_OPT (openacc.exec, openacc_exec)
2774 || !DLSYM_OPT (openacc.create_thread_data,
2775 openacc_create_thread_data)
2776 || !DLSYM_OPT (openacc.destroy_thread_data,
2777 openacc_destroy_thread_data)
2778 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
2779 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
2780 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
2781 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
2782 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
2783 || !DLSYM_OPT (openacc.async.queue_callback,
2784 openacc_async_queue_callback)
2785 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
2786 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
2787 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
2789 /* Require all the OpenACC handlers if we have
2790 GOMP_OFFLOAD_CAP_OPENACC_200. */
2791 err = "plugin missing OpenACC handler function";
2792 goto fail;
2795 unsigned cuda = 0;
2796 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2797 openacc_cuda_get_current_device);
2798 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2799 openacc_cuda_get_current_context);
2800 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2801 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2802 if (cuda && cuda != 4)
2804 /* Make sure all the CUDA functions are there if any of them are. */
2805 err = "plugin missing OpenACC CUDA handler function";
2806 goto fail;
2809 #undef DLSYM
2810 #undef DLSYM_OPT
2812 return 1;
2814 dl_fail:
2815 err = dlerror ();
2816 fail:
2817 gomp_error ("while loading %s: %s", plugin_name, err);
2818 if (last_missing)
2819 gomp_error ("missing function was %s", last_missing);
2820 if (plugin_handle)
2821 dlclose (plugin_handle);
2823 return 0;
2826 /* This function finalizes all initialized devices. */
2828 static void
2829 gomp_target_fini (void)
2831 int i;
2832 for (i = 0; i < num_devices; i++)
2834 bool ret = true;
2835 struct gomp_device_descr *devicep = &devices[i];
2836 gomp_mutex_lock (&devicep->lock);
2837 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2838 ret = gomp_fini_device (devicep);
2839 gomp_mutex_unlock (&devicep->lock);
2840 if (!ret)
2841 gomp_fatal ("device finalization failed");
2845 /* This function initializes the runtime for offloading.
2846 It parses the list of offload plugins and tries to load them.
2847 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2848 will be set, and the array DEVICES initialized, containing descriptors for
2849 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
2850 followed by the others. */
2852 static void
2853 gomp_target_init (void)
2855 const char *prefix = "libgomp-plugin-";
2856 const char *suffix = SONAME_SUFFIX (1);
2857 const char *cur, *next;
2858 char *plugin_name;
2859 int i, new_num_devices;
2861 num_devices = 0;
2862 devices = NULL;
2864 cur = OFFLOAD_PLUGINS;
2865 if (*cur)
2868 struct gomp_device_descr current_device;
2869 size_t prefix_len, suffix_len, cur_len;
2871 next = strchr (cur, ',');
2873 prefix_len = strlen (prefix);
2874 cur_len = next ? next - cur : strlen (cur);
2875 suffix_len = strlen (suffix);
2877 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2878 if (!plugin_name)
2880 num_devices = 0;
2881 break;
2884 memcpy (plugin_name, prefix, prefix_len);
2885 memcpy (plugin_name + prefix_len, cur, cur_len);
2886 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
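/* E.g., an OFFLOAD_PLUGINS entry "nvptx" with SONAME_SUFFIX (1)
   expanding to ".so.1" (typical on ELF targets) yields
   "libgomp-plugin-nvptx.so.1".  */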
2888 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2890 new_num_devices = current_device.get_num_devices_func ();
2891 if (new_num_devices >= 1)
2893 /* Augment DEVICES and NUM_DEVICES. */
2895 devices = realloc (devices, (num_devices + new_num_devices)
2896 * sizeof (struct gomp_device_descr));
2897 if (!devices)
2899 num_devices = 0;
2900 free (plugin_name);
2901 break;
2904 current_device.name = current_device.get_name_func ();
2905 /* current_device.capabilities has already been set. */
2906 current_device.type = current_device.get_type_func ();
2907 current_device.mem_map.root = NULL;
2908 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2909 current_device.openacc.data_environ = NULL;
2910 for (i = 0; i < new_num_devices; i++)
2912 current_device.target_id = i;
2913 devices[num_devices] = current_device;
2914 gomp_mutex_init (&devices[num_devices].lock);
2915 num_devices++;
2920 free (plugin_name);
2921 cur = next + 1;
2923 while (next);
2925 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2926 NUM_DEVICES_OPENMP. */
2927 struct gomp_device_descr *devices_s
2928 = malloc (num_devices * sizeof (struct gomp_device_descr));
2929 if (!devices_s)
2931 num_devices = 0;
2932 free (devices);
2933 devices = NULL;
2935 num_devices_openmp = 0;
2936 for (i = 0; i < num_devices; i++)
2937 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2938 devices_s[num_devices_openmp++] = devices[i];
2939 int num_devices_after_openmp = num_devices_openmp;
2940 for (i = 0; i < num_devices; i++)
2941 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2942 devices_s[num_devices_after_openmp++] = devices[i];
2943 free (devices);
2944 devices = devices_s;
2946 for (i = 0; i < num_devices; i++)
2948 /* The 'devices' array can be moved (by the realloc call) until we have
2949 found all the plugins, so registering with the OpenACC runtime (which
2950 takes a copy of the pointer argument) must be delayed until now. */
2951 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2952 goacc_register (&devices[i]);
2955 if (atexit (gomp_target_fini) != 0)
2956 gomp_fatal ("atexit failed");
2959 #else /* PLUGIN_SUPPORT */
2960 /* If dlfcn.h is unavailable we always fall back to host execution.
2961 GOMP_target* routines are just stubs for this case. */
2962 static void
2963 gomp_target_init (void)
2966 #endif /* PLUGIN_SUPPORT */