/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

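/* For illustration: a zero-length key (host_start == host_end) intersects
   no [host_start, host_end) interval in the splay tree, so the lookups
   above temporarily widen it.  For an address P, first try [P, P+1)
   (P is the start of a mapping), then [P-1, P+1) (P is one past the end
   of a mapping), and finally fall back to the unmodified key.  */
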
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets; chunks[i].start is the starting offset and
     chunks[i].end the ending offset, relative to the tgt->tgt_start device
     address, of chunks which are to be copied to buf and later copied to
     the device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in the chunks array, or -1 if coalesce buffering
     should not be performed.  */
  long chunk_cnt;
  /* During construction of the chunks array, how many memory regions are
     within the last chunk.  If there is just one memory region for a chunk,
     we copy it directly to the device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

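/* For illustration: with the limits above, two to-device regions at device
   offsets [0, 1024) and [2048, 3072) are 1 KiB apart, which is below
   MAX_COALESCE_BUF_GAP, so they are merged into a single chunk [0, 3072)
   and issued as one host-to-device transfer (the padding between them is
   copied too, which is harmless in freshly allocated device memory).
   A region larger than MAX_COALESCE_BUF_SIZE bypasses the buffer
   entirely.  */
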
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

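/* For illustration, a possible sequence of calls and the resulting chunks
   (using the default limits above):
     gomp_coalesce_buf_add (&cbuf, 0, 512);     -> chunks {[0, 512)}
     gomp_coalesce_buf_add (&cbuf, 640, 512);   -> chunks {[0, 1152)}
     gomp_coalesce_buf_add (&cbuf, 8192, 256);  -> chunks {[0, 1152), [8192, 8448)}
   A region starting before the end of the last chunk marks the whole
   buffer unusable (chunk_cnt = -1), since regions must arrive in
   increasing device-offset order.  */
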
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                             "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

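/* For illustration: the loop above is a binary search over the sorted,
   non-overlapping chunks array.  If the destination offset DOFF falls
   inside chunks[middle], the data is memcpy'd into the staging buffer at
   the same relative position (cbuf->buf mirrors the device range starting
   at chunks[0].start); the actual device transfer happens later, when
   gomp_map_vars_internal flushes each chunk with a single copy.  */
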
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}

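/* For illustration: each map kind is either an 8-bit (GOMP_target) or a
   16-bit (GOMP_target_ext) value.  The low bits (masked by 0x7 or 0xff
   below) select the GOMP_MAP_* kind; the high bits (kind >> 3 or
   kind >> 8) encode log2 of the required alignment, so
   (size_t) 1 << (kind >> rshift) recovers the alignment in bytes.  */
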
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is the target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                              &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                                      &tgt->list[i], kind & typemask, cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
           + tgt->list[i + 1].key->tgt_offset
           + tgt->list[i + 1].offset
           + (uintptr_t) hostaddrs[i]
           - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}

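/* For illustration, the sentinel values stored in tgt->list[i].offset by
   gomp_map_vars_internal below and decoded above:
     ~(uintptr_t) 0  pass the host address through unchanged
                     (e.g. GOMP_MAP_FIRSTPRIVATE_INT, resolved
                     use_device_ptr);
     ~(uintptr_t) 1  pass a NULL pointer (e.g. an unmapped zero-length
                     array section or GOMP_MAP_ALWAYS_POINTER);
     ~(uintptr_t) 2  address relative to the following list entry
                     (GOMP_MAP_STRUCT);
   any other value with a NULL key is an offset from tgt->tgt_start
   (e.g. GOMP_MAP_FIRSTPRIVATE copies).  */
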
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            tgt->list[i].offset = ~(uintptr_t) 0;
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 2;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 1;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = ~(uintptr_t) 1;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                kind & typemask, NULL);
      else
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate a tgt_size block of memory aligned to tgt_align.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n == NULL)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    cur_node.host_start -= n->host_start;
                    hostaddrs[i]
                      = (void *) (n->tgt->tgt_start + n->tgt_offset
                                  + cur_node.host_start);
                    tgt->list[i].offset = ~(uintptr_t) 0;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, cbufp);
            else
              {
                k->link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != ~(size_t) 0)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = ~(size_t) 0;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt, aq,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j], cbufp);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, size_t mapnum,
                     void **hostaddrs, void **devaddrs, size_t *sizes,
                     void *kinds, bool short_mapkind,
                     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}

static void
gomp_unref_tgt (void *ptr)
{
  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
                          struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep, aq,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        gomp_remove_var (devicep, k);
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
                                                (void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
                       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
                                  NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compiler emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

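/* For illustration, the layout of HOST_TABLE consumed above — four pointers
   bounding two arrays:

     void *host_table[] = { func_table, funcs_end, var_table, vars_end };

   where the function table is an array of host function addresses and the
   variable table holds (address, size) pairs, hence num_vars halving its
   length.  (The names above are illustrative; the compiler's mkoffload
   machinery emits the actual tables.)  Each function is entered into the
   splay tree as the one-byte range [fn, fn+1), which is enough for the
   exact-address lookup done by gomp_get_target_fn_addr below.  */
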
/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                                : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          is_tgt_unmapped = gomp_remove_var (devicep, n);
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while it is being
   loaded.  It gets the descriptor of the host func and var tables HOST_TABLE,
   the TYPE of the target, and the TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while it is being
   unloaded.  It gets the descriptor of the host func and var tables
   HOST_TABLE, the TYPE of the target, and the TARGET_DATA needed by the
   target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered so far.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}

/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
   locked on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}

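/* For illustration: with two firstprivate vars, an int (size 4, align 4)
   followed by a double (size 8, align 8), calculate_firstprivate_requirements
   yields tgt_align 8 and tgt_size 16 (bytes 0..4 for the int, then 4
   rounded up to 8, plus 8 for the double).  copy_firstprivate_data then
   lays both out in the alloca'd block with the same arithmetic and
   redirects hostaddrs[] at the private copies.  */
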
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

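/* For illustration: the lookup above works because gomp_load_image_to_device
   registered every host function as the dummy range [fn, fn+1) with the
   device entry point stored in tgt_offset, so translating a host function
   pointer to its device counterpart is a plain splay-tree search keyed on
   the host address.  */
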
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
                     NULL);
  gomp_unmap_vars (tgt_vars, true);
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);

  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}

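/* For illustration, the ARGS encoding handed to the plugins (see the
   GOMP_TARGET_ARG_* constants in gomp-constants.h): a NULL-terminated
   array where each argument takes one or two elements; the low bits of
   the first element select the target device and the argument id, and the
   value is either packed into the high bits of that same element or, when
   it doesn't fit, placed in the following element.  A small num_teams
   value is thus a single encoded element, while an oversized value
   occupies two.  */
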
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

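/* For reference, a construct such as

     #pragma omp target data map(tofrom: a[0:n])
     { ... }

   compiles to a bracketing pair of the entry points above, roughly as
   follows.  This is an illustrative sketch only; the real compiler-emitted
   descriptors can also contain pointer/pset entries and encoded biases:

     void *hostaddrs[] = { a };
     size_t sizes[] = { n * sizeof (*a) };
     unsigned short kinds[] = { GOMP_MAP_TOFROM };
     GOMP_target_data_ext (device, 1, hostaddrs, sizes, kinds);
     ...body of the data region...
     GOMP_target_end_data ();  */
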
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds,
                        unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
                                           depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (__builtin_expect (gomp_cancel_var, 0) && team)
                {
                  if (gomp_team_barrier_cancelled (&team->barrier))
                    return;
                  if (thr->task->taskgroup)
                    {
                      if (thr->task->taskgroup->cancelled)
                        return;
                      if (thr->task->taskgroup->workshare
                          && thr->task->taskgroup->prev
                          && thr->task->taskgroup->prev->cancelled)
                        return;
                    }
                }

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

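/* E.g., "#pragma omp target update from(x)" becomes, roughly (an
   illustrative sketch only):

     void *hostaddrs[] = { &x };
     size_t sizes[] = { sizeof (x) };
     unsigned short kinds[] = { GOMP_MAP_FROM };
     GOMP_target_update_ext (device, 1, hostaddrs, sizes, kinds, 0, NULL);

   with zero flags and a null depend vector when neither nowait nor
   depend clauses are present.  */
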
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
            k->refcount--;
          if ((kind == GOMP_MAP_DELETE
               || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
              && k->refcount != REFCOUNT_INFINITY)
            k->refcount = 0;

          if ((kind == GOMP_MAP_FROM && k->refcount == 0)
              || kind == GOMP_MAP_ALWAYS_FROM)
            gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
                                (void *) (k->tgt->tgt_start + k->tgt_offset
                                          + cur_node.host_start
                                          - k->host_start),
                                cur_node.host_end - cur_node.host_start);
          if (k->refcount == 0)
            {
              splay_tree_remove (&devicep->mem_map, k);
              if (k->link_key)
                splay_tree_insert (&devicep->mem_map,
                                   (splay_tree_node) k->link_key);
              if (k->tgt->refcount > 1)
                k->tgt->refcount--;
              else
                gomp_unmap_tgt (k->tgt);
            }

          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  gomp_mutex_unlock (&devicep->lock);
}

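/* The reference counting above keeps exit data symmetric with enter data.
   For instance (an illustrative sketch):

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target exit data map(from: a[0:n])

   leaves the mapping's refcount at 1, so nothing is copied back or
   unmapped until a second exit data (or a delete map kind) drops the
   refcount to zero.  */
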
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
                             size_t *sizes, unsigned short *kinds,
                             unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (__builtin_expect (gomp_cancel_var, 0) && team)
                {
                  if (gomp_team_barrier_cancelled (&team->barrier))
                    return;
                  if (thr->task->taskgroup)
                    {
                      if (thr->task->taskgroup->cancelled)
                        return;
                      if (thr->task->taskgroup->workshare
                          && thr->task->taskgroup->prev
                          && thr->task->taskgroup->prev->cancelled)
                        return;
                    }
                }

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
                         &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
          i += sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
                       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}

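/* A note on the GOMP_MAP_STRUCT handling above: for a construct such as
   "#pragma omp target enter data map(to: s.x, s.y)" the compiler emits a
   leading GOMP_MAP_STRUCT element whose sizes[] slot holds the number of
   field entries that follow (2 here), which is why sizes[i] + 1 list
   entries are handed to gomp_map_vars at once and then skipped over.  */
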
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
          || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
        {
          ttask->state = GOMP_TARGET_TASK_FALLBACK;
          gomp_target_fallback (ttask->fn, ttask->hostaddrs);
          return false;
        }

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
        {
          if (ttask->tgt)
            gomp_unmap_vars (ttask->tgt, true);
          return false;
        }

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        {
          ttask->tgt = NULL;
          actual_arguments = ttask->hostaddrs;
        }
      else
        {
          ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
                                      NULL, ttask->sizes, ttask->kinds, true,
                                      GOMP_MAP_VARS_TARGET);
          actual_arguments = (void *) ttask->tgt->tgt_start;
        }
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
                               ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
           || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
           || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
                         NULL, &ttask->sizes[i], &ttask->kinds[i], true,
                         GOMP_MAP_VARS_ENTER_DATA);
          i += ttask->sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
                       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                    ttask->kinds);
  return false;
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

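/* Typical use of the two entry points above from user code (an
   illustrative sketch; error handling omitted):

     int dev = omp_get_default_device ();
     double *d_buf = (double *) omp_target_alloc (n * sizeof (double), dev);
     if (d_buf)
       {
         ...use d_buf, e.g. in a target region with is_device_ptr(d_buf)...
         omp_target_free (d_buf, dev);
       }
 */
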
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

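/* E.g. (illustrative): after "#pragma omp target enter data
   map(to: a[0:n])" on device dev, omp_target_is_present (a, dev)
   returns nonzero, since the lookup above finds the splay tree entry
   that the mapping created.  */
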
int
omp_target_memcpy (void *dst, const void *src, size_t length,
                   size_t dst_offset, size_t src_offset, int dst_device_num,
                   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                       (char *) dst + dst_offset,
                                       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}

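/* E.g., filling the device buffer from the omp_target_alloc sketch above
   with host data (illustrative only; assumes "dev" is a valid device
   number and "d_buf" holds at least n doubles):

     double h_buf[128];
     int err = omp_target_memcpy (d_buf, h_buf, n * sizeof (double),
                                  0, 0, dev, omp_get_initial_device ());

   A zero return means success; EINVAL signals an invalid device number
   or a failed transfer.  */
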
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
                               int num_dims, const size_t *volume,
                               const size_t *dst_offsets,
                               const size_t *src_offsets,
                               const size_t *dst_dimensions,
                               const size_t *src_dimensions,
                               struct gomp_device_descr *dst_devicep,
                               struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
          || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
          || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
        return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
        {
          memcpy ((char *) dst + dst_off, (const char *) src + src_off,
                  length);
          ret = 1;
        }
      else if (src_devicep == NULL)
        ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (const char *) src + src_off,
                                          length);
      else if (dst_devicep == NULL)
        ret = src_devicep->dev2host_func (src_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (const char *) src + src_off,
                                          length);
      else if (src_devicep == dst_devicep)
        ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                         (char *) dst + dst_off,
                                         (const char *) src + src_off,
                                         length);
      else
        ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
        || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
                                           (const char *) src + src_off,
                                           element_size, num_dims - 1,
                                           volume + 1, dst_offsets + 1,
                                           src_offsets + 1, dst_dimensions + 1,
                                           src_dimensions + 1, dst_devicep,
                                           src_devicep);
      if (ret)
        return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
                        int num_dims, const size_t *volume,
                        const size_t *dst_offsets,
                        const size_t *src_offsets,
                        const size_t *dst_dimensions,
                        const size_t *src_dimensions,
                        int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
                                           volume, dst_offsets, src_offsets,
                                           dst_dimensions, src_dimensions,
                                           dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}

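/* E.g., copying a 2x3 sub-block of a 4x5 host matrix h_mat into the
   top-left corner of a 4x5 device matrix d_mat (an illustrative sketch;
   "dev" and the two buffers are assumed to be set up as above):

     size_t volume[] = { 2, 3 };
     size_t dst_off[] = { 0, 0 }, src_off[] = { 1, 1 };
     size_t dims[] = { 4, 5 };
     int err = omp_target_memcpy_rect (d_mat, h_mat, sizeof (double), 2,
                                       volume, dst_off, src_off, dims, dims,
                                       dev, omp_get_initial_device ());  */
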
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
                          size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
          == (uintptr_t) device_ptr + device_offset
          && n->host_start <= cur_node.host_start
          && n->host_end >= cur_node.host_end)
        ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

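/* Typical pairing with omp_target_disassociate_ptr below (an
   illustrative sketch; "h" is a host buffer of n doubles):

     void *d = omp_target_alloc (n * sizeof (double), dev);
     omp_target_associate_ptr (h, d, n * sizeof (double), 0, dev);
     ...target regions now treat h as already present, backed by d...
     omp_target_disassociate_ptr (h, dev);
     omp_target_free (d, dev);
 */
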
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= gomp_get_num_devices ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the
   plugin is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))
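  /* For instance, "DLSYM (alloc);" below expands to

       if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
         goto dl_fail

     so every plugin must export its hooks under fixed GOMP_OFFLOAD_*
     symbol names.  */
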
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data)
          || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
          || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
          || !DLSYM_OPT (openacc.async.test, openacc_async_test)
          || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
          || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
          || !DLSYM_OPT (openacc.async.queue_callback,
                         openacc_async_queue_callback)
          || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
          || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
          || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
        ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
        gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins and tries to load them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;
        size_t prefix_len, suffix_len, cur_len;

        next = strchr (cur, ',');

        prefix_len = strlen (prefix);
        cur_len = next ? next - cur : strlen (cur);
        suffix_len = strlen (suffix);

        plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
        if (!plugin_name)
          {
            num_devices = 0;
            break;
          }

        memcpy (plugin_name, prefix, prefix_len);
        memcpy (plugin_name + prefix_len, cur, cur_len);
        memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
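        /* E.g., if OFFLOAD_PLUGINS is "nvptx,hsa", this builds
           "libgomp-plugin-nvptx.so.1" on the first iteration and
           "libgomp-plugin-hsa.so.1" on the second, with the exact
           suffix coming from SONAME_SUFFIX in the generated
           plugin-suffix.h.  */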

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devices = current_device.get_num_devices_func ();
            if (new_num_devices >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devices = realloc (devices, (num_devices + new_num_devices)
                                   * sizeof (struct gomp_device_descr));
                if (!devices)
                  {
                    num_devices = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.state = GOMP_DEVICE_UNINITIALIZED;
                current_device.openacc.data_environ = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */