/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
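
/* Return the number of devices usable for OpenMP offloading (those with
   GOMP_OFFLOAD_CAP_OPENMP_400), initializing the plugins on first call.  */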

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
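
/* Translate DEVICE_ID (or the default-device-var ICV when GOMP_DEVICE_ICV)
   into a device descriptor, initializing the device on first use.  Return
   NULL if the id is out of range or the device has been finalized, in which
   case the caller falls back to host execution.  */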

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
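
/* Look up KEY in MEM_MAP.  For a zero-length KEY (host_start == host_end,
   e.g. a zero-length array section), also try matching mappings that
   contain, end at, or start at the address by temporarily widening the
   key.  */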

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
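
/* Like gomp_map_lookup, but for a zero-length KEY only match mappings that
   contain the address; used for zero-length array sections.  */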

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
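
/* Copy SIZE bytes between host and device through the plugin's COPY_FUNC,
   reporting a fatal error (after releasing DEVICEP's lock) that names the
   DST and SRC sides if the copy fails.  */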

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
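
/* Copy host data H to device address D.  If CBUF describes a coalesce
   buffer and [D, D+SZ) falls inside one of its chunks, just memcpy the data
   into the staging buffer; the whole chunk will be flushed to the device
   later in a single transfer.  Otherwise copy directly via the plugin.  */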

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
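
/* Copy SZ bytes from device address D back to host address H.  */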

static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
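
/* Free the device memory block at DEVPTR, failing fatally (after releasing
   DEVICEP's lock) if the plugin reports an error.  */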

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
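
/* Fetch the mapping kind at index IDX from KINDS, which holds 16-bit
   entries when SHORT_MAPKIND and 8-bit entries otherwise.  */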

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}
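
/* Initialize the pointer at device address TGT->tgt_start + TARGET_OFFSET
   with the device address corresponding to HOST_PTR + BIAS, which must point
   into an already mapped array section; a NULL host pointer is translated to
   a NULL device pointer.  */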

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
                  uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
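
/* Handle structure element I of a GOMP_MAP_STRUCT mapping whose parent
   structure N is already mapped: the element must fall into the same
   target_mem_desc at the matching offset, otherwise it is a fatal error.
   Helper function of gomp_map_vars.  */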

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
                              &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}
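
/* Return the device address to store for mapping I when launching a target
   region: the address inside an existing mapping if there is a key, else
   the host address itself (offset ~0), NULL (offset ~1), an address
   relative to the following mapping (offset ~2, used for GOMP_MAP_STRUCT),
   or an offset into the freshly allocated device block.  */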

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
           + tgt->list[i + 1].key->tgt_offset
           + tgt->list[i + 1].offset
           + (uintptr_t) hostaddrs[i]
           - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
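
/* Map the MAPNUM host regions described by HOSTADDRS/SIZES/KINDS onto
   DEVICEP, reusing and reference-counting existing mappings and allocating
   a single device block for the new ones.  PRAGMA_KIND distinguishes
   target, target data and (enter) data regions.  Return a target_mem_desc
   describing the mappings, or NULL when none is needed (finalized device,
   or enter data where everything was already mapped).  */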

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
        {
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start;
          splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("use_device_ptr pointer wasn't mapped");
            }
          cur_node.host_start -= n->host_start;
          hostaddrs[i]
            = (void *) (n->tgt->tgt_start + n->tgt_offset
                        + cur_node.host_start);
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 2;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 1;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = ~(uintptr_t) 1;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
                                kind & typemask, NULL);
      else
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_USE_DEVICE_PTR:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
                                      kind & typemask, cbufp);
            else
              {
                k->link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != ~(size_t) 0)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = ~(size_t) 0;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j], cbufp);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
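
/* Remove splay tree key K from DEVICEP's address mapping, restoring a
   displaced "declare target link" key if there was one, and unmap the
   owning target_mem_desc once its reference count drops.  Return true if
   the target_mem_desc was unmapped.  DEVICEP's lock must be held.  */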

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        gomp_remove_var (devicep, k);
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
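
/* Implement "omp target update": copy each of the MAPNUM regions in
   HOSTADDRS/SIZES to or from the device according to KINDS, requiring each
   region to already be mapped in full.  */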

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compiler
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          is_tgt_unmapped = gomp_remove_var (devicep, n);
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
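
/* Unload from DEVICEP all images of its type registered at the moment,
   provided the device is still in the initialized state.  */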

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
                     NULL);
  gomp_unmap_vars (tgt_vars, true);
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
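
/* Handle "omp target data" with the original 8-bit KINDS encoding; the new
   mapping set is pushed onto the task ICV's target_data chain and stays
   live until the matching GOMP_target_end_data.  */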

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
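
/* Like GOMP_target_data, but KINDS is 16-bit and the unused ABI argument
   is no longer present.  */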

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
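
/* Close the innermost "omp target data" region: pop it off the ICV's
   target_data chain and unmap its variables, copying back map(from:)
   data.  */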

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
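
/* Handle "omp target update" with the original 8-bit KINDS encoding and
   unused ABI argument; a no-op when the selected device is unavailable or
   shares memory with the host.  */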

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
1835 void
1836 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1837 size_t *sizes, unsigned short *kinds,
1838 unsigned int flags, void **depend)
1840 struct gomp_device_descr *devicep = resolve_device (device);
1842 /* If there are depend clauses, but nowait is not present,
1843 block the parent task until the dependencies are resolved
1844 and then just continue with the rest of the function as if it
1845 is a merged task. Until we are able to schedule task during
1846 variable mapping or unmapping, ignore nowait if depend clauses
1847 are not present. */
1848 if (depend != NULL)
1850 struct gomp_thread *thr = gomp_thread ();
1851 if (thr->task && thr->task->depend_hash)
1853 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1854 && thr->ts.team
1855 && !thr->task->final_task)
1857 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1858 mapnum, hostaddrs, sizes, kinds,
1859 flags | GOMP_TARGET_FLAG_UPDATE,
1860 depend, NULL, GOMP_TARGET_TASK_DATA))
1861 return;
1863 else
1865 struct gomp_team *team = thr->ts.team;
1866 /* If parallel or taskgroup has been cancelled, don't start new
1867 tasks. */
1868 if (__builtin_expect (gomp_cancel_var, 0) && team)
1870 if (gomp_team_barrier_cancelled (&team->barrier))
1871 return;
1872 if (thr->task->taskgroup)
1874 if (thr->task->taskgroup->cancelled)
1875 return;
1876 if (thr->task->taskgroup->workshare
1877 && thr->task->taskgroup->prev
1878 && thr->task->taskgroup->prev->cancelled)
1879 return;
1883 gomp_task_maybe_wait_for_dependencies (depend);
1888 if (devicep == NULL
1889 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1890 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1891 return;
1893 struct gomp_thread *thr = gomp_thread ();
1894 struct gomp_team *team = thr->ts.team;
1895 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1896 if (__builtin_expect (gomp_cancel_var, 0) && team)
1898 if (gomp_team_barrier_cancelled (&team->barrier))
1899 return;
1900 if (thr->task->taskgroup)
1902 if (thr->task->taskgroup->cancelled)
1903 return;
1904 if (thr->task->taskgroup->workshare
1905 && thr->task->taskgroup->prev
1906 && thr->task->taskgroup->prev->cancelled)
1907 return;
1911 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
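/* For illustration: a directive such as

     #pragma omp target update to(x) depend(inout: x) nowait

   arrives here with GOMP_TARGET_FLAG_NOWAIT set and a non-NULL DEPEND
   vector.  Per the comment above, nowait is honoured only when a target
   task can actually be created; otherwise the parent task blocks on the
   dependencies and the update runs inline.  */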
1914 static void
1915 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1916 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1918 const int typemask = 0xff;
1919 size_t i;
1920 gomp_mutex_lock (&devicep->lock);
1921 if (devicep->state == GOMP_DEVICE_FINALIZED)
1923 gomp_mutex_unlock (&devicep->lock);
1924 return;
1927 for (i = 0; i < mapnum; i++)
1929 struct splay_tree_key_s cur_node;
1930 unsigned char kind = kinds[i] & typemask;
1931 switch (kind)
1933 case GOMP_MAP_FROM:
1934 case GOMP_MAP_ALWAYS_FROM:
1935 case GOMP_MAP_DELETE:
1936 case GOMP_MAP_RELEASE:
1937 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1938 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1939 cur_node.host_start = (uintptr_t) hostaddrs[i];
1940 cur_node.host_end = cur_node.host_start + sizes[i];
1941 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1942 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1943 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1944 : splay_tree_lookup (&devicep->mem_map, &cur_node);
1945 if (!k)
1946 continue;
1948 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1949 k->refcount--;
1950 if ((kind == GOMP_MAP_DELETE
1951 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1952 && k->refcount != REFCOUNT_INFINITY)
1953 k->refcount = 0;
1955 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1956 || kind == GOMP_MAP_ALWAYS_FROM)
1957 gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1958 (void *) (k->tgt->tgt_start + k->tgt_offset
1959 + cur_node.host_start
1960 - k->host_start),
1961 cur_node.host_end - cur_node.host_start);
1962 if (k->refcount == 0)
1964 splay_tree_remove (&devicep->mem_map, k);
1965 if (k->link_key)
1966 splay_tree_insert (&devicep->mem_map,
1967 (splay_tree_node) k->link_key);
1968 if (k->tgt->refcount > 1)
1969 k->tgt->refcount--;
1970 else
1971 gomp_unmap_tgt (k->tgt);
1974 break;
1975 default:
1976 gomp_mutex_unlock (&devicep->lock);
1977 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1978 kind);
1982 gomp_mutex_unlock (&devicep->lock);
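/* Worked example of the reference counting above: if an array section was
   mapped twice via enter data, its key starts at refcount == 2.  A first
   exit data with map(from: ...) drops it to 1 without copying back; a
   second drops it to 0, triggers the device-to-host copy and removes the
   mapping.  map(always, from: ...) copies back regardless of the count,
   while map(delete: ...) forces the count straight to 0 without copying.  */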
1985 void
1986 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1987 size_t *sizes, unsigned short *kinds,
1988 unsigned int flags, void **depend)
1990 struct gomp_device_descr *devicep = resolve_device (device);
1992 /* If there are depend clauses, but nowait is not present,
1993 block the parent task until the dependencies are resolved
1994 and then just continue with the rest of the function as if it
1995 were a merged task. Until we are able to schedule tasks during
1996 variable mapping or unmapping, ignore nowait if depend clauses
1997 are not present. */
1998 if (depend != NULL)
2000 struct gomp_thread *thr = gomp_thread ();
2001 if (thr->task && thr->task->depend_hash)
2003 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2004 && thr->ts.team
2005 && !thr->task->final_task)
2007 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2008 mapnum, hostaddrs, sizes, kinds,
2009 flags, depend, NULL,
2010 GOMP_TARGET_TASK_DATA))
2011 return;
2013 else
2015 struct gomp_team *team = thr->ts.team;
2016 /* If parallel or taskgroup has been cancelled, don't start new
2017 tasks. */
2018 if (__builtin_expect (gomp_cancel_var, 0) && team)
2020 if (gomp_team_barrier_cancelled (&team->barrier))
2021 return;
2022 if (thr->task->taskgroup)
2024 if (thr->task->taskgroup->cancelled)
2025 return;
2026 if (thr->task->taskgroup->workshare
2027 && thr->task->taskgroup->prev
2028 && thr->task->taskgroup->prev->cancelled)
2029 return;
2033 gomp_task_maybe_wait_for_dependencies (depend);
2038 if (devicep == NULL
2039 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2040 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2041 return;
2043 struct gomp_thread *thr = gomp_thread ();
2044 struct gomp_team *team = thr->ts.team;
2045 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2046 if (__builtin_expect (gomp_cancel_var, 0) && team)
2048 if (gomp_team_barrier_cancelled (&team->barrier))
2049 return;
2050 if (thr->task->taskgroup)
2052 if (thr->task->taskgroup->cancelled)
2053 return;
2054 if (thr->task->taskgroup->workshare
2055 && thr->task->taskgroup->prev
2056 && thr->task->taskgroup->prev->cancelled)
2057 return;
2061 size_t i;
2062 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2063 for (i = 0; i < mapnum; i++)
2064 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2066 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2067 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2068 i += sizes[i];
2070 else
2071 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2072 true, GOMP_MAP_VARS_ENTER_DATA);
2073 else
2074 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
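/* Illustrative sketch: both

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target exit data map(from: a[0:n])

   funnel into the entry point above; the exit form sets
   GOMP_TARGET_FLAG_EXIT_DATA and dispatches to gomp_exit_data, while the
   enter form maps each clause (GOMP_MAP_STRUCT entries consume the
   following sizes[i] maps in one gomp_map_vars call).  */

/* Run a deferred target task; returns true iff the work was handed to the
   device asynchronously via async_run_func and the task stays live.  */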
2077 bool
2078 gomp_target_task_fn (void *data)
2080 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2081 struct gomp_device_descr *devicep = ttask->devicep;
2083 if (ttask->fn != NULL)
2085 void *fn_addr;
2086 if (devicep == NULL
2087 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2088 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2089 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2091 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2092 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2093 return false;
2096 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2098 if (ttask->tgt)
2099 gomp_unmap_vars (ttask->tgt, true);
2100 return false;
2103 void *actual_arguments;
2104 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2106 ttask->tgt = NULL;
2107 actual_arguments = ttask->hostaddrs;
2109 else
2111 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2112 NULL, ttask->sizes, ttask->kinds, true,
2113 GOMP_MAP_VARS_TARGET);
2114 actual_arguments = (void *) ttask->tgt->tgt_start;
2116 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2118 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2119 ttask->args, (void *) ttask);
2120 return true;
2122 else if (devicep == NULL
2123 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2124 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2125 return false;
2127 size_t i;
2128 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2129 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2130 ttask->kinds, true);
2131 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2132 for (i = 0; i < ttask->mapnum; i++)
2133 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2135 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2136 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2137 GOMP_MAP_VARS_ENTER_DATA);
2138 i += ttask->sizes[i];
2140 else
2141 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2142 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2143 else
2144 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2145 ttask->kinds);
2146 return false;
2149 void
2150 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2152 if (thread_limit)
2154 struct gomp_task_icv *icv = gomp_icv (true);
2155 icv->thread_limit_var
2156 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2158 (void) num_teams;
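/* For illustration (a sketch, assuming the usual lowering): a construct like

     #pragma omp teams num_teams(4) thread_limit(64)

   reaches this entry point as GOMP_teams (4, 64); only the thread limit is
   recorded in the ICV, values above INT_MAX are clamped to UINT_MAX, and a
   zero THREAD_LIMIT leaves the previous value untouched.  */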
2161 void *
2162 omp_target_alloc (size_t size, int device_num)
2164 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2165 return malloc (size);
2167 if (device_num < 0)
2168 return NULL;
2170 struct gomp_device_descr *devicep = resolve_device (device_num);
2171 if (devicep == NULL)
2172 return NULL;
2174 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2175 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2176 return malloc (size);
2178 gomp_mutex_lock (&devicep->lock);
2179 void *ret = devicep->alloc_func (devicep->target_id, size);
2180 gomp_mutex_unlock (&devicep->lock);
2181 return ret;
2184 void
2185 omp_target_free (void *device_ptr, int device_num)
2187 if (device_ptr == NULL)
2188 return;
2190 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2192 free (device_ptr);
2193 return;
2196 if (device_num < 0)
2197 return;
2199 struct gomp_device_descr *devicep = resolve_device (device_num);
2200 if (devicep == NULL)
2201 return;
2203 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2204 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2206 free (device_ptr);
2207 return;
2210 gomp_mutex_lock (&devicep->lock);
2211 gomp_free_device_memory (devicep, device_ptr);
2212 gomp_mutex_unlock (&devicep->lock);
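/* Usage sketch (illustrative; assumes the default device is a real offload
   device):

     #include <omp.h>

     void
     scratch_buffer_demo (void)
     {
       int d = omp_get_default_device ();
       void *p = omp_target_alloc (1024, d);
       if (p != NULL)
         omp_target_free (p, d);
     }

   With device_num == GOMP_DEVICE_HOST_FALLBACK, or on shared-memory
   devices, both calls degenerate to plain malloc/free as shown above.  */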
2215 int
2216 omp_target_is_present (const void *ptr, int device_num)
2218 if (ptr == NULL)
2219 return 1;
2221 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2222 return 1;
2224 if (device_num < 0)
2225 return 0;
2227 struct gomp_device_descr *devicep = resolve_device (device_num);
2228 if (devicep == NULL)
2229 return 0;
2231 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2232 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2233 return 1;
2235 gomp_mutex_lock (&devicep->lock);
2236 struct splay_tree_s *mem_map = &devicep->mem_map;
2237 struct splay_tree_key_s cur_node;
2239 cur_node.host_start = (uintptr_t) ptr;
2240 cur_node.host_end = cur_node.host_start;
2241 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2242 int ret = n != NULL;
2243 gomp_mutex_unlock (&devicep->lock);
2244 return ret;
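/* Illustrative check: after an earlier enter data mapping of A,
   omp_target_is_present (a, d) returns nonzero.  Note the lookup above uses
   host_end == host_start, a zero-length range, so only the start address of
   the object needs to be mapped for a hit.  */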
2247 int
2248 omp_target_memcpy (void *dst, const void *src, size_t length,
2249 size_t dst_offset, size_t src_offset, int dst_device_num,
2250 int src_device_num)
2252 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2253 bool ret;
2255 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2257 if (dst_device_num < 0)
2258 return EINVAL;
2260 dst_devicep = resolve_device (dst_device_num);
2261 if (dst_devicep == NULL)
2262 return EINVAL;
2264 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2265 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2266 dst_devicep = NULL;
2268 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2270 if (src_device_num < 0)
2271 return EINVAL;
2273 src_devicep = resolve_device (src_device_num);
2274 if (src_devicep == NULL)
2275 return EINVAL;
2277 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2278 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2279 src_devicep = NULL;
2281 if (src_devicep == NULL && dst_devicep == NULL)
2283 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2284 return 0;
2286 if (src_devicep == NULL)
2288 gomp_mutex_lock (&dst_devicep->lock);
2289 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2290 (char *) dst + dst_offset,
2291 (char *) src + src_offset, length);
2292 gomp_mutex_unlock (&dst_devicep->lock);
2293 return (ret ? 0 : EINVAL);
2295 if (dst_devicep == NULL)
2297 gomp_mutex_lock (&src_devicep->lock);
2298 ret = src_devicep->dev2host_func (src_devicep->target_id,
2299 (char *) dst + dst_offset,
2300 (char *) src + src_offset, length);
2301 gomp_mutex_unlock (&src_devicep->lock);
2302 return (ret ? 0 : EINVAL);
2304 if (src_devicep == dst_devicep)
2306 gomp_mutex_lock (&src_devicep->lock);
2307 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2308 (char *) dst + dst_offset,
2309 (char *) src + src_offset, length);
2310 gomp_mutex_unlock (&src_devicep->lock);
2311 return (ret ? 0 : EINVAL);
2313 return EINVAL;
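/* Usage sketch (illustrative; D is assumed to be a valid offload device, and
   omp_get_initial_device () to name the host as in this libgomp):

     int host_buf[256];
     void *dev_buf = omp_target_alloc (sizeof host_buf, d);
     int err = omp_target_memcpy (dev_buf, host_buf, sizeof host_buf,
                                  0, 0, d, omp_get_initial_device ());

   A zero return means success; this host-to-device case takes the
   host2dev_func branch above.  */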
2316 static int
2317 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2318 int num_dims, const size_t *volume,
2319 const size_t *dst_offsets,
2320 const size_t *src_offsets,
2321 const size_t *dst_dimensions,
2322 const size_t *src_dimensions,
2323 struct gomp_device_descr *dst_devicep,
2324 struct gomp_device_descr *src_devicep)
2326 size_t dst_slice = element_size;
2327 size_t src_slice = element_size;
2328 size_t j, dst_off, src_off, length;
2329 int i, ret;
2331 if (num_dims == 1)
2333 if (__builtin_mul_overflow (element_size, volume[0], &length)
2334 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2335 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2336 return EINVAL;
2337 if (dst_devicep == NULL && src_devicep == NULL)
2339 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2340 length);
2341 ret = 1;
2343 else if (src_devicep == NULL)
2344 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2345 (char *) dst + dst_off,
2346 (const char *) src + src_off,
2347 length);
2348 else if (dst_devicep == NULL)
2349 ret = src_devicep->dev2host_func (src_devicep->target_id,
2350 (char *) dst + dst_off,
2351 (const char *) src + src_off,
2352 length);
2353 else if (src_devicep == dst_devicep)
2354 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2355 (char *) dst + dst_off,
2356 (const char *) src + src_off,
2357 length);
2358 else
2359 ret = 0;
2360 return ret ? 0 : EINVAL;
2363 /* FIXME: it would be nice to have some plugin function to handle
2364 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2365 be handled by the generic recursion below, and for host-to-host copies
2366 that recursion should be used even for num_dims == 2 and 3. */
2368 for (i = 1; i < num_dims; i++)
2369 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2370 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2371 return EINVAL;
2372 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2373 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2374 return EINVAL;
2375 for (j = 0; j < volume[0]; j++)
2377 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2378 (const char *) src + src_off,
2379 element_size, num_dims - 1,
2380 volume + 1, dst_offsets + 1,
2381 src_offsets + 1, dst_dimensions + 1,
2382 src_dimensions + 1, dst_devicep,
2383 src_devicep);
2384 if (ret)
2385 return ret;
2386 dst_off += dst_slice;
2387 src_off += src_slice;
2389 return 0;
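/* Worked example of the recursion above: copying a 2x3 block of 4-byte
   elements between 4x8 arrays, i.e. element_size == 4, volume == {2, 3}
   and dst_dimensions == src_dimensions == {4, 8}, gives
   dst_slice == src_slice == 4 * 8 == 32 bytes.  The loop then performs
   volume[0] == 2 recursive one-dimensional copies of 3 * 4 == 12 bytes
   each, advancing both offsets by one 32-byte row per iteration.  */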
2392 int
2393 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2394 int num_dims, const size_t *volume,
2395 const size_t *dst_offsets,
2396 const size_t *src_offsets,
2397 const size_t *dst_dimensions,
2398 const size_t *src_dimensions,
2399 int dst_device_num, int src_device_num)
2401 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2403 if (!dst && !src)
2404 return INT_MAX;
2406 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2408 if (dst_device_num < 0)
2409 return EINVAL;
2411 dst_devicep = resolve_device (dst_device_num);
2412 if (dst_devicep == NULL)
2413 return EINVAL;
2415 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2416 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2417 dst_devicep = NULL;
2419 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2421 if (src_device_num < 0)
2422 return EINVAL;
2424 src_devicep = resolve_device (src_device_num);
2425 if (src_devicep == NULL)
2426 return EINVAL;
2428 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2429 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2430 src_devicep = NULL;
2433 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2434 return EINVAL;
2436 if (src_devicep)
2437 gomp_mutex_lock (&src_devicep->lock);
2438 else if (dst_devicep)
2439 gomp_mutex_lock (&dst_devicep->lock);
2440 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2441 volume, dst_offsets, src_offsets,
2442 dst_dimensions, src_dimensions,
2443 dst_devicep, src_devicep);
2444 if (src_devicep)
2445 gomp_mutex_unlock (&src_devicep->lock);
2446 else if (dst_devicep)
2447 gomp_mutex_unlock (&dst_devicep->lock);
2448 return ret;
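/* Usage sketch (illustrative; DST/SRC are assumed to be suitably allocated
   and D to be a valid offload device):

     size_t volume[2]  = { 2, 3 };
     size_t dst_off[2] = { 1, 2 };
     size_t src_off[2] = { 0, 0 };
     size_t dims[2]    = { 4, 8 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (int), 2, volume,
                                       dst_off, src_off, dims, dims,
                                       d, omp_get_initial_device ());

   Calling it with both DST and SRC NULL is the capability query; the
   INT_MAX return above advertises how many dimensions are supported.  */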
2451 int
2452 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2453 size_t size, size_t device_offset, int device_num)
2455 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2456 return EINVAL;
2458 if (device_num < 0)
2459 return EINVAL;
2461 struct gomp_device_descr *devicep = resolve_device (device_num);
2462 if (devicep == NULL)
2463 return EINVAL;
2465 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2466 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2467 return EINVAL;
2469 gomp_mutex_lock (&devicep->lock);
2471 struct splay_tree_s *mem_map = &devicep->mem_map;
2472 struct splay_tree_key_s cur_node;
2473 int ret = EINVAL;
2475 cur_node.host_start = (uintptr_t) host_ptr;
2476 cur_node.host_end = cur_node.host_start + size;
2477 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2478 if (n)
2480 if (n->tgt->tgt_start + n->tgt_offset
2481 == (uintptr_t) device_ptr + device_offset
2482 && n->host_start <= cur_node.host_start
2483 && n->host_end >= cur_node.host_end)
2484 ret = 0;
2486 else
2488 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2489 tgt->array = gomp_malloc (sizeof (*tgt->array));
2490 tgt->refcount = 1;
2491 tgt->tgt_start = 0;
2492 tgt->tgt_end = 0;
2493 tgt->to_free = NULL;
2494 tgt->prev = NULL;
2495 tgt->list_count = 0;
2496 tgt->device_descr = devicep;
2497 splay_tree_node array = tgt->array;
2498 splay_tree_key k = &array->key;
2499 k->host_start = cur_node.host_start;
2500 k->host_end = cur_node.host_end;
2501 k->tgt = tgt;
2502 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2503 k->refcount = REFCOUNT_INFINITY;
2504 array->left = NULL;
2505 array->right = NULL;
2506 splay_tree_insert (&devicep->mem_map, array);
2507 ret = 0;
2509 gomp_mutex_unlock (&devicep->lock);
2510 return ret;
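/* Usage sketch (illustrative): pre-associating device storage so that later
   map clauses reuse it rather than allocating anew:

     int h[64];
     void *d_mem = omp_target_alloc (sizeof h, d);
     if (omp_target_associate_ptr (h, d_mem, sizeof h, 0, d) == 0)
       {
         ...target regions mapping h now use d_mem...
         omp_target_disassociate_ptr (h, d);
       }
     omp_target_free (d_mem, d);

   The REFCOUNT_INFINITY set above is what keeps the association alive
   across enter/exit data pairs until it is explicitly disassociated.  */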
2513 int
2514 omp_target_disassociate_ptr (const void *ptr, int device_num)
2516 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2517 return EINVAL;
2519 if (device_num < 0)
2520 return EINVAL;
2522 struct gomp_device_descr *devicep = resolve_device (device_num);
2523 if (devicep == NULL)
2524 return EINVAL;
2526 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2527 return EINVAL;
2529 gomp_mutex_lock (&devicep->lock);
2531 struct splay_tree_s *mem_map = &devicep->mem_map;
2532 struct splay_tree_key_s cur_node;
2533 int ret = EINVAL;
2535 cur_node.host_start = (uintptr_t) ptr;
2536 cur_node.host_end = cur_node.host_start;
2537 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2538 if (n
2539 && n->host_start == cur_node.host_start
2540 && n->refcount == REFCOUNT_INFINITY
2541 && n->tgt->tgt_start == 0
2542 && n->tgt->to_free == NULL
2543 && n->tgt->refcount == 1
2544 && n->tgt->list_count == 0)
2546 splay_tree_remove (&devicep->mem_map, n);
2547 gomp_unmap_tgt (n->tgt);
2548 ret = 0;
2551 gomp_mutex_unlock (&devicep->lock);
2552 return ret;
2555 int
2556 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2558 (void) kind;
2559 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2560 return gomp_pause_host ();
2561 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2562 return -1;
2563 /* Do nothing for target devices for now. */
2564 return 0;
2567 int
2568 omp_pause_resource_all (omp_pause_resource_t kind)
2570 (void) kind;
2571 if (gomp_pause_host ())
2572 return -1;
2573 /* Do nothing for target devices for now. */
2574 return 0;
2577 ialias (omp_pause_resource)
2578 ialias (omp_pause_resource_all)
2580 #ifdef PLUGIN_SUPPORT
2582 /* This function tries to load a plugin for DEVICE. The name of the plugin
2583 is passed in PLUGIN_NAME.
2584 The handles of the found functions are stored in the corresponding fields
2585 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2587 static bool
2588 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2589 const char *plugin_name)
2591 const char *err = NULL, *last_missing = NULL;
2593 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2594 if (!plugin_handle)
2595 goto dl_fail;
2597 /* Check if all required functions are available in the plugin and store
2598 their handles. None of the symbols can legitimately be NULL,
2599 so we don't need to check dlerror all the time. */
2600 #define DLSYM(f) \
2601 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2602 goto dl_fail
2603 /* Similar, but missing functions are not an error. The macro evaluates
2604 to false if the lookup failed, true otherwise. */
2605 #define DLSYM_OPT(f, n) \
2606 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2607 || (last_missing = #n, 0))
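/* For instance, DLSYM (alloc); below expands (roughly) to

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   whereas DLSYM_OPT only records the missing symbol's name in LAST_MISSING
   and evaluates to false instead of bailing out.  */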
2609 DLSYM (version);
2610 if (device->version_func () != GOMP_VERSION)
2612 err = "plugin version mismatch";
2613 goto fail;
2616 DLSYM (get_name);
2617 DLSYM (get_caps);
2618 DLSYM (get_type);
2619 DLSYM (get_num_devices);
2620 DLSYM (init_device);
2621 DLSYM (fini_device);
2622 DLSYM (load_image);
2623 DLSYM (unload_image);
2624 DLSYM (alloc);
2625 DLSYM (free);
2626 DLSYM (dev2host);
2627 DLSYM (host2dev);
2628 device->capabilities = device->get_caps_func ();
2629 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2631 DLSYM (run);
2632 DLSYM (async_run);
2633 DLSYM_OPT (can_run, can_run);
2634 DLSYM (dev2dev);
2636 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2638 if (!DLSYM_OPT (openacc.exec, openacc_exec)
2639 || !DLSYM_OPT (openacc.register_async_cleanup,
2640 openacc_register_async_cleanup)
2641 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2642 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2643 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2644 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2645 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2646 || !DLSYM_OPT (openacc.async_wait_all_async,
2647 openacc_async_wait_all_async)
2648 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2649 || !DLSYM_OPT (openacc.create_thread_data,
2650 openacc_create_thread_data)
2651 || !DLSYM_OPT (openacc.destroy_thread_data,
2652 openacc_destroy_thread_data))
2654 /* Require all the OpenACC handlers if we have
2655 GOMP_OFFLOAD_CAP_OPENACC_200. */
2656 err = "plugin missing OpenACC handler function";
2657 goto fail;
2660 unsigned cuda = 0;
2661 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2662 openacc_cuda_get_current_device);
2663 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2664 openacc_cuda_get_current_context);
2665 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2666 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2667 if (cuda && cuda != 4)
2669 /* Make sure all the CUDA functions are there if any of them are. */
2670 err = "plugin missing OpenACC CUDA handler function";
2671 goto fail;
2674 #undef DLSYM
2675 #undef DLSYM_OPT
2677 return 1;
2679 dl_fail:
2680 err = dlerror ();
2681 fail:
2682 gomp_error ("while loading %s: %s", plugin_name, err);
2683 if (last_missing)
2684 gomp_error ("missing function was %s", last_missing);
2685 if (plugin_handle)
2686 dlclose (plugin_handle);
2688 return 0;
2691 /* This function finalizes all initialized devices. */
2693 static void
2694 gomp_target_fini (void)
2696 int i;
2697 for (i = 0; i < num_devices; i++)
2699 bool ret = true;
2700 struct gomp_device_descr *devicep = &devices[i];
2701 gomp_mutex_lock (&devicep->lock);
2702 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2704 ret = devicep->fini_device_func (devicep->target_id);
2705 devicep->state = GOMP_DEVICE_FINALIZED;
2707 gomp_mutex_unlock (&devicep->lock);
2708 if (!ret)
2709 gomp_fatal ("device finalization failed");
2713 /* This function initializes the runtime needed for offloading.
2714 It parses the list of offload targets and tries to load the plugins for
2715 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2716 will be set, and the array DEVICES initialized, containing descriptors for
2717 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
2718 by the others. */
2720 static void
2721 gomp_target_init (void)
2723 const char *prefix ="libgomp-plugin-";
2724 const char *suffix = SONAME_SUFFIX (1);
2725 const char *cur, *next;
2726 char *plugin_name;
2727 int i, new_num_devices;
2729 num_devices = 0;
2730 devices = NULL;
2732 cur = OFFLOAD_TARGETS;
2733 if (*cur)
2734 do
2736 struct gomp_device_descr current_device;
2737 size_t prefix_len, suffix_len, cur_len;
2739 next = strchr (cur, ',');
2741 prefix_len = strlen (prefix);
2742 cur_len = next ? next - cur : strlen (cur);
2743 suffix_len = strlen (suffix);
2745 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2746 if (!plugin_name)
2748 num_devices = 0;
2749 break;
2752 memcpy (plugin_name, prefix, prefix_len);
2753 memcpy (plugin_name + prefix_len, cur, cur_len);
2754 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
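/* For example, an OFFLOAD_TARGETS entry "nvptx" yields
   "libgomp-plugin-nvptx" plus the shared-object suffix, i.e.
   "libgomp-plugin-nvptx.so.1" where SONAME_SUFFIX (1) expands to ".so.1"
   (as it does on ELF hosts).  */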
2756 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2758 new_num_devices = current_device.get_num_devices_func ();
2759 if (new_num_devices >= 1)
2761 /* Augment DEVICES and NUM_DEVICES. */
2763 devices = realloc (devices, (num_devices + new_num_devices)
2764 * sizeof (struct gomp_device_descr));
2765 if (!devices)
2767 num_devices = 0;
2768 free (plugin_name);
2769 break;
2772 current_device.name = current_device.get_name_func ();
2773 /* current_device.capabilities has already been set. */
2774 current_device.type = current_device.get_type_func ();
2775 current_device.mem_map.root = NULL;
2776 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2777 current_device.openacc.data_environ = NULL;
2778 for (i = 0; i < new_num_devices; i++)
2780 current_device.target_id = i;
2781 devices[num_devices] = current_device;
2782 gomp_mutex_init (&devices[num_devices].lock);
2783 num_devices++;
2788 free (plugin_name);
2789 cur = next + 1;
2791 while (next);
2793 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2794 NUM_DEVICES_OPENMP. */
2795 struct gomp_device_descr *devices_s
2796 = malloc (num_devices * sizeof (struct gomp_device_descr));
2797 if (!devices_s)
2799 num_devices = 0;
2800 free (devices);
2801 devices = NULL;
2803 num_devices_openmp = 0;
2804 for (i = 0; i < num_devices; i++)
2805 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2806 devices_s[num_devices_openmp++] = devices[i];
2807 int num_devices_after_openmp = num_devices_openmp;
2808 for (i = 0; i < num_devices; i++)
2809 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2810 devices_s[num_devices_after_openmp++] = devices[i];
2811 free (devices);
2812 devices = devices_s;
2814 for (i = 0; i < num_devices; i++)
2816 /* The 'devices' array can be moved (by the realloc call) until we have
2817 found all the plugins, so registering with the OpenACC runtime (which
2818 takes a copy of the pointer argument) must be delayed until now. */
2819 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2820 goacc_register (&devices[i]);
2823 if (atexit (gomp_target_fini) != 0)
2824 gomp_fatal ("atexit failed");
2827 #else /* PLUGIN_SUPPORT */
2828 /* If dlfcn.h is unavailable we always fall back to host execution.
2829 GOMP_target* routines are just stubs for this case. */
2830 static void
2831 gomp_target_init (void)
2834 #endif /* PLUGIN_SUPPORT */