/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support for offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);
/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
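
/* Note on the two counters above: NUM_DEVICES counts every device the
   plugins report, while NUM_DEVICES_OPENMP counts only those with the
   GOMP_OFFLOAD_CAP_OPENMP_400 capability.  Presumably the devices array
   is ordered by the initialization code (which is outside this excerpt)
   so the OpenMP-capable devices come first; resolve_device below relies
   on valid OpenMP device ids being 0 .. num_devices_openmp - 1.  */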
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
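
/* Note on the zero-length case above: for a key with host_start ==
   host_end the probes first try [start, start + 1) to catch a mapping
   that begins at or spans the address, then [start - 1, start) to catch
   one that ends exactly at it, and finally the empty range itself.  This
   lets zero-length array sections attach to any enclosing or adjacent
   mapping.  */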
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[2 * i] is the starting offset and
     chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  size_t *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
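
/* Worked example (for exposition only): with the defaults above, copies at
   tgt_start-relative offsets [0..100) and [1024..2048) are merged into one
   chunk [0..2048), because the 924-byte gap is below MAX_COALESCE_BUF_GAP;
   both regions are memcpy'd into cbuf->buf and sent in a single host2dev
   transfer.  A further copy at [8192..8292) starts a new chunk, and a
   64 KiB copy would bypass the buffer entirely, being larger than
   MAX_COALESCE_BUF_SIZE.  */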
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[2 * cbuf->chunk_cnt] = start;
  cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
        {
          /* Binary search the chunk list for a chunk containing DOFF;
             if found, stage the bytes in the coalesce buffer instead of
             issuing a separate device transfer.  */
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[2 * middle + 1] <= doff)
                first = middle + 1;
              else if (cbuf->chunks[2 * middle] <= doff)
                {
                  if (doff + sz > cbuf->chunks[2 * middle + 1])
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}
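
/* Each entry in KINDS encodes the map kind in its low bits (masked with
   the callers' typemask) and the log2 of the mapped variable's alignment
   in the remaining high bits, extracted as "kind >> rshift"; e.g. a high
   byte of 3 in a short mapkind means 2^3 = 8-byte alignment.  */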
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
                  uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
                              &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
           + tgt->list[i + 1].key->tgt_offset
           + tgt->list[i + 1].offset
           + (uintptr_t) hostaddrs[i]
           - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
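
/* The offset field of an entry with a NULL key doubles as a sentinel, as
   set up by gomp_map_vars below: ~0 means pass the (possibly rewritten)
   host address through unchanged, ~1 marks a NULL or zero-length entry
   whose device address is 0, and ~2 marks a GOMP_MAP_STRUCT entry whose
   address is computed relative to the following list entry.  */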
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      cbuf.chunks
        = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0] = 0;
      cbuf.chunks[1] = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
        {
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start;
          splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("use_device_ptr pointer wasn't mapped");
            }
          cur_node.host_start -= n->host_start;
          hostaddrs[i]
            = (void *) (n->tgt->tgt_start + n->tgt_offset
                        + cur_node.host_start);
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 2;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 1;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = ~(uintptr_t) 1;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
                                kind & typemask, NULL);
      else
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }
  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_USE_DEVICE_PTR:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
                                      kind & typemask, cbufp);
            else
              {
                k->link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != ~(size_t) 0)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = ~(size_t) 0;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j], cbufp);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) n->tgt_offset,
                                            &tgt_addr, sizeof (void *));
                  }
                array++;
              }
          }
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep,
                            (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
                            (char *) cbuf.buf + (cbuf.chunks[2 * c]
                                                 - cbuf.chunks[0]),
                            cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
      free (cbuf.buf);
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}
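
/* Refcount conventions used above, for reference: REFCOUNT_INFINITY marks
   mappings that are never unmapped (such as "omp declare target" objects
   registered with an offload image), while REFCOUNT_LINK marks "omp
   declare target link" entries that stand in for a variable until a real
   mapping replaces them, at which point the link key is saved in
   k->link_key and restored here on removal.  */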
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        gomp_remove_var (devicep, k);
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */
static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
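
/* A sketch of the layout assumed for HOST_TABLE (the symbol names below
   are for illustration only; the linker-provided tables come from the
   offload support code, not from this file):

     static const void *host_table[] = {
       &__offload_func_table, &__offload_funcs_end,
       &__offload_var_table, &__offload_vars_end
     };

   where the func table is an array of host function addresses and the var
   table alternates each variable's host address with its size.  */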
/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          is_tgt_unmapped = gomp_remove_var (devicep, n);
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images currently registered.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}
/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
   locked on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}
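
/* The rounding idiom used above and below, (size + align - 1) & ~(align - 1),
   rounds SIZE up to the next multiple of the power-of-two ALIGN; e.g. with
   a running tgt_size of 13 and an 8-byte aligned firstprivate variable,
   (13 + 7) & ~7 == 16, after which the variable's bytes are appended.  */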
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use the host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI,
   but we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with the addresses of the host objects, the sizes
   of the host objects (for pointer kinds, the pointer bias instead, with
   sizeof (void *) assumed as the size) and the kinds.  */
void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
                     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
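
/* Illustrative only (an assumption about compiler-generated code, not part
   of this file): for a construct like

     #pragma omp target map(tofrom: x)

   the compiler outlines the body into a function FN and emits a call along
   the lines of

     GOMP_target (GOMP_DEVICE_ICV, fn, NULL, 1, hostaddrs, sizes, kinds);

   where the three single-entry arrays hold &x, sizeof (x) and the tofrom
   mapping kind.  */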
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   ones are, for example, NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0, if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);

  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
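
/* Active "omp target data" regions thus form a stack threaded through
   tgt->prev with icv->target_data as its top: GOMP_target_data{,_ext}
   push a region and GOMP_target_end_data pops and unmaps the most recent
   one, which is why the host fallback above must push a dummy descriptor
   to keep the pushes and pops balanced.  */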
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
1824 void
1825 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1826 size_t *sizes, unsigned short *kinds,
1827 unsigned int flags, void **depend)
1829 struct gomp_device_descr *devicep = resolve_device (device);
1831 /* If there are depend clauses, but nowait is not present,
1832 block the parent task until the dependencies are resolved
1833 and then just continue with the rest of the function as if it
1834 is a merged task. Until we are able to schedule task during
1835 variable mapping or unmapping, ignore nowait if depend clauses
1836 are not present. */
1837 if (depend != NULL)
1839 struct gomp_thread *thr = gomp_thread ();
1840 if (thr->task && thr->task->depend_hash)
1842 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1843 && thr->ts.team
1844 && !thr->task->final_task)
1846 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1847 mapnum, hostaddrs, sizes, kinds,
1848 flags | GOMP_TARGET_FLAG_UPDATE,
1849 depend, NULL, GOMP_TARGET_TASK_DATA))
1850 return;
1852 else
1854 struct gomp_team *team = thr->ts.team;
1855 /* If parallel or taskgroup has been cancelled, don't start new
1856 tasks. */
1857 if (__builtin_expect (gomp_cancel_var, 0) && team)
1859 if (gomp_team_barrier_cancelled (&team->barrier))
1860 return;
1861 if (thr->task->taskgroup)
1863 if (thr->task->taskgroup->cancelled)
1864 return;
1865 if (thr->task->taskgroup->workshare
1866 && thr->task->taskgroup->prev
1867 && thr->task->taskgroup->prev->cancelled)
1868 return;
1872 gomp_task_maybe_wait_for_dependencies (depend);
1877 if (devicep == NULL
1878 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1879 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1880 return;
1882 struct gomp_thread *thr = gomp_thread ();
1883 struct gomp_team *team = thr->ts.team;
1884 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1885 if (__builtin_expect (gomp_cancel_var, 0) && team)
1887 if (gomp_team_barrier_cancelled (&team->barrier))
1888 return;
1889 if (thr->task->taskgroup)
1891 if (thr->task->taskgroup->cancelled)
1892 return;
1893 if (thr->task->taskgroup->workshare
1894 && thr->task->taskgroup->prev
1895 && thr->task->taskgroup->prev->cancelled)
1896 return;
1900 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
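/* Unmapping side of `#pragma omp target exit data': decrement the
   reference counts of the listed mappings (forcing them to zero for
   `delete'), copy back `from'/`always from' data, and remove mappings
   whose reference count has dropped to zero.  */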
1903 static void
1904 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1905 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1907 const int typemask = 0xff;
1908 size_t i;
1909 gomp_mutex_lock (&devicep->lock);
1910 if (devicep->state == GOMP_DEVICE_FINALIZED)
1912 gomp_mutex_unlock (&devicep->lock);
1913 return;
1916 for (i = 0; i < mapnum; i++)
1918 struct splay_tree_key_s cur_node;
1919 unsigned char kind = kinds[i] & typemask;
1920 switch (kind)
1922 case GOMP_MAP_FROM:
1923 case GOMP_MAP_ALWAYS_FROM:
1924 case GOMP_MAP_DELETE:
1925 case GOMP_MAP_RELEASE:
1926 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1927 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1928 cur_node.host_start = (uintptr_t) hostaddrs[i];
1929 cur_node.host_end = cur_node.host_start + sizes[i];
1930 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1931 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1932 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1933 : splay_tree_lookup (&devicep->mem_map, &cur_node);
1934 if (!k)
1935 continue;
1937 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1938 k->refcount--;
1939 if ((kind == GOMP_MAP_DELETE
1940 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1941 && k->refcount != REFCOUNT_INFINITY)
1942 k->refcount = 0;
1944 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1945 || kind == GOMP_MAP_ALWAYS_FROM)
1946 gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1947 (void *) (k->tgt->tgt_start + k->tgt_offset
1948 + cur_node.host_start
1949 - k->host_start),
1950 cur_node.host_end - cur_node.host_start);
1951 if (k->refcount == 0)
1953 splay_tree_remove (&devicep->mem_map, k);
1954 if (k->link_key)
1955 splay_tree_insert (&devicep->mem_map,
1956 (splay_tree_node) k->link_key);
1957 if (k->tgt->refcount > 1)
1958 k->tgt->refcount--;
1959 else
1960 gomp_unmap_tgt (k->tgt);
1963 break;
1964 default:
1965 gomp_mutex_unlock (&devicep->lock);
1966 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1967 kind);
1971 gomp_mutex_unlock (&devicep->lock);
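/* Entry point for both `target enter data' and `target exit data';
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS selects between mapping and
   unmapping.  As a rough sketch,

     #pragma omp target enter data map(to: a[0:n])

   is lowered to a call of this function with a single `to' map entry;
   the exact encoding of KINDS is compiler-generated.  */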
1974 void
1975 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1976 size_t *sizes, unsigned short *kinds,
1977 unsigned int flags, void **depend)
1979 struct gomp_device_descr *devicep = resolve_device (device);
1981 /* If there are depend clauses, but nowait is not present,
1982 block the parent task until the dependencies are resolved
1983 and then just continue with the rest of the function as if it
1984 were a merged task. Until we are able to schedule tasks during
1985 variable mapping or unmapping, ignore nowait if depend clauses
1986 are not present. */
1987 if (depend != NULL)
1989 struct gomp_thread *thr = gomp_thread ();
1990 if (thr->task && thr->task->depend_hash)
1992 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1993 && thr->ts.team
1994 && !thr->task->final_task)
1996 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1997 mapnum, hostaddrs, sizes, kinds,
1998 flags, depend, NULL,
1999 GOMP_TARGET_TASK_DATA))
2000 return;
2002 else
2004 struct gomp_team *team = thr->ts.team;
2005 /* If parallel or taskgroup has been cancelled, don't start new
2006 tasks. */
2007 if (__builtin_expect (gomp_cancel_var, 0) && team)
2009 if (gomp_team_barrier_cancelled (&team->barrier))
2010 return;
2011 if (thr->task->taskgroup)
2013 if (thr->task->taskgroup->cancelled)
2014 return;
2015 if (thr->task->taskgroup->workshare
2016 && thr->task->taskgroup->prev
2017 && thr->task->taskgroup->prev->cancelled)
2018 return;
2022 gomp_task_maybe_wait_for_dependencies (depend);
2027 if (devicep == NULL
2028 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2029 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2030 return;
2032 struct gomp_thread *thr = gomp_thread ();
2033 struct gomp_team *team = thr->ts.team;
2034 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2035 if (__builtin_expect (gomp_cancel_var, 0) && team)
2037 if (gomp_team_barrier_cancelled (&team->barrier))
2038 return;
2039 if (thr->task->taskgroup)
2041 if (thr->task->taskgroup->cancelled)
2042 return;
2043 if (thr->task->taskgroup->workshare
2044 && thr->task->taskgroup->prev
2045 && thr->task->taskgroup->prev->cancelled)
2046 return;
2050 size_t i;
2051 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2052 for (i = 0; i < mapnum; i++)
2053 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2055 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2056 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2057 i += sizes[i];
2059 else
2060 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2061 true, GOMP_MAP_VARS_ENTER_DATA);
2062 else
2063 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
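/* Task function used for `target', `target update' and `target
   enter/exit data' constructs that became target tasks.  Returns true
   if the work has been queued for asynchronous execution on the device
   and a plugin callback will complete the task later, false if
   everything was finished here.  */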
2066 bool
2067 gomp_target_task_fn (void *data)
2069 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2070 struct gomp_device_descr *devicep = ttask->devicep;
2072 if (ttask->fn != NULL)
2074 void *fn_addr;
2075 if (devicep == NULL
2076 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2077 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2078 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2080 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2081 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2082 return false;
2085 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2087 if (ttask->tgt)
2088 gomp_unmap_vars (ttask->tgt, true);
2089 return false;
2092 void *actual_arguments;
2093 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2095 ttask->tgt = NULL;
2096 actual_arguments = ttask->hostaddrs;
2098 else
2100 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2101 NULL, ttask->sizes, ttask->kinds, true,
2102 GOMP_MAP_VARS_TARGET);
2103 actual_arguments = (void *) ttask->tgt->tgt_start;
2105 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2107 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2108 ttask->args, (void *) ttask);
2109 return true;
2111 else if (devicep == NULL
2112 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2113 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2114 return false;
2116 size_t i;
2117 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2118 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2119 ttask->kinds, true);
2120 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2121 for (i = 0; i < ttask->mapnum; i++)
2122 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2124 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2125 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2126 GOMP_MAP_VARS_ENTER_DATA);
2127 i += ttask->sizes[i];
2129 else
2130 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2131 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2132 else
2133 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2134 ttask->kinds);
2135 return false;
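/* Host-side entry point for `#pragma omp teams': record the
   `thread_limit' clause in the ICVs.  NUM_TEAMS is currently unused.  */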
2138 void
2139 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2141 if (thread_limit)
2143 struct gomp_task_icv *icv = gomp_icv (true);
2144 icv->thread_limit_var
2145 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2147 (void) num_teams;
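/* OpenMP 4.5 omp_target_alloc: allocate SIZE bytes in the data
   environment of device DEVICE_NUM, falling back to malloc for the
   host and for shared-memory devices.  A minimal usage sketch
   (assuming device 0 is a non-shared-memory offload device):

     double *p = omp_target_alloc (n * sizeof (double), 0);
     if (p == NULL)
       abort ();
     ...
     omp_target_free (p, 0);  */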
2150 void *
2151 omp_target_alloc (size_t size, int device_num)
2153 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2154 return malloc (size);
2156 if (device_num < 0)
2157 return NULL;
2159 struct gomp_device_descr *devicep = resolve_device (device_num);
2160 if (devicep == NULL)
2161 return NULL;
2163 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2164 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2165 return malloc (size);
2167 gomp_mutex_lock (&devicep->lock);
2168 void *ret = devicep->alloc_func (devicep->target_id, size);
2169 gomp_mutex_unlock (&devicep->lock);
2170 return ret;
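/* OpenMP 4.5 omp_target_free: release memory obtained from
   omp_target_alloc on the same device.  NULL pointers and negative
   device numbers are ignored.  */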
2173 void
2174 omp_target_free (void *device_ptr, int device_num)
2176 if (device_ptr == NULL)
2177 return;
2179 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2181 free (device_ptr);
2182 return;
2185 if (device_num < 0)
2186 return;
2188 struct gomp_device_descr *devicep = resolve_device (device_num);
2189 if (devicep == NULL)
2190 return;
2192 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2193 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2195 free (device_ptr);
2196 return;
2199 gomp_mutex_lock (&devicep->lock);
2200 gomp_free_device_memory (devicep, device_ptr);
2201 gomp_mutex_unlock (&devicep->lock);
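/* OpenMP 4.5 omp_target_is_present: return nonzero if PTR is mapped in
   the data environment of device DEVICE_NUM.  NULL pointers, the host
   and shared-memory devices always count as present.  */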
2204 int
2205 omp_target_is_present (const void *ptr, int device_num)
2207 if (ptr == NULL)
2208 return 1;
2210 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2211 return 1;
2213 if (device_num < 0)
2214 return 0;
2216 struct gomp_device_descr *devicep = resolve_device (device_num);
2217 if (devicep == NULL)
2218 return 0;
2220 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2221 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2222 return 1;
2224 gomp_mutex_lock (&devicep->lock);
2225 struct splay_tree_s *mem_map = &devicep->mem_map;
2226 struct splay_tree_key_s cur_node;
2228 cur_node.host_start = (uintptr_t) ptr;
2229 cur_node.host_end = cur_node.host_start;
2230 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2231 int ret = n != NULL;
2232 gomp_mutex_unlock (&devicep->lock);
2233 return ret;
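/* OpenMP 4.5 omp_target_memcpy: copy LENGTH bytes between host and/or
   device memory, adding DST_OFFSET and SRC_OFFSET to the respective
   base pointers.  Returns 0 on success and EINVAL on failure.  A
   hedged sketch, with dev_ptr, host_buf and nbytes as placeholders:

     int err = omp_target_memcpy (dev_ptr, host_buf, nbytes, 0, 0,
                                  0, omp_get_initial_device ());  */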
2236 int
2237 omp_target_memcpy (void *dst, const void *src, size_t length,
2238 size_t dst_offset, size_t src_offset, int dst_device_num,
2239 int src_device_num)
2241 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2242 bool ret;
2244 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2246 if (dst_device_num < 0)
2247 return EINVAL;
2249 dst_devicep = resolve_device (dst_device_num);
2250 if (dst_devicep == NULL)
2251 return EINVAL;
2253 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2254 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2255 dst_devicep = NULL;
2257 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2259 if (src_device_num < 0)
2260 return EINVAL;
2262 src_devicep = resolve_device (src_device_num);
2263 if (src_devicep == NULL)
2264 return EINVAL;
2266 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2267 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2268 src_devicep = NULL;
2270 if (src_devicep == NULL && dst_devicep == NULL)
2272 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2273 return 0;
2275 if (src_devicep == NULL)
2277 gomp_mutex_lock (&dst_devicep->lock);
2278 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2279 (char *) dst + dst_offset,
2280 (char *) src + src_offset, length);
2281 gomp_mutex_unlock (&dst_devicep->lock);
2282 return (ret ? 0 : EINVAL);
2284 if (dst_devicep == NULL)
2286 gomp_mutex_lock (&src_devicep->lock);
2287 ret = src_devicep->dev2host_func (src_devicep->target_id,
2288 (char *) dst + dst_offset,
2289 (char *) src + src_offset, length);
2290 gomp_mutex_unlock (&src_devicep->lock);
2291 return (ret ? 0 : EINVAL);
2293 if (src_devicep == dst_devicep)
2295 gomp_mutex_lock (&src_devicep->lock);
2296 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2297 (char *) dst + dst_offset,
2298 (char *) src + src_offset, length);
2299 gomp_mutex_unlock (&src_devicep->lock);
2300 return (ret ? 0 : EINVAL);
2302 return EINVAL;
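/* Recursive worker for omp_target_memcpy_rect: peel off the outermost
   dimension and copy VOLUME[0] slices of it, bottoming out in a single
   contiguous copy once NUM_DIMS == 1.  Returns 0 on success and EINVAL
   on overflow or an unsupported device combination.  */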
2305 static int
2306 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2307 int num_dims, const size_t *volume,
2308 const size_t *dst_offsets,
2309 const size_t *src_offsets,
2310 const size_t *dst_dimensions,
2311 const size_t *src_dimensions,
2312 struct gomp_device_descr *dst_devicep,
2313 struct gomp_device_descr *src_devicep)
2315 size_t dst_slice = element_size;
2316 size_t src_slice = element_size;
2317 size_t j, dst_off, src_off, length;
2318 int i, ret;
2320 if (num_dims == 1)
2322 if (__builtin_mul_overflow (element_size, volume[0], &length)
2323 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2324 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2325 return EINVAL;
2326 if (dst_devicep == NULL && src_devicep == NULL)
2328 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2329 length);
2330 ret = 1;
2332 else if (src_devicep == NULL)
2333 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2334 (char *) dst + dst_off,
2335 (const char *) src + src_off,
2336 length);
2337 else if (dst_devicep == NULL)
2338 ret = src_devicep->dev2host_func (src_devicep->target_id,
2339 (char *) dst + dst_off,
2340 (const char *) src + src_off,
2341 length);
2342 else if (src_devicep == dst_devicep)
2343 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2344 (char *) dst + dst_off,
2345 (const char *) src + src_off,
2346 length);
2347 else
2348 ret = 0;
2349 return ret ? 0 : EINVAL;
2352 /* FIXME: it would be nice to have some plugin function to handle
2353 num_dims == 2 and num_dims == 3 more efficiently. Higher dimension
2354 counts can be handled by the generic recursion below, and for
2355 host-to-host copies it should be used even for num_dims >= 2. */
2357 for (i = 1; i < num_dims; i++)
2358 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2359 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2360 return EINVAL;
2361 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2362 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2363 return EINVAL;
2364 for (j = 0; j < volume[0]; j++)
2366 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2367 (const char *) src + src_off,
2368 element_size, num_dims - 1,
2369 volume + 1, dst_offsets + 1,
2370 src_offsets + 1, dst_dimensions + 1,
2371 src_dimensions + 1, dst_devicep,
2372 src_devicep);
2373 if (ret)
2374 return ret;
2375 dst_off += dst_slice;
2376 src_off += src_slice;
2378 return 0;
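/* OpenMP 4.5 omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume between two (possibly device-resident) arrays.
   If both DST and SRC are NULL, the call is a probe for the maximum
   supported number of dimensions.  A hedged sketch copying a 2x3 block
   at offset (1,1) of 4x5 matrices from the host to device 0, with
   dev_mat and host_mat as placeholders:

     size_t volume[] = { 2, 3 }, offs[] = { 1, 1 }, dims[] = { 4, 5 };
     int err = omp_target_memcpy_rect (dev_mat, host_mat,
                                       sizeof (double), 2, volume, offs,
                                       offs, dims, dims, 0,
                                       omp_get_initial_device ());  */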
2381 int
2382 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2383 int num_dims, const size_t *volume,
2384 const size_t *dst_offsets,
2385 const size_t *src_offsets,
2386 const size_t *dst_dimensions,
2387 const size_t *src_dimensions,
2388 int dst_device_num, int src_device_num)
2390 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2392 if (!dst && !src)
2393 return INT_MAX;
2395 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2397 if (dst_device_num < 0)
2398 return EINVAL;
2400 dst_devicep = resolve_device (dst_device_num);
2401 if (dst_devicep == NULL)
2402 return EINVAL;
2404 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2405 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2406 dst_devicep = NULL;
2408 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2410 if (src_device_num < 0)
2411 return EINVAL;
2413 src_devicep = resolve_device (src_device_num);
2414 if (src_devicep == NULL)
2415 return EINVAL;
2417 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2418 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2419 src_devicep = NULL;
2422 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2423 return EINVAL;
2425 if (src_devicep)
2426 gomp_mutex_lock (&src_devicep->lock);
2427 else if (dst_devicep)
2428 gomp_mutex_lock (&dst_devicep->lock);
2429 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2430 volume, dst_offsets, src_offsets,
2431 dst_dimensions, src_dimensions,
2432 dst_devicep, src_devicep);
2433 if (src_devicep)
2434 gomp_mutex_unlock (&src_devicep->lock);
2435 else if (dst_devicep)
2436 gomp_mutex_unlock (&dst_devicep->lock);
2437 return ret;
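/* OpenMP 4.5 omp_target_associate_ptr: record that the device memory at
   DEVICE_PTR + DEVICE_OFFSET corresponds to the SIZE bytes at HOST_PTR
   in DEVICE_NUM's mapping table, with an infinite reference count so
   the runtime never unmaps or frees it.  Returns 0 on success, EINVAL
   on failure or if a conflicting mapping already exists.  */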
2440 int
2441 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2442 size_t size, size_t device_offset, int device_num)
2444 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2445 return EINVAL;
2447 if (device_num < 0)
2448 return EINVAL;
2450 struct gomp_device_descr *devicep = resolve_device (device_num);
2451 if (devicep == NULL)
2452 return EINVAL;
2454 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2455 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2456 return EINVAL;
2458 gomp_mutex_lock (&devicep->lock);
2460 struct splay_tree_s *mem_map = &devicep->mem_map;
2461 struct splay_tree_key_s cur_node;
2462 int ret = EINVAL;
2464 cur_node.host_start = (uintptr_t) host_ptr;
2465 cur_node.host_end = cur_node.host_start + size;
2466 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2467 if (n)
2469 if (n->tgt->tgt_start + n->tgt_offset
2470 == (uintptr_t) device_ptr + device_offset
2471 && n->host_start <= cur_node.host_start
2472 && n->host_end >= cur_node.host_end)
2473 ret = 0;
2475 else
2477 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2478 tgt->array = gomp_malloc (sizeof (*tgt->array));
2479 tgt->refcount = 1;
2480 tgt->tgt_start = 0;
2481 tgt->tgt_end = 0;
2482 tgt->to_free = NULL;
2483 tgt->prev = NULL;
2484 tgt->list_count = 0;
2485 tgt->device_descr = devicep;
2486 splay_tree_node array = tgt->array;
2487 splay_tree_key k = &array->key;
2488 k->host_start = cur_node.host_start;
2489 k->host_end = cur_node.host_end;
2490 k->tgt = tgt;
2491 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2492 k->refcount = REFCOUNT_INFINITY;
2493 array->left = NULL;
2494 array->right = NULL;
2495 splay_tree_insert (&devicep->mem_map, array);
2496 ret = 0;
2498 gomp_mutex_unlock (&devicep->lock);
2499 return ret;
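/* OpenMP 4.5 omp_target_disassociate_ptr: undo a previous
   omp_target_associate_ptr.  Only mappings created by association
   (infinite reference count, no device memory owned by the runtime)
   are eligible; anything else yields EINVAL.  */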
2502 int
2503 omp_target_disassociate_ptr (const void *ptr, int device_num)
2505 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2506 return EINVAL;
2508 if (device_num < 0)
2509 return EINVAL;
2511 struct gomp_device_descr *devicep = resolve_device (device_num);
2512 if (devicep == NULL)
2513 return EINVAL;
2515 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2516 return EINVAL;
2518 gomp_mutex_lock (&devicep->lock);
2520 struct splay_tree_s *mem_map = &devicep->mem_map;
2521 struct splay_tree_key_s cur_node;
2522 int ret = EINVAL;
2524 cur_node.host_start = (uintptr_t) ptr;
2525 cur_node.host_end = cur_node.host_start;
2526 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2527 if (n
2528 && n->host_start == cur_node.host_start
2529 && n->refcount == REFCOUNT_INFINITY
2530 && n->tgt->tgt_start == 0
2531 && n->tgt->to_free == NULL
2532 && n->tgt->refcount == 1
2533 && n->tgt->list_count == 0)
2535 splay_tree_remove (&devicep->mem_map, n);
2536 gomp_unmap_tgt (n->tgt);
2537 ret = 0;
2540 gomp_mutex_unlock (&devicep->lock);
2541 return ret;
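/* OpenMP 5.0 omp_pause_resource: release resources held by the runtime
   for device DEVICE_NUM.  Only the host is handled so far.  */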
2544 int
2545 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2547 (void) kind;
2548 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2549 return gomp_pause_host ();
2550 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2551 return -1;
2552 /* Do nothing for target devices for now. */
2553 return 0;
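/* OpenMP 5.0 omp_pause_resource_all: like omp_pause_resource, but for
   the host and all devices at once.  */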
2556 int
2557 omp_pause_resource_all (omp_pause_resource_t kind)
2559 (void) kind;
2560 if (gomp_pause_host ())
2561 return -1;
2562 /* Do nothing for target devices for now. */
2563 return 0;
2566 ialias (omp_pause_resource)
2567 ialias (omp_pause_resource_all)
2569 #ifdef PLUGIN_SUPPORT
2571 /* This function tries to load a plugin for DEVICE. The name of the plugin
2572 is passed in PLUGIN_NAME.
2573 The handles of the functions found are stored in the corresponding fields
2574 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2576 static bool
2577 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2578 const char *plugin_name)
2580 const char *err = NULL, *last_missing = NULL;
2582 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2583 if (!plugin_handle)
2584 goto dl_fail;
2586 /* Check if all required functions are available in the plugin and store
2587 their handles. None of the symbols can legitimately be NULL,
2588 so we don't need to check dlerror all the time. */
2589 #define DLSYM(f) \
2590 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2591 goto dl_fail
2592 /* Similar, but missing functions are not an error; the expression
2593 evaluates to false if the lookup failed, true otherwise. */
2594 #define DLSYM_OPT(f, n) \
2595 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2596 || (last_missing = #n, 0))
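/* For instance, DLSYM (alloc) looks up "GOMP_OFFLOAD_alloc" in the
   plugin, stores the handle in device->alloc_func, and jumps to
   dl_fail if the symbol is missing.  */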
2598 DLSYM (version);
2599 if (device->version_func () != GOMP_VERSION)
2601 err = "plugin version mismatch";
2602 goto fail;
2605 DLSYM (get_name);
2606 DLSYM (get_caps);
2607 DLSYM (get_type);
2608 DLSYM (get_num_devices);
2609 DLSYM (init_device);
2610 DLSYM (fini_device);
2611 DLSYM (load_image);
2612 DLSYM (unload_image);
2613 DLSYM (alloc);
2614 DLSYM (free);
2615 DLSYM (dev2host);
2616 DLSYM (host2dev);
2617 device->capabilities = device->get_caps_func ();
2618 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2620 DLSYM (run);
2621 DLSYM (async_run);
2622 DLSYM_OPT (can_run, can_run);
2623 DLSYM (dev2dev);
2625 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2627 if (!DLSYM_OPT (openacc.exec, openacc_exec)
2628 || !DLSYM_OPT (openacc.register_async_cleanup,
2629 openacc_register_async_cleanup)
2630 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2631 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2632 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2633 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2634 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2635 || !DLSYM_OPT (openacc.async_wait_all_async,
2636 openacc_async_wait_all_async)
2637 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2638 || !DLSYM_OPT (openacc.create_thread_data,
2639 openacc_create_thread_data)
2640 || !DLSYM_OPT (openacc.destroy_thread_data,
2641 openacc_destroy_thread_data))
2643 /* Require all the OpenACC handlers if we have
2644 GOMP_OFFLOAD_CAP_OPENACC_200. */
2645 err = "plugin missing OpenACC handler function";
2646 goto fail;
2649 unsigned cuda = 0;
2650 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2651 openacc_cuda_get_current_device);
2652 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2653 openacc_cuda_get_current_context);
2654 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2655 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2656 if (cuda && cuda != 4)
2658 /* Make sure all the CUDA functions are there if any of them are. */
2659 err = "plugin missing OpenACC CUDA handler function";
2660 goto fail;
2663 #undef DLSYM
2664 #undef DLSYM_OPT
2666 return 1;
2668 dl_fail:
2669 err = dlerror ();
2670 fail:
2671 gomp_error ("while loading %s: %s", plugin_name, err);
2672 if (last_missing)
2673 gomp_error ("missing function was %s", last_missing);
2674 if (plugin_handle)
2675 dlclose (plugin_handle);
2677 return 0;
2680 /* This function finalizes all initialized devices. */
2682 static void
2683 gomp_target_fini (void)
2685 int i;
2686 for (i = 0; i < num_devices; i++)
2688 bool ret = true;
2689 struct gomp_device_descr *devicep = &devices[i];
2690 gomp_mutex_lock (&devicep->lock);
2691 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2693 ret = devicep->fini_device_func (devicep->target_id);
2694 devicep->state = GOMP_DEVICE_FINALIZED;
2696 gomp_mutex_unlock (&devicep->lock);
2697 if (!ret)
2698 gomp_fatal ("device finalization failed");
2702 /* This function initializes the runtime needed for offloading.
2703 It parses the list of offload targets and tries to load the plugins for
2704 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2705 will be set, and the array DEVICES initialized, containing descriptors for
2706 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
2707 by the others. */
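/* For example, with OFFLOAD_TARGETS "nvptx-none,hsa" the loop below
   would try to dlopen "libgomp-plugin-nvptx-none" and
   "libgomp-plugin-hsa", each with the SONAME_SUFFIX appended; the
   target names here are illustrative.  */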
2709 static void
2710 gomp_target_init (void)
2712 const char *prefix = "libgomp-plugin-";
2713 const char *suffix = SONAME_SUFFIX (1);
2714 const char *cur, *next;
2715 char *plugin_name;
2716 int i, new_num_devices;
2718 num_devices = 0;
2719 devices = NULL;
2721 cur = OFFLOAD_TARGETS;
2722 if (*cur)
2723 do
2725 struct gomp_device_descr current_device;
2726 size_t prefix_len, suffix_len, cur_len;
2728 next = strchr (cur, ',');
2730 prefix_len = strlen (prefix);
2731 cur_len = next ? next - cur : strlen (cur);
2732 suffix_len = strlen (suffix);
2734 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2735 if (!plugin_name)
2737 num_devices = 0;
2738 break;
2741 memcpy (plugin_name, prefix, prefix_len);
2742 memcpy (plugin_name + prefix_len, cur, cur_len);
2743 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2745 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2747 new_num_devices = current_device.get_num_devices_func ();
2748 if (new_num_devices >= 1)
2750 /* Augment DEVICES and NUM_DEVICES. */
2752 devices = realloc (devices, (num_devices + new_num_devices)
2753 * sizeof (struct gomp_device_descr));
2754 if (!devices)
2756 num_devices = 0;
2757 free (plugin_name);
2758 break;
2761 current_device.name = current_device.get_name_func ();
2762 /* current_device.capabilities has already been set. */
2763 current_device.type = current_device.get_type_func ();
2764 current_device.mem_map.root = NULL;
2765 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2766 current_device.openacc.data_environ = NULL;
2767 for (i = 0; i < new_num_devices; i++)
2769 current_device.target_id = i;
2770 devices[num_devices] = current_device;
2771 gomp_mutex_init (&devices[num_devices].lock);
2772 num_devices++;
2777 free (plugin_name);
2778 cur = next + 1;
2780 while (next);
2782 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2783 NUM_DEVICES_OPENMP. */
2784 struct gomp_device_descr *devices_s
2785 = malloc (num_devices * sizeof (struct gomp_device_descr));
2786 if (!devices_s)
2788 num_devices = 0;
2789 free (devices);
2790 devices = NULL;
2792 num_devices_openmp = 0;
2793 for (i = 0; i < num_devices; i++)
2794 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2795 devices_s[num_devices_openmp++] = devices[i];
2796 int num_devices_after_openmp = num_devices_openmp;
2797 for (i = 0; i < num_devices; i++)
2798 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2799 devices_s[num_devices_after_openmp++] = devices[i];
2800 free (devices);
2801 devices = devices_s;
2803 for (i = 0; i < num_devices; i++)
2805 /* The 'devices' array can be moved (by the realloc call) until we have
2806 found all the plugins, so registering with the OpenACC runtime (which
2807 takes a copy of the pointer argument) must be delayed until now. */
2808 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2809 goacc_register (&devices[i]);
2812 if (atexit (gomp_target_fini) != 0)
2813 gomp_fatal ("atexit failed");
2816 #else /* PLUGIN_SUPPORT */
2817 /* If dlfcn.h is unavailable, we always fall back to host execution.
2818 GOMP_target* routines are just stubs for this case. */
2819 static void
2820 gomp_target_init (void)
2823 #endif /* PLUGIN_SUPPORT */