/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
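/* Illustrative sketch, not part of the library: a zero-length array
   section such as map(tofrom: p[10:0]) produces a key whose host_start
   equals its host_end, which an exact interval lookup can never match.
   gomp_map_0len_lookup therefore probes one byte past the end:

     struct splay_tree_key_s key;
     key.host_start = (uintptr_t) &p[10];
     key.host_end = key.host_start;
     splay_tree_key n = gomp_map_0len_lookup (mem_map, &key);

   Here n is non-NULL if some existing mapping covers the byte at
   &p[10].  gomp_map_lookup additionally retries one byte before the
   start and finally the empty interval itself, so a pointer just past
   the end of a mapped object still resolves to that object.  */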
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[2 * i] is the starting offset and
     chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  size_t *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[2 * cbuf->chunk_cnt] = start;
  cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
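/* Illustrative sketch, not part of the library, of how the chunks array
   evolves.  Assume three to-be-copied regions are added at device
   offsets (relative to tgt_start) 0, 72 and 40960, each 16 bytes long:

     gomp_coalesce_buf_add (&cbuf, 0, 16);
	chunks = { 0, 16 },  chunk_cnt = 1,  use_cnt = 1
     gomp_coalesce_buf_add (&cbuf, 72, 16);
	72 < 16 + MAX_COALESCE_BUF_GAP, so the first chunk is merely
	extended: chunks = { 0, 88 },  use_cnt = 2
     gomp_coalesce_buf_add (&cbuf, 40960, 16);
	the gap exceeds MAX_COALESCE_BUF_GAP, so a second chunk is
	started: chunks = { 0, 88, 40960, 40976 },  use_cnt = 1

   Later, gomp_copy_host2dev gathers the small host-to-device copies
   falling inside a chunk into cbuf.buf, and gomp_map_vars flushes each
   chunk with a single host2dev_func call.  */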
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[2 * middle + 1] <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[2 * middle] <= doff)
		{
		  if (doff + sz > cbuf->chunks[2 * middle + 1])
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
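/* Illustrative sketch, not part of the library, of the BIAS handling
   above.  For map(to: a[10:4]) with int *a, the compiler emits the
   pointer a as a GOMP_MAP_POINTER entry whose size is the bias
   10 * sizeof (int): the section starts 40 bytes past the address
   stored in a.  gomp_map_pointer then

     looks up the host address  *a + 40  (start of the mapped section),
     computes its device address d,
     stores d - 40 into the device-side copy of a,

   so that device-side indexing a[10] lands on the first mapped element
   even though a[0] .. a[9] were never copied.  */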
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
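/* Illustrative summary, not part of the library, of the sentinel
   offsets gomp_map_val distinguishes (set up by gomp_map_vars below):

     key != NULL    device address of the mapped object
     offset == ~0   pass the host address through unchanged
		    (GOMP_MAP_FIRSTPRIVATE_INT and use_device_ptr
		    entries, where hostaddrs[i] was already rewritten)
     offset == ~1   pass a NULL pointer (unmapped zero-length sections
		    and GOMP_MAP_ALWAYS_POINTER entries)
     offset == ~2   GOMP_MAP_STRUCT entry: derive the struct base from
		    the device address of its first mapped field at i + 1
     otherwise	    offset relative to tgt_start (firstprivate copies
		    placed inside this tgt block)  */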
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      cbuf.chunks
	= (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0] = 0;
      cbuf.chunks[1] = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep,
			    (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
			    (char *) cbuf.buf
			    + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
      free (cbuf.buf);
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP.  Also insert into the splay tree the mapping between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
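/* Illustrative sketch, not part of the library, of the link-bit
   encoding consumed above.  For "omp declare target link" variables the
   compiler sets the most significant bit of the size entry, so on a
   64-bit host a 4-byte linked variable appears in host_var_table as

     { &var, (void *) (4 | (1ULL << 63)) }

   and decodes as

     host size:  size_mask & 0x8000000000000004  =>  4
     refcount:   target_size & link_bit ? REFCOUNT_LINK
					: REFCOUNT_INFINITY

   REFCOUNT_LINK entries are replaced on first real mapping; see the
   k->link_key handling in gomp_map_vars above.  */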
/* Unload the mappings described by target_data from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
				: (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
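/* Illustrative sketch, not part of the library, of the two-pass
   firstprivate scheme above.  For map(firstprivate: c, d) with a 1-byte
   char c and an 8-byte double d (alignments 1 and 8), the first pass
   computes

     tgt_align = 8,  tgt_size = 1, rounded up to 8, plus 8 = 16

   and the caller allocates tgt_size + tgt_align - 1 = 23 bytes:

     size_t tgt_align = 0, tgt_size = 0;
     calculate_firstprivate_requirements (mapnum, sizes, kinds,
					  &tgt_align, &tgt_size);
     if (tgt_align)
       {
	 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				 tgt_align, tgt_size);
       }

   The second pass aligns tgt, copies each value in, and redirects
   hostaddrs[i] at the private copy, exactly as GOMP_target_ext does
   before a host fallback or a shared-memory run.  */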
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
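/* Illustrative note, not part of the library: functions have no
   meaningful extent on the host, so gomp_load_image_to_device registers
   each one as a degenerate one-byte range [host_fn, host_fn + 1).  The
   lookup above builds the same key, and the tgt_offset of the match is
   the device-side entry point.  For devices with
   GOMP_OFFLOAD_CAP_NATIVE_EXEC the host address is the device address,
   so the table is bypassed entirely.  */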
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
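/* Illustrative sketch, not part of the library, of a call the compiler
   might emit for the legacy entry point above.  For

     #pragma omp target map(tofrom: x)

   with an int x and outlined body fn, the call could look roughly like

     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (int) };
     unsigned char kinds[1] = { GOMP_MAP_TOFROM | (2 << 3) };
     GOMP_target (GOMP_DEVICE_ICV, fn, NULL, 1, hostaddrs, sizes, kinds);

   where in this 8-bit ABI the low 3 bits of each kind encode the
   mapping and the high bits encode log2 of the alignment (here
   2 << 3 for 4-byte alignment).  fn then receives the array of device
   pointers that gomp_map_vars placed at tgt_start.  */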
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first element, the whole second element is the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   arguments are, for example, NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
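/* Illustrative sketch, not part of the library, of the ARGS layout
   described above, assuming the GOMP_TARGET_ARG_* encoding from
   gomp-constants.h: each element packs a device number, an argument id
   and, if it fits, the value itself; otherwise the value occupies the
   following element.  A num_teams(4) thread_limit(64) pair could look
   roughly like

     void *args[] = {
       (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			     | GOMP_TARGET_ARG_NUM_TEAMS
			     | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
       (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			     | GOMP_TARGET_ARG_THREAD_LIMIT
			     | (64 << GOMP_TARGET_ARG_VALUE_SHIFT)),
       NULL
     };

   The trailing NULL terminates the array, as the comment above
   requires.  */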
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
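/* Illustrative sketch, not part of the library: for

     #pragma omp target update from(x)

   the compiler might emit, in this legacy 8-bit-kinds ABI, something
   roughly like

     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (x) };
     unsigned char kinds[1] = { GOMP_MAP_FROM };
     GOMP_target_update (GOMP_DEVICE_ICV, NULL, 1, hostaddrs, sizes, kinds);

   and gomp_update then performs a single dev2host copy for the mapped
   range, erroring out if x is not entirely mapped.  */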
1825 void
1826 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1827 size_t *sizes, unsigned short *kinds,
1828 unsigned int flags, void **depend)
1830 struct gomp_device_descr *devicep = resolve_device (device);
1832 /* If there are depend clauses, but nowait is not present,
1833 block the parent task until the dependencies are resolved
1834 and then just continue with the rest of the function as if it
1835 is a merged task. Until we are able to schedule task during
1836 variable mapping or unmapping, ignore nowait if depend clauses
1837 are not present. */
1838 if (depend != NULL)
1840 struct gomp_thread *thr = gomp_thread ();
1841 if (thr->task && thr->task->depend_hash)
1843 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1844 && thr->ts.team
1845 && !thr->task->final_task)
1847 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1848 mapnum, hostaddrs, sizes, kinds,
1849 flags | GOMP_TARGET_FLAG_UPDATE,
1850 depend, NULL, GOMP_TARGET_TASK_DATA))
1851 return;
1853 else
1855 struct gomp_team *team = thr->ts.team;
1856 /* If parallel or taskgroup has been cancelled, don't start new
1857 tasks. */
1858 if (__builtin_expect (gomp_cancel_var, 0) && team)
1860 if (gomp_team_barrier_cancelled (&team->barrier))
1861 return;
1862 if (thr->task->taskgroup)
1864 if (thr->task->taskgroup->cancelled)
1865 return;
1866 if (thr->task->taskgroup->workshare
1867 && thr->task->taskgroup->prev
1868 && thr->task->taskgroup->prev->cancelled)
1869 return;
1873 gomp_task_maybe_wait_for_dependencies (depend);
1878 if (devicep == NULL
1879 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1880 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1881 return;
1883 struct gomp_thread *thr = gomp_thread ();
1884 struct gomp_team *team = thr->ts.team;
1885 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1886 if (__builtin_expect (gomp_cancel_var, 0) && team)
1888 if (gomp_team_barrier_cancelled (&team->barrier))
1889 return;
1890 if (thr->task->taskgroup)
1892 if (thr->task->taskgroup->cancelled)
1893 return;
1894 if (thr->task->taskgroup->workshare
1895 && thr->task->taskgroup->prev
1896 && thr->task->taskgroup->prev->cancelled)
1897 return;
1901 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
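/* A sketch of the nowait/depend paths above: with

     #pragma omp target update to(x) depend(inout: x) nowait

   issued inside a parallel team, the update is packaged as a deferred
   target task (GOMP_TARGET_TASK_DATA) instead of executing here; without
   nowait, the parent task first blocks on the dependences and the
   gomp_update call then runs synchronously.  */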
1904 static void
1905 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1906 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1908 const int typemask = 0xff;
1909 size_t i;
1910 gomp_mutex_lock (&devicep->lock);
1911 if (devicep->state == GOMP_DEVICE_FINALIZED)
1913 gomp_mutex_unlock (&devicep->lock);
1914 return;
1917 for (i = 0; i < mapnum; i++)
1919 struct splay_tree_key_s cur_node;
1920 unsigned char kind = kinds[i] & typemask;
1921 switch (kind)
1923 case GOMP_MAP_FROM:
1924 case GOMP_MAP_ALWAYS_FROM:
1925 case GOMP_MAP_DELETE:
1926 case GOMP_MAP_RELEASE:
1927 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1928 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1929 cur_node.host_start = (uintptr_t) hostaddrs[i];
1930 cur_node.host_end = cur_node.host_start + sizes[i];
1931 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1932 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1933 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1934 : splay_tree_lookup (&devicep->mem_map, &cur_node);
1935 if (!k)
1936 continue;
1938 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1939 k->refcount--;
1940 if ((kind == GOMP_MAP_DELETE
1941 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1942 && k->refcount != REFCOUNT_INFINITY)
1943 k->refcount = 0;
1945 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1946 || kind == GOMP_MAP_ALWAYS_FROM)
1947 gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1948 (void *) (k->tgt->tgt_start + k->tgt_offset
1949 + cur_node.host_start
1950 - k->host_start),
1951 cur_node.host_end - cur_node.host_start);
1952 if (k->refcount == 0)
1954 splay_tree_remove (&devicep->mem_map, k);
1955 if (k->link_key)
1956 splay_tree_insert (&devicep->mem_map,
1957 (splay_tree_node) k->link_key);
1958 if (k->tgt->refcount > 1)
1959 k->tgt->refcount--;
1960 else
1961 gomp_unmap_tgt (k->tgt);
1964 break;
1965 default:
1966 gomp_mutex_unlock (&devicep->lock);
1967 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1968 kind);
1972 gomp_mutex_unlock (&devicep->lock);
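/* An illustration of the reference counting above: if a variable is
   currently mapped with refcount 2, an "exit data map(from: a)" only
   decrements the count to 1 and neither copies back nor unmaps;
   "exit data map(delete: a)" forces the refcount to 0 so the mapping is
   removed (still without a copy back), while "map(always, from: a)"
   copies back regardless of the remaining count.  Mappings with
   REFCOUNT_INFINITY are never decremented or removed here.  */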
1975 void
1976 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1977 size_t *sizes, unsigned short *kinds,
1978 unsigned int flags, void **depend)
1980 struct gomp_device_descr *devicep = resolve_device (device);
1982 /* If there are depend clauses, but nowait is not present,
1983 block the parent task until the dependencies are resolved
1984 and then just continue with the rest of the function as if it
1985 were a merged task. Until we are able to schedule tasks during
1986 variable mapping or unmapping, ignore nowait if depend clauses
1987 are not present. */
1988 if (depend != NULL)
1990 struct gomp_thread *thr = gomp_thread ();
1991 if (thr->task && thr->task->depend_hash)
1993 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1994 && thr->ts.team
1995 && !thr->task->final_task)
1997 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1998 mapnum, hostaddrs, sizes, kinds,
1999 flags, depend, NULL,
2000 GOMP_TARGET_TASK_DATA))
2001 return;
2003 else
2005 struct gomp_team *team = thr->ts.team;
2006 /* If parallel or taskgroup has been cancelled, don't start new
2007 tasks. */
2008 if (__builtin_expect (gomp_cancel_var, 0) && team)
2010 if (gomp_team_barrier_cancelled (&team->barrier))
2011 return;
2012 if (thr->task->taskgroup)
2014 if (thr->task->taskgroup->cancelled)
2015 return;
2016 if (thr->task->taskgroup->workshare
2017 && thr->task->taskgroup->prev
2018 && thr->task->taskgroup->prev->cancelled)
2019 return;
2023 gomp_task_maybe_wait_for_dependencies (depend);
2028 if (devicep == NULL
2029 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2030 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2031 return;
2033 struct gomp_thread *thr = gomp_thread ();
2034 struct gomp_team *team = thr->ts.team;
2035 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2036 if (__builtin_expect (gomp_cancel_var, 0) && team)
2038 if (gomp_team_barrier_cancelled (&team->barrier))
2039 return;
2040 if (thr->task->taskgroup)
2042 if (thr->task->taskgroup->cancelled)
2043 return;
2044 if (thr->task->taskgroup->workshare
2045 && thr->task->taskgroup->prev
2046 && thr->task->taskgroup->prev->cancelled)
2047 return;
2051 size_t i;
2052 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2053 for (i = 0; i < mapnum; i++)
2054 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2056 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2057 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2058 i += sizes[i];
2060 else
2061 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2062 true, GOMP_MAP_VARS_ENTER_DATA);
2063 else
2064 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
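/* A usage sketch: the FLAGS argument distinguishes the two directives, so

     #pragma omp target enter data map(to: a)
     #pragma omp target exit data map(from: a)

   both funnel into this entry point, the latter with
   GOMP_TARGET_FLAG_EXIT_DATA set.  For a struct mapping the compiler
   emits a GOMP_MAP_STRUCT entry whose sizes[] value is the number of
   field entries that follow, which is why the loop above maps
   sizes[i] + 1 entries at once and then skips over them.  */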
2067 bool
2068 gomp_target_task_fn (void *data)
2070 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2071 struct gomp_device_descr *devicep = ttask->devicep;
2073 if (ttask->fn != NULL)
2075 void *fn_addr;
2076 if (devicep == NULL
2077 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2078 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2079 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2081 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2082 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2083 return false;
2086 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2088 if (ttask->tgt)
2089 gomp_unmap_vars (ttask->tgt, true);
2090 return false;
2093 void *actual_arguments;
2094 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2096 ttask->tgt = NULL;
2097 actual_arguments = ttask->hostaddrs;
2099 else
2101 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2102 NULL, ttask->sizes, ttask->kinds, true,
2103 GOMP_MAP_VARS_TARGET);
2104 actual_arguments = (void *) ttask->tgt->tgt_start;
2106 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2108 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2109 ttask->args, (void *) ttask);
2110 return true;
2112 else if (devicep == NULL
2113 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2114 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2115 return false;
2117 size_t i;
2118 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2119 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2120 ttask->kinds, true);
2121 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2122 for (i = 0; i < ttask->mapnum; i++)
2123 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2125 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2126 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2127 GOMP_MAP_VARS_ENTER_DATA);
2128 i += ttask->sizes[i];
2130 else
2131 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2132 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2133 else
2134 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2135 ttask->kinds);
2136 return false;
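/* A sketch of the asynchronous flow above: the first invocation maps the
   arguments, dispatches devicep->async_run_func and returns true, telling
   the task scheduler that the target task is still running.  The plugin
   is expected to signal completion later (via the
   GOMP_PLUGIN_target_task_completion callback), after which this function
   runs again with state GOMP_TARGET_TASK_FINISHED and only unmaps the
   variables.  */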
2139 void
2140 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2142 if (thread_limit)
2144 struct gomp_task_icv *icv = gomp_icv (true);
2145 icv->thread_limit_var
2146 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2148 (void) num_teams;
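/* A usage sketch: host-fallback execution of

     #pragma omp teams num_teams(4) thread_limit(8)

   arrives here as GOMP_teams (4, 8).  Only the thread limit has any
   effect on the host, where it clamps the thread-limit-var ICV
   (values above INT_MAX saturate to UINT_MAX).  */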
2151 void *
2152 omp_target_alloc (size_t size, int device_num)
2154 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2155 return malloc (size);
2157 if (device_num < 0)
2158 return NULL;
2160 struct gomp_device_descr *devicep = resolve_device (device_num);
2161 if (devicep == NULL)
2162 return NULL;
2164 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2165 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2166 return malloc (size);
2168 gomp_mutex_lock (&devicep->lock);
2169 void *ret = devicep->alloc_func (devicep->target_id, size);
2170 gomp_mutex_unlock (&devicep->lock);
2171 return ret;
2174 void
2175 omp_target_free (void *device_ptr, int device_num)
2177 if (device_ptr == NULL)
2178 return;
2180 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2182 free (device_ptr);
2183 return;
2186 if (device_num < 0)
2187 return;
2189 struct gomp_device_descr *devicep = resolve_device (device_num);
2190 if (devicep == NULL)
2191 return;
2193 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2194 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2196 free (device_ptr);
2197 return;
2200 gomp_mutex_lock (&devicep->lock);
2201 gomp_free_device_memory (devicep, device_ptr);
2202 gomp_mutex_unlock (&devicep->lock);
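/* A minimal host-side sketch of these API routines (assuming a device 0
   exists and that omp_get_initial_device () yields the host-fallback
   device number in this implementation):

     char h[256];
     void *d = omp_target_alloc (sizeof h, 0);
     if (d != NULL)
       {
         omp_target_memcpy (d, h, sizeof h, 0, 0,
                            0, omp_get_initial_device ());
         ...
         omp_target_free (d, 0);
       }
 */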
2205 int
2206 omp_target_is_present (const void *ptr, int device_num)
2208 if (ptr == NULL)
2209 return 1;
2211 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2212 return 1;
2214 if (device_num < 0)
2215 return 0;
2217 struct gomp_device_descr *devicep = resolve_device (device_num);
2218 if (devicep == NULL)
2219 return 0;
2221 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2222 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2223 return 1;
2225 gomp_mutex_lock (&devicep->lock);
2226 struct splay_tree_s *mem_map = &devicep->mem_map;
2227 struct splay_tree_key_s cur_node;
2229 cur_node.host_start = (uintptr_t) ptr;
2230 cur_node.host_end = cur_node.host_start;
2231 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2232 int ret = n != NULL;
2233 gomp_mutex_unlock (&devicep->lock);
2234 return ret;
2237 int
2238 omp_target_memcpy (void *dst, const void *src, size_t length,
2239 size_t dst_offset, size_t src_offset, int dst_device_num,
2240 int src_device_num)
2242 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2243 bool ret;
2245 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2247 if (dst_device_num < 0)
2248 return EINVAL;
2250 dst_devicep = resolve_device (dst_device_num);
2251 if (dst_devicep == NULL)
2252 return EINVAL;
2254 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2255 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2256 dst_devicep = NULL;
2258 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2260 if (src_device_num < 0)
2261 return EINVAL;
2263 src_devicep = resolve_device (src_device_num);
2264 if (src_devicep == NULL)
2265 return EINVAL;
2267 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2268 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2269 src_devicep = NULL;
2271 if (src_devicep == NULL && dst_devicep == NULL)
2273 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2274 return 0;
2276 if (src_devicep == NULL)
2278 gomp_mutex_lock (&dst_devicep->lock);
2279 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2280 (char *) dst + dst_offset,
2281 (char *) src + src_offset, length);
2282 gomp_mutex_unlock (&dst_devicep->lock);
2283 return (ret ? 0 : EINVAL);
2285 if (dst_devicep == NULL)
2287 gomp_mutex_lock (&src_devicep->lock);
2288 ret = src_devicep->dev2host_func (src_devicep->target_id,
2289 (char *) dst + dst_offset,
2290 (char *) src + src_offset, length);
2291 gomp_mutex_unlock (&src_devicep->lock);
2292 return (ret ? 0 : EINVAL);
2294 if (src_devicep == dst_devicep)
2296 gomp_mutex_lock (&src_devicep->lock);
2297 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2298 (char *) dst + dst_offset,
2299 (char *) src + src_offset, length);
2300 gomp_mutex_unlock (&src_devicep->lock);
2301 return (ret ? 0 : EINVAL);
2303 return EINVAL;
2306 static int
2307 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2308 int num_dims, const size_t *volume,
2309 const size_t *dst_offsets,
2310 const size_t *src_offsets,
2311 const size_t *dst_dimensions,
2312 const size_t *src_dimensions,
2313 struct gomp_device_descr *dst_devicep,
2314 struct gomp_device_descr *src_devicep)
2316 size_t dst_slice = element_size;
2317 size_t src_slice = element_size;
2318 size_t j, dst_off, src_off, length;
2319 int i, ret;
2321 if (num_dims == 1)
2323 if (__builtin_mul_overflow (element_size, volume[0], &length)
2324 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2325 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2326 return EINVAL;
2327 if (dst_devicep == NULL && src_devicep == NULL)
2329 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2330 length);
2331 ret = 1;
2333 else if (src_devicep == NULL)
2334 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2335 (char *) dst + dst_off,
2336 (const char *) src + src_off,
2337 length);
2338 else if (dst_devicep == NULL)
2339 ret = src_devicep->dev2host_func (src_devicep->target_id,
2340 (char *) dst + dst_off,
2341 (const char *) src + src_off,
2342 length);
2343 else if (src_devicep == dst_devicep)
2344 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2345 (char *) dst + dst_off,
2346 (const char *) src + src_off,
2347 length);
2348 else
2349 ret = 0;
2350 return ret ? 0 : EINVAL;
2353 /* FIXME: it would be nice to have some plugin function to handle
2354 num_dims == 2 and num_dims == 3 more efficiently. Larger dimension
2355 counts can be handled by the generic recursion below, and for
2356 host-to-host copies the generic recursion suffices even for num_dims >= 2. */
2358 for (i = 1; i < num_dims; i++)
2359 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2360 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2361 return EINVAL;
2362 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2363 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2364 return EINVAL;
2365 for (j = 0; j < volume[0]; j++)
2367 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2368 (const char *) src + src_off,
2369 element_size, num_dims - 1,
2370 volume + 1, dst_offsets + 1,
2371 src_offsets + 1, dst_dimensions + 1,
2372 src_dimensions + 1, dst_devicep,
2373 src_devicep);
2374 if (ret)
2375 return ret;
2376 dst_off += dst_slice;
2377 src_off += src_slice;
2379 return 0;
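/* A worked example of the recursion above: copying a 2x3 sub-block out of
   a 6-column matrix of 4-byte elements with offsets {1, 2} gives
   dst_slice = 4 * 6 = 24 bytes per row and dst_off = 24 * 1 = 24; each of
   the two 1-D sub-copies then transfers length = 4 * 3 = 12 bytes at an
   additional inner offset of 4 * 2 = 8, i.e. at byte offsets 32 and 56 of
   the destination.  */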
2382 int
2383 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2384 int num_dims, const size_t *volume,
2385 const size_t *dst_offsets,
2386 const size_t *src_offsets,
2387 const size_t *dst_dimensions,
2388 const size_t *src_dimensions,
2389 int dst_device_num, int src_device_num)
2391 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2393 if (!dst && !src)
2394 return INT_MAX;
2396 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2398 if (dst_device_num < 0)
2399 return EINVAL;
2401 dst_devicep = resolve_device (dst_device_num);
2402 if (dst_devicep == NULL)
2403 return EINVAL;
2405 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2406 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2407 dst_devicep = NULL;
2409 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2411 if (src_device_num < 0)
2412 return EINVAL;
2414 src_devicep = resolve_device (src_device_num);
2415 if (src_devicep == NULL)
2416 return EINVAL;
2418 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2419 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2420 src_devicep = NULL;
2423 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2424 return EINVAL;
2426 if (src_devicep)
2427 gomp_mutex_lock (&src_devicep->lock);
2428 else if (dst_devicep)
2429 gomp_mutex_lock (&dst_devicep->lock);
2430 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2431 volume, dst_offsets, src_offsets,
2432 dst_dimensions, src_dimensions,
2433 dst_devicep, src_devicep);
2434 if (src_devicep)
2435 gomp_mutex_unlock (&src_devicep->lock);
2436 else if (dst_devicep)
2437 gomp_mutex_unlock (&dst_devicep->lock);
2438 return ret;
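/* A usage sketch matching the arithmetic worked through above
   (hypothetical buffers; both device numbers name the host, assuming
   omp_get_initial_device () maps to the host fallback here):

     int src[5][6], dst[5][6];
     size_t volume[2] = { 2, 3 };
     size_t offsets[2] = { 1, 2 };
     size_t dims[2] = { 5, 6 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (int), 2, volume,
                                       offsets, offsets, dims, dims,
                                       omp_get_initial_device (),
                                       omp_get_initial_device ());

   Passing NULL for both DST and SRC instead returns INT_MAX, advertising
   the maximum number of dimensions supported.  */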
2441 int
2442 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2443 size_t size, size_t device_offset, int device_num)
2445 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2446 return EINVAL;
2448 if (device_num < 0)
2449 return EINVAL;
2451 struct gomp_device_descr *devicep = resolve_device (device_num);
2452 if (devicep == NULL)
2453 return EINVAL;
2455 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2456 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2457 return EINVAL;
2459 gomp_mutex_lock (&devicep->lock);
2461 struct splay_tree_s *mem_map = &devicep->mem_map;
2462 struct splay_tree_key_s cur_node;
2463 int ret = EINVAL;
2465 cur_node.host_start = (uintptr_t) host_ptr;
2466 cur_node.host_end = cur_node.host_start + size;
2467 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2468 if (n)
2470 if (n->tgt->tgt_start + n->tgt_offset
2471 == (uintptr_t) device_ptr + device_offset
2472 && n->host_start <= cur_node.host_start
2473 && n->host_end >= cur_node.host_end)
2474 ret = 0;
2476 else
2478 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2479 tgt->array = gomp_malloc (sizeof (*tgt->array));
2480 tgt->refcount = 1;
2481 tgt->tgt_start = 0;
2482 tgt->tgt_end = 0;
2483 tgt->to_free = NULL;
2484 tgt->prev = NULL;
2485 tgt->list_count = 0;
2486 tgt->device_descr = devicep;
2487 splay_tree_node array = tgt->array;
2488 splay_tree_key k = &array->key;
2489 k->host_start = cur_node.host_start;
2490 k->host_end = cur_node.host_end;
2491 k->tgt = tgt;
2492 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2493 k->refcount = REFCOUNT_INFINITY;
2494 array->left = NULL;
2495 array->right = NULL;
2496 splay_tree_insert (&devicep->mem_map, array);
2497 ret = 0;
2499 gomp_mutex_unlock (&devicep->lock);
2500 return ret;
2503 int
2504 omp_target_disassociate_ptr (const void *ptr, int device_num)
2506 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2507 return EINVAL;
2509 if (device_num < 0)
2510 return EINVAL;
2512 struct gomp_device_descr *devicep = resolve_device (device_num);
2513 if (devicep == NULL)
2514 return EINVAL;
2516 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2517 return EINVAL;
2519 gomp_mutex_lock (&devicep->lock);
2521 struct splay_tree_s *mem_map = &devicep->mem_map;
2522 struct splay_tree_key_s cur_node;
2523 int ret = EINVAL;
2525 cur_node.host_start = (uintptr_t) ptr;
2526 cur_node.host_end = cur_node.host_start;
2527 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2528 if (n
2529 && n->host_start == cur_node.host_start
2530 && n->refcount == REFCOUNT_INFINITY
2531 && n->tgt->tgt_start == 0
2532 && n->tgt->to_free == NULL
2533 && n->tgt->refcount == 1
2534 && n->tgt->list_count == 0)
2536 splay_tree_remove (&devicep->mem_map, n);
2537 gomp_unmap_tgt (n->tgt);
2538 ret = 0;
2541 gomp_mutex_unlock (&devicep->lock);
2542 return ret;
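/* A usage sketch for the pair of routines above (assuming a device 0
   exists): manually pair a host buffer with device memory, use it, then
   sever the association before freeing:

     void *d = omp_target_alloc (size, 0);
     if (omp_target_associate_ptr (h, d, size, 0, 0) == 0)
       {
         ...
         omp_target_disassociate_ptr (h, 0);
       }
     omp_target_free (d, 0);

   The association is entered with REFCOUNT_INFINITY, so ordinary map
   clauses reuse it instead of creating a fresh mapping.  */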
2545 int
2546 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2548 (void) kind;
2549 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2550 return gomp_pause_host ();
2551 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2552 return -1;
2553 /* Do nothing for target devices for now. */
2554 return 0;
2557 int
2558 omp_pause_resource_all (omp_pause_resource_t kind)
2560 (void) kind;
2561 if (gomp_pause_host ())
2562 return -1;
2563 /* Do nothing for target devices for now. */
2564 return 0;
2567 ialias (omp_pause_resource)
2568 ialias (omp_pause_resource_all)
2570 #ifdef PLUGIN_SUPPORT
2572 /* This function tries to load a plugin for DEVICE. The name of the
2573 plugin is passed in PLUGIN_NAME.
2574 The handles of the found functions are stored in the corresponding fields
2575 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2577 static bool
2578 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2579 const char *plugin_name)
2581 const char *err = NULL, *last_missing = NULL;
2583 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2584 if (!plugin_handle)
2585 goto dl_fail;
2587 /* Check if all required functions are available in the plugin and store
2588 their handles. None of the symbols can legitimately be NULL,
2589 so we don't need to check dlerror all the time. */
2590 #define DLSYM(f) \
2591 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2592 goto dl_fail
2593 /* Similar, but a missing function is not an error; the expression
2594 evaluates to false if the lookup failed, true otherwise. */
2595 #define DLSYM_OPT(f, n) \
2596 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2597 || (last_missing = #n, 0))
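/* For instance, DLSYM (alloc) expands to

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   while DLSYM_OPT (can_run, can_run) merely records "can_run" in
   last_missing and yields 0 if the symbol is absent.  */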
2599 DLSYM (version);
2600 if (device->version_func () != GOMP_VERSION)
2602 err = "plugin version mismatch";
2603 goto fail;
2606 DLSYM (get_name);
2607 DLSYM (get_caps);
2608 DLSYM (get_type);
2609 DLSYM (get_num_devices);
2610 DLSYM (init_device);
2611 DLSYM (fini_device);
2612 DLSYM (load_image);
2613 DLSYM (unload_image);
2614 DLSYM (alloc);
2615 DLSYM (free);
2616 DLSYM (dev2host);
2617 DLSYM (host2dev);
2618 device->capabilities = device->get_caps_func ();
2619 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2621 DLSYM (run);
2622 DLSYM (async_run);
2623 DLSYM_OPT (can_run, can_run);
2624 DLSYM (dev2dev);
2626 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2628 if (!DLSYM_OPT (openacc.exec, openacc_exec)
2629 || !DLSYM_OPT (openacc.register_async_cleanup,
2630 openacc_register_async_cleanup)
2631 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2632 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2633 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2634 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2635 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2636 || !DLSYM_OPT (openacc.async_wait_all_async,
2637 openacc_async_wait_all_async)
2638 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2639 || !DLSYM_OPT (openacc.create_thread_data,
2640 openacc_create_thread_data)
2641 || !DLSYM_OPT (openacc.destroy_thread_data,
2642 openacc_destroy_thread_data))
2644 /* Require all the OpenACC handlers if we have
2645 GOMP_OFFLOAD_CAP_OPENACC_200. */
2646 err = "plugin missing OpenACC handler function";
2647 goto fail;
2650 unsigned cuda = 0;
2651 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2652 openacc_cuda_get_current_device);
2653 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2654 openacc_cuda_get_current_context);
2655 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2656 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2657 if (cuda && cuda != 4)
2659 /* Make sure all the CUDA functions are there if any of them are. */
2660 err = "plugin missing OpenACC CUDA handler function";
2661 goto fail;
2664 #undef DLSYM
2665 #undef DLSYM_OPT
2667 return 1;
2669 dl_fail:
2670 err = dlerror ();
2671 fail:
2672 gomp_error ("while loading %s: %s", plugin_name, err);
2673 if (last_missing)
2674 gomp_error ("missing function was %s", last_missing);
2675 if (plugin_handle)
2676 dlclose (plugin_handle);
2678 return 0;
2681 /* This function finalizes all initialized devices. */
2683 static void
2684 gomp_target_fini (void)
2686 int i;
2687 for (i = 0; i < num_devices; i++)
2689 bool ret = true;
2690 struct gomp_device_descr *devicep = &devices[i];
2691 gomp_mutex_lock (&devicep->lock);
2692 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2694 ret = devicep->fini_device_func (devicep->target_id);
2695 devicep->state = GOMP_DEVICE_FINALIZED;
2697 gomp_mutex_unlock (&devicep->lock);
2698 if (!ret)
2699 gomp_fatal ("device finalization failed");
2703 /* This function initializes the runtime needed for offloading.
2704 It parses the list of offload targets and tries to load the plugins for
2705 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2706 will be set, and the array DEVICES initialized, containing descriptors for
2707 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
2708 followed by the others. */
2710 static void
2711 gomp_target_init (void)
2713 const char *prefix = "libgomp-plugin-";
2714 const char *suffix = SONAME_SUFFIX (1);
2715 const char *cur, *next;
2716 char *plugin_name;
2717 int i, new_num_devices;
2719 num_devices = 0;
2720 devices = NULL;
2722 cur = OFFLOAD_TARGETS;
2723 if (*cur)
2724 do
2726 struct gomp_device_descr current_device;
2727 size_t prefix_len, suffix_len, cur_len;
2729 next = strchr (cur, ',');
2731 prefix_len = strlen (prefix);
2732 cur_len = next ? next - cur : strlen (cur);
2733 suffix_len = strlen (suffix);
2735 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2736 if (!plugin_name)
2738 num_devices = 0;
2739 break;
2742 memcpy (plugin_name, prefix, prefix_len);
2743 memcpy (plugin_name + prefix_len, cur, cur_len);
2744 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
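      /* For example, an OFFLOAD_TARGETS entry of "nvptx-none" yields
         "libgomp-plugin-nvptx-none" plus SONAME_SUFFIX (1), i.e. typically
         "libgomp-plugin-nvptx-none.so.1" on ELF hosts; the suffix is
         platform-dependent (see plugin-suffix.h).  */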
2746 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2748 new_num_devices = current_device.get_num_devices_func ();
2749 if (new_num_devices >= 1)
2751 /* Augment DEVICES and NUM_DEVICES. */
2753 devices = realloc (devices, (num_devices + new_num_devices)
2754 * sizeof (struct gomp_device_descr));
2755 if (!devices)
2757 num_devices = 0;
2758 free (plugin_name);
2759 break;
2762 current_device.name = current_device.get_name_func ();
2763 /* current_device.capabilities has already been set. */
2764 current_device.type = current_device.get_type_func ();
2765 current_device.mem_map.root = NULL;
2766 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2767 current_device.openacc.data_environ = NULL;
2768 for (i = 0; i < new_num_devices; i++)
2770 current_device.target_id = i;
2771 devices[num_devices] = current_device;
2772 gomp_mutex_init (&devices[num_devices].lock);
2773 num_devices++;
2778 free (plugin_name);
2779 cur = next + 1;
2781 while (next);
2783 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2784 NUM_DEVICES_OPENMP. */
2785 struct gomp_device_descr *devices_s
2786 = malloc (num_devices * sizeof (struct gomp_device_descr));
2787 if (!devices_s)
2789 num_devices = 0;
2790 free (devices);
2791 devices = NULL;
2793 num_devices_openmp = 0;
2794 for (i = 0; i < num_devices; i++)
2795 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2796 devices_s[num_devices_openmp++] = devices[i];
2797 int num_devices_after_openmp = num_devices_openmp;
2798 for (i = 0; i < num_devices; i++)
2799 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2800 devices_s[num_devices_after_openmp++] = devices[i];
2801 free (devices);
2802 devices = devices_s;
2804 for (i = 0; i < num_devices; i++)
2806 /* The 'devices' array can be moved (by the realloc call) until we have
2807 found all the plugins, so registering with the OpenACC runtime (which
2808 takes a copy of the pointer argument) must be delayed until now. */
2809 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2810 goacc_register (&devices[i]);
2813 if (atexit (gomp_target_fini) != 0)
2814 gomp_fatal ("atexit failed");
2817 #else /* PLUGIN_SUPPORT */
2818 /* If dlfcn.h is unavailable we always fall back to host execution.
2819 The GOMP_target* routines are just stubs in this case. */
2820 static void
2821 gomp_target_init (void)
2824 #endif /* PLUGIN_SUPPORT */