/* libgomp/target.c  */
/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
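
/* Illustrative sketch: both lookup helpers above special-case zero-length
   keys (host_start == host_end), which arise from zero-length array
   sections.  Assuming P is a host pointer into some mapped block:

     struct splay_tree_key_s key;
     key.host_start = (uintptr_t) p;
     key.host_end = (uintptr_t) p;
     splay_tree_key n = gomp_map_0len_lookup (mem_map, &key);

   gomp_map_0len_lookup temporarily widens the key to [p, p+1) so that it
   intersects a mapping containing P; gomp_map_lookup additionally retries
   with [p-1, p) so that a pointer one past the end of a mapped object is
   still found.  */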

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    devicep->host2dev_func (devicep->target_id,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start);
  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}
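
/* Illustrative sketch: a map kind packs the map type into the low bits and
   log2 of the required alignment into the high bits; 16-bit kinds split
   8/8, 8-bit kinds split 5/3.  For example, for a hypothetical 16-bit kind
   0x0301:

     unsigned short kind = 0x0301;
     int type = kind & 0xff;                    // 0x01 == GOMP_MAP_TO
     size_t align = (size_t) 1 << (kind >> 8);  // 1 << 3 == 8 bytes

   This matches the (rshift, typemask) pairs (8, 0xff) and (3, 0x7) used by
   the mapping code below.  */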

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
                  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      devicep->host2dev_func (devicep->target_id,
                              (void *) (tgt->tgt_start + target_offset),
                              (void *) &cur_node.tgt_offset,
                              sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  devicep->host2dev_func (devicep->target_id,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *));
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
                              &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                      kind & typemask);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                  kind & typemask);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
           + tgt->list[i + 1].key->tgt_offset
           + tgt->list[i + 1].offset
           + (uintptr_t) hostaddrs[i]
           - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
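
/* Illustrative note: for list entries with a NULL key, tgt->list[i].offset
   doubles as a tag, as the checks above show.  ~0 means pass the host
   address through unchanged (e.g. GOMP_MAP_FIRSTPRIVATE_INT), ~1 means
   pass a zero address, and ~2 means the entry is a GOMP_MAP_STRUCT whose
   device address is derived from the following list entry plus the offset
   of hostaddrs[i] within it.  */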

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
        {
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start;
          splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("use_device_ptr pointer wasn't mapped");
            }
          cur_node.host_start -= n->host_start;
          hostaddrs[i]
            = (void *) (n->tgt->tgt_start + n->tgt_offset
                        + cur_node.host_start);
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 2;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first]
                          - (uintptr_t) hostaddrs[i];
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                tgt->list[i].key = NULL;
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                      sizes, kinds);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 1;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = ~(uintptr_t) 1;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
                                kind & typemask);
      else
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                devicep->host2dev_func (devicep->target_id,
                                        (void *) (tgt->tgt_start + tgt_size),
                                        (void *) hostaddrs[i], len);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_USE_DEVICE_PTR:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                            sizes, kinds);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                devicep->host2dev_func (devicep->target_id,
                                        (void *) (n->tgt->tgt_start
                                                  + n->tgt_offset
                                                  + cur_node.host_start
                                                  - n->host_start),
                                        (void *) &cur_node.tgt_offset,
                                        sizeof (void *));
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
                                      kind & typemask);
            else
              {
                k->link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != ~(size_t) 0)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = ~(size_t) 0;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->async_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i]);
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j]);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));

                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            sizeof (void *));
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) n->tgt_offset,
                                            &tgt_addr, sizeof (void *));
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          /* FIXME: see above FIXME comment.  */
          devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start
                                            + i * sizeof (void *)),
                                  (void *) &cur_node.tgt_offset,
                                  sizeof (void *));
        }
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
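
/* Illustrative note: the recurring expression

     tgt_size = (tgt_size + align - 1) & ~(align - 1);

   rounds tgt_size up to the next multiple of align, which is always a
   power of two here.  For example, with tgt_size == 13 and align == 8,
   (13 + 7) & ~7 == 16.  */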

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i].key == NULL)
      ;
    else if (tgt->list[i].key->refcount > 1)
      {
        tgt->list[i].key->refcount--;
        tgt->list[i].key->async_refcount++;
      }
    else
      {
        splay_tree_key k = tgt->list[i].key;
        if (tgt->list[i].copy_from)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          if (k->async_refcount > 0)
            k->async_refcount--;
          else
            {
              k->refcount--;
              do_unmap = true;
            }
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        devicep->dev2host_func (devicep->target_id,
                                (void *) (k->host_start + tgt->list[i].offset),
                                (void *) (k->tgt->tgt_start + k->tgt_offset
                                          + tgt->list[i].offset),
                                tgt->list[i].length);
      if (do_unmap)
        {
          splay_tree_remove (&devicep->mem_map, k);
          if (k->link_key)
            splay_tree_insert (&devicep->mem_map,
                               (splay_tree_node) k->link_key);
          if (k->tgt->refcount > 1)
            k->tgt->refcount--;
          else
            gomp_unmap_tgt (k->tgt);
        }
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }
            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              devicep->host2dev_func (devicep->target_id,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      (void *) cur_node.host_start,
                                      cur_node.host_end - cur_node.host_start);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              devicep->dev2host_func (devicep->target_id,
                                      (void *) cur_node.host_start,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      cur_node.host_end - cur_node.host_start);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->async_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->async_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
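
/* Illustrative sketch: HOST_TABLE is read above as four pointers, the
   begin/end of the function-address table and the begin/end of the
   variable table, where each variable occupies two slots (address, size).
   A hypothetical image with one function and one variable would pass
   something shaped like:

     static void *host_funcs[] = { (void *) foo_fn };
     static void *host_vars[]  = { (void *) &bar, (void *) sizeof (bar) };
     static void *host_table[] = { host_funcs, host_funcs + 1,
                                   host_vars, host_vars + 2 };

   The names foo_fn and bar are invented for illustration; the real tables
   are emitted by the compiler.  */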

/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  devicep->unload_image_func (devicep->target_id, version, target_data);

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          splay_tree_remove (&devicep->mem_map, n);
          if (n->link_key)
            {
              if (n->tgt->refcount > 1)
                n->tgt->refcount--;
              else
                {
                  is_tgt_unmapped = true;
                  gomp_unmap_tgt (n->tgt);
                }
            }
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
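
/* Illustrative, hypothetical sketch of a caller: each object containing an
   offload image would be linked with a constructor that registers the
   image, conceptually:

     __attribute__((constructor))
     static void
     register_this_image (void)
     {
       GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION, 0),
                                  host_table, OFFLOAD_TARGET_TYPE_NVIDIA_PTX,
                                  image_target_data);
     }

   where host_table and image_target_data stand for compiler-emitted data;
   the exact mechanism is toolchain-specific.  */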

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to the device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from the device all images registered so far.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}

/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
   locked on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}
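
/* Illustrative worked example: for two firstprivate vars, an int (size 4,
   align 4) followed by a double (size 8, align 8), the loop above yields
   tgt_align == 8 and tgt_size == 16: 0 rounds to 0, plus 4 gives 4, which
   rounds up to 8, plus 8 gives 16.  One block of 16 bytes aligned to 8 is
   enough for both private copies.  */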

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE into the block TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}

/* Host fallback with firstprivate map-type handling.  */

static void
gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
                                   void **hostaddrs, size_t *sizes,
                                   unsigned short *kinds)
{
  size_t tgt_align = 0, tgt_size = 0;
  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
                                       &tgt_size);
  if (tgt_align)
    {
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
                              tgt_size);
    }
  gomp_target_fallback (fn, hostaddrs);
}

/* Handle firstprivate map-type for shared memory devices and the host
   fallback.  Return the pointer to the block of firstprivate copies; it
   has to be freed after use.  */

static void *
gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs,
                                  size_t *sizes, unsigned short *kinds)
{
  size_t tgt_align = 0, tgt_size = 0;
  char *tgt = NULL;

  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
                                       &tgt_size);
  if (tgt_align)
    {
      tgt = gomp_malloc (tgt_size + tgt_align - 1);
      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
                              tgt_size);
    }
  return tgt;
}

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        {
          if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
            return NULL;
          else
            gomp_fatal ("Target function wasn't mapped");
        }

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use host fallback.  FN is the address of the host code,
   UNUSED is part of the current ABI, but we're not actually using it.
   HOSTADDRS, SIZES and KINDS are arrays with MAPNUM entries, with
   addresses of the host objects, sizes of the host objects (resp. for
   pointer kind pointer bias and assumed sizeof (void *) size) and
   kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
                     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
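
/* Illustrative, hypothetical sketch of what a compiler might emit for

     #pragma omp target map(tofrom: x)

   after outlining the region body into a function fn:

     GOMP_target (GOMP_DEVICE_ICV, fn, NULL, 1, hostaddrs, sizes, kinds);

   with hostaddrs[0] == &x, sizes[0] == sizeof (x) and kinds[0] encoding
   GOMP_MAP_TOFROM; the precise calling sequence is compiler-internal.  */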

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        gomp_task_maybe_wait_for_dependencies (depend);
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
      return;
    }

  struct target_mem_desc *tgt_vars;
  void *fpc = NULL;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds);
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);

  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
  else
    free (fpc);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds,
                        unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
                                           depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (team
                  && (gomp_team_barrier_cancelled (&team->barrier)
                      || (thr->task->taskgroup
                          && thr->task->taskgroup->cancelled)))
                return;

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
            k->refcount--;
          if ((kind == GOMP_MAP_DELETE
               || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
              && k->refcount != REFCOUNT_INFINITY)
            k->refcount = 0;

          if ((kind == GOMP_MAP_FROM && k->refcount == 0)
              || kind == GOMP_MAP_ALWAYS_FROM)
            devicep->dev2host_func (devicep->target_id,
                                    (void *) cur_node.host_start,
                                    (void *) (k->tgt->tgt_start + k->tgt_offset
                                              + cur_node.host_start
                                              - k->host_start),
                                    cur_node.host_end - cur_node.host_start);
          if (k->refcount == 0)
            {
              splay_tree_remove (&devicep->mem_map, k);
              if (k->link_key)
                splay_tree_insert (&devicep->mem_map,
                                   (splay_tree_node) k->link_key);
              if (k->tgt->refcount > 1)
                k->tgt->refcount--;
              else
                gomp_unmap_tgt (k->tgt);
            }

          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
                             size_t *sizes, unsigned short *kinds,
                             unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (team
                  && (gomp_team_barrier_cancelled (&team->barrier)
                      || (thr->task->taskgroup
                          && thr->task->taskgroup->cancelled)))
                return;

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
                         &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
          i += sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
                       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
1884 bool
1885 gomp_target_task_fn (void *data)
1887 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
1888 struct gomp_device_descr *devicep = ttask->devicep;
1890 if (ttask->fn != NULL)
1892 void *fn_addr;
1893 if (devicep == NULL
1894 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1895 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
1896 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1898 ttask->state = GOMP_TARGET_TASK_FALLBACK;
1899 gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
1900 ttask->hostaddrs, ttask->sizes,
1901 ttask->kinds);
1902 return false;
1905 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1907 if (ttask->tgt)
1908 gomp_unmap_vars (ttask->tgt, true);
1909 return false;
1912 void *actual_arguments;
1913 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1915 ttask->tgt = NULL;
1916 ttask->firstprivate_copies
1917 = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs,
1918 ttask->sizes, ttask->kinds);
1919 actual_arguments = ttask->hostaddrs;
1921 else
1923 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
1924 NULL, ttask->sizes, ttask->kinds, true,
1925 GOMP_MAP_VARS_TARGET);
1926 actual_arguments = (void *) ttask->tgt->tgt_start;
1928 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
1930 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
1931 ttask->args, (void *) ttask);
1932 return true;
1934 else if (devicep == NULL
1935 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1936 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1937 return false;
1939 size_t i;
1940 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
1941 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1942 ttask->kinds, true);
1943 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1944 for (i = 0; i < ttask->mapnum; i++)
1945 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1947 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
1948 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
1949 GOMP_MAP_VARS_ENTER_DATA);
1950 i += ttask->sizes[i];
1952 else
1953 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
1954 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1955 else
1956 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1957 ttask->kinds);
1958 return false;
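/* Rough lifecycle of an asynchronous target task as driven by the
   function above (a sketch; the completion callback itself lives in the
   plugins and in the task machinery):

     1. gomp_create_target_task queues the task.
     2. The first invocation of gomp_target_task_fn maps the variables,
        sets the state to GOMP_TARGET_TASK_READY_TO_RUN, fires
        async_run_func and returns true ("still running").
     3. The plugin signals completion, after which gomp_target_task_fn
        runs once more with state GOMP_TARGET_TASK_FINISHED, unmaps the
        variables and returns false ("task done").  */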
1961 void
1962 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1964 if (thread_limit)
1966 struct gomp_task_icv *icv = gomp_icv (true);
1967 icv->thread_limit_var
1968 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1970 (void) num_teams;
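/* For reference (a sketch of the lowering, which is the compiler's
   business): a construct such as

     #pragma omp teams num_teams(4) thread_limit(8)

   arrives here as GOMP_teams (4, 8).  Only the thread-limit ICV is
   recorded; num_teams is deliberately ignored here, since the host
   fallback executes the teams region with a single team.  */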
1973 void *
1974 omp_target_alloc (size_t size, int device_num)
1976 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1977 return malloc (size);
1979 if (device_num < 0)
1980 return NULL;
1982 struct gomp_device_descr *devicep = resolve_device (device_num);
1983 if (devicep == NULL)
1984 return NULL;
1986 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1987 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1988 return malloc (size);
1990 gomp_mutex_lock (&devicep->lock);
1991 void *ret = devicep->alloc_func (devicep->target_id, size);
1992 gomp_mutex_unlock (&devicep->lock);
1993 return ret;
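/* Usage sketch (illustrative; error handling abbreviated):

     int dev = omp_get_default_device ();
     double *p = (double *) omp_target_alloc (1024 * sizeof (double), dev);
     if (p == NULL)
       abort ();	/. out of device memory or invalid device number ./
     ...
     omp_target_free (p, dev);

   For shared-memory devices and the host fallback the pointer comes
   straight from malloc, as the code above shows.  */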
1996 void
1997 omp_target_free (void *device_ptr, int device_num)
1999 if (device_ptr == NULL)
2000 return;
2002 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2004 free (device_ptr);
2005 return;
2008 if (device_num < 0)
2009 return;
2011 struct gomp_device_descr *devicep = resolve_device (device_num);
2012 if (devicep == NULL)
2013 return;
2015 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2016 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2018 free (device_ptr);
2019 return;
2022 gomp_mutex_lock (&devicep->lock);
2023 devicep->free_func (devicep->target_id, device_ptr);
2024 gomp_mutex_unlock (&devicep->lock);
2027 int
2028 omp_target_is_present (void *ptr, int device_num)
2030 if (ptr == NULL)
2031 return 1;
2033 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2034 return 1;
2036 if (device_num < 0)
2037 return 0;
2039 struct gomp_device_descr *devicep = resolve_device (device_num);
2040 if (devicep == NULL)
2041 return 0;
2043 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2044 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2045 return 1;
2047 gomp_mutex_lock (&devicep->lock);
2048 struct splay_tree_s *mem_map = &devicep->mem_map;
2049 struct splay_tree_key_s cur_node;
2051 cur_node.host_start = (uintptr_t) ptr;
2052 cur_node.host_end = cur_node.host_start;
2053 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2054 int ret = n != NULL;
2055 gomp_mutex_unlock (&devicep->lock);
2056 return ret;
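/* Usage sketch (illustrative):

     #pragma omp target enter data map(to: a[0:N])
     assert (omp_target_is_present (a, omp_get_default_device ()));

   The zero-length lookup above means the query succeeds, in effect, for
   any address that falls inside a mapped block, even though no length
   is supplied.  */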
2059 int
2060 omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2061 size_t src_offset, int dst_device_num, int src_device_num)
2063 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2065 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2067 if (dst_device_num < 0)
2068 return EINVAL;
2070 dst_devicep = resolve_device (dst_device_num);
2071 if (dst_devicep == NULL)
2072 return EINVAL;
2074 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2075 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2076 dst_devicep = NULL;
2078 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2080 if (src_device_num < 0)
2081 return EINVAL;
2083 src_devicep = resolve_device (src_device_num);
2084 if (src_devicep == NULL)
2085 return EINVAL;
2087 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2088 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2089 src_devicep = NULL;
2091 if (src_devicep == NULL && dst_devicep == NULL)
2093 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2094 return 0;
2096 if (src_devicep == NULL)
2098 gomp_mutex_lock (&dst_devicep->lock);
2099 dst_devicep->host2dev_func (dst_devicep->target_id,
2100 (char *) dst + dst_offset,
2101 (char *) src + src_offset, length);
2102 gomp_mutex_unlock (&dst_devicep->lock);
2103 return 0;
2105 if (dst_devicep == NULL)
2107 gomp_mutex_lock (&src_devicep->lock);
2108 src_devicep->dev2host_func (src_devicep->target_id,
2109 (char *) dst + dst_offset,
2110 (char *) src + src_offset, length);
2111 gomp_mutex_unlock (&src_devicep->lock);
2112 return 0;
2114 if (src_devicep == dst_devicep)
2116 gomp_mutex_lock (&src_devicep->lock);
2117 src_devicep->dev2dev_func (src_devicep->target_id,
2118 (char *) dst + dst_offset,
2119 (char *) src + src_offset, length);
2120 gomp_mutex_unlock (&src_devicep->lock);
2121 return 0;
2123 return EINVAL;
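/* Usage sketch (illustrative): copy n bytes from a host buffer h into a
   device allocation d obtained from omp_target_alloc:

     int rc = omp_target_memcpy (d, h, n, 0, 0, dev,
                                 omp_get_initial_device ());
     if (rc != 0)
       ...	// EINVAL: bad device number or unsupported combination

   Note that copies between two *different* offloading devices are not
   supported and fall through to the EINVAL return above.  */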
2126 static int
2127 omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
2128 int num_dims, const size_t *volume,
2129 const size_t *dst_offsets,
2130 const size_t *src_offsets,
2131 const size_t *dst_dimensions,
2132 const size_t *src_dimensions,
2133 struct gomp_device_descr *dst_devicep,
2134 struct gomp_device_descr *src_devicep)
2136 size_t dst_slice = element_size;
2137 size_t src_slice = element_size;
2138 size_t j, dst_off, src_off, length;
2139 int i, ret;
2141 if (num_dims == 1)
2143 if (__builtin_mul_overflow (element_size, volume[0], &length)
2144 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2145 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2146 return EINVAL;
2147 if (dst_devicep == NULL && src_devicep == NULL)
2148 memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
2149 else if (src_devicep == NULL)
2150 dst_devicep->host2dev_func (dst_devicep->target_id,
2151 (char *) dst + dst_off,
2152 (char *) src + src_off, length);
2153 else if (dst_devicep == NULL)
2154 src_devicep->dev2host_func (src_devicep->target_id,
2155 (char *) dst + dst_off,
2156 (char *) src + src_off, length);
2157 else if (src_devicep == dst_devicep)
2158 src_devicep->dev2dev_func (src_devicep->target_id,
2159 (char *) dst + dst_off,
2160 (char *) src + src_off, length);
2161 else
2162 return EINVAL;
2163 return 0;
2166 /* FIXME: it would be nice to have some plugin function to handle
2167 num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
2168 counts are handled by the generic recursion below, and for host-to-host
2169 copies the recursion should be used for any num_dims >= 2 anyway. */
2171 for (i = 1; i < num_dims; i++)
2172 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2173 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2174 return EINVAL;
2175 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2176 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2177 return EINVAL;
2178 for (j = 0; j < volume[0]; j++)
2180 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2181 (char *) src + src_off,
2182 element_size, num_dims - 1,
2183 volume + 1, dst_offsets + 1,
2184 src_offsets + 1, dst_dimensions + 1,
2185 src_dimensions + 1, dst_devicep,
2186 src_devicep);
2187 if (ret)
2188 return ret;
2189 dst_off += dst_slice;
2190 src_off += src_slice;
2192 return 0;
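/* Worked example of the slice arithmetic above (a sketch): copying a
   2 x 3 volume of 8-byte elements out of a 4 x 5 source into a 6 x 7
   destination gives

     dst_slice = 8 * 7 = 56 bytes	src_slice = 8 * 5 = 40 bytes
     dst_off   = 56 * dst_offsets[0]	src_off   = 40 * src_offsets[0]

   after which the loop performs volume[0] = 2 one-dimensional copies of
   3 elements (24 bytes) each, advancing by one slice per iteration.  */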
2195 int
2196 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2197 int num_dims, const size_t *volume,
2198 const size_t *dst_offsets,
2199 const size_t *src_offsets,
2200 const size_t *dst_dimensions,
2201 const size_t *src_dimensions,
2202 int dst_device_num, int src_device_num)
2204 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2206 if (!dst && !src)
2207 return INT_MAX;
2209 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2211 if (dst_device_num < 0)
2212 return EINVAL;
2214 dst_devicep = resolve_device (dst_device_num);
2215 if (dst_devicep == NULL)
2216 return EINVAL;
2218 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2219 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2220 dst_devicep = NULL;
2222 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2224 if (src_device_num < 0)
2225 return EINVAL;
2227 src_devicep = resolve_device (src_device_num);
2228 if (src_devicep == NULL)
2229 return EINVAL;
2231 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2232 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2233 src_devicep = NULL;
2236 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2237 return EINVAL;
2239 if (src_devicep)
2240 gomp_mutex_lock (&src_devicep->lock);
2241 else if (dst_devicep)
2242 gomp_mutex_lock (&dst_devicep->lock);
2243 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2244 volume, dst_offsets, src_offsets,
2245 dst_dimensions, src_dimensions,
2246 dst_devicep, src_devicep);
2247 if (src_devicep)
2248 gomp_mutex_unlock (&src_devicep->lock);
2249 else if (dst_devicep)
2250 gomp_mutex_unlock (&dst_devicep->lock);
2251 return ret;
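/* Usage sketch (illustrative): copy the 2 x 3 sub-block starting at row
   1, column 2 of a host matrix h[4][5] of doubles into the same
   position of a device matrix d with the same dimensions:

     size_t volume[2] = { 2, 3 };
     size_t offs[2]   = { 1, 2 };
     size_t dims[2]   = { 4, 5 };
     int rc = omp_target_memcpy_rect (d, h, sizeof (double), 2, volume,
                                      offs, offs, dims, dims, dev,
                                      omp_get_initial_device ());

   Calling it with both DST and SRC NULL is the documented query form;
   the INT_MAX return above advertises how many dimensions are
   supported.  */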
2254 int
2255 omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2256 size_t device_offset, int device_num)
2258 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2259 return EINVAL;
2261 if (device_num < 0)
2262 return EINVAL;
2264 struct gomp_device_descr *devicep = resolve_device (device_num);
2265 if (devicep == NULL)
2266 return EINVAL;
2268 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2269 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2270 return EINVAL;
2272 gomp_mutex_lock (&devicep->lock);
2274 struct splay_tree_s *mem_map = &devicep->mem_map;
2275 struct splay_tree_key_s cur_node;
2276 int ret = EINVAL;
2278 cur_node.host_start = (uintptr_t) host_ptr;
2279 cur_node.host_end = cur_node.host_start + size;
2280 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2281 if (n)
2283 if (n->tgt->tgt_start + n->tgt_offset
2284 == (uintptr_t) device_ptr + device_offset
2285 && n->host_start <= cur_node.host_start
2286 && n->host_end >= cur_node.host_end)
2287 ret = 0;
2289 else
2291 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2292 tgt->array = gomp_malloc (sizeof (*tgt->array));
2293 tgt->refcount = 1;
2294 tgt->tgt_start = 0;
2295 tgt->tgt_end = 0;
2296 tgt->to_free = NULL;
2297 tgt->prev = NULL;
2298 tgt->list_count = 0;
2299 tgt->device_descr = devicep;
2300 splay_tree_node array = tgt->array;
2301 splay_tree_key k = &array->key;
2302 k->host_start = cur_node.host_start;
2303 k->host_end = cur_node.host_end;
2304 k->tgt = tgt;
2305 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2306 k->refcount = REFCOUNT_INFINITY;
2307 k->async_refcount = 0;
2308 array->left = NULL;
2309 array->right = NULL;
2310 splay_tree_insert (&devicep->mem_map, array);
2311 ret = 0;
2313 gomp_mutex_unlock (&devicep->lock);
2314 return ret;
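/* Usage sketch (illustrative): publish an existing device allocation as
   the device copy of a host buffer, so later target constructs reuse it
   instead of mapping afresh:

     void *dptr = omp_target_alloc (n, dev);
     if (omp_target_associate_ptr (hbuf, dptr, n, 0, dev) == 0)
       {
         ...	// hbuf is now "present" on dev
         omp_target_disassociate_ptr (hbuf, dev);
       }
     omp_target_free (dptr, dev);

   The REFCOUNT_INFINITY set above is what keeps ordinary map/unmap
   pairs from ever copying out or freeing an associated mapping.  */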
2317 int
2318 omp_target_disassociate_ptr (void *ptr, int device_num)
2320 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2321 return EINVAL;
2323 if (device_num < 0)
2324 return EINVAL;
2326 struct gomp_device_descr *devicep = resolve_device (device_num);
2327 if (devicep == NULL)
2328 return EINVAL;
2330 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2331 return EINVAL;
2333 gomp_mutex_lock (&devicep->lock);
2335 struct splay_tree_s *mem_map = &devicep->mem_map;
2336 struct splay_tree_key_s cur_node;
2337 int ret = EINVAL;
2339 cur_node.host_start = (uintptr_t) ptr;
2340 cur_node.host_end = cur_node.host_start;
2341 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2342 if (n
2343 && n->host_start == cur_node.host_start
2344 && n->refcount == REFCOUNT_INFINITY
2345 && n->tgt->tgt_start == 0
2346 && n->tgt->to_free == NULL
2347 && n->tgt->refcount == 1
2348 && n->tgt->list_count == 0)
2350 splay_tree_remove (&devicep->mem_map, n);
2351 gomp_unmap_tgt (n->tgt);
2352 ret = 0;
2355 gomp_mutex_unlock (&devicep->lock);
2356 return ret;
2359 #ifdef PLUGIN_SUPPORT
2361 /* This function tries to load a plugin for DEVICE.  The name of the
2362 plugin is passed in PLUGIN_NAME.
2363 The handles of the found functions are stored in the corresponding fields
2364 of DEVICE.  The function returns TRUE on success and FALSE otherwise. */
2366 static bool
2367 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2368 const char *plugin_name)
2370 const char *err = NULL, *last_missing = NULL;
2372 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2373 if (!plugin_handle)
2374 goto dl_fail;
2376 /* Check if all required functions are available in the plugin and store
2377 their handles.  None of the symbols can legitimately be NULL,
2378 so we don't need to check dlerror all the time. */
2379 #define DLSYM(f) \
2380 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2381 goto dl_fail
2382 /* Similar, but missing functions are not an error; the expression
2383 evaluates to false if the lookup failed, true otherwise. */
2384 #define DLSYM_OPT(f, n) \
2385 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2386 || (last_missing = #n, 0))
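/* For example, DLSYM (alloc) expands (roughly) to

     if (!(device->alloc_func = dlsym (plugin_handle,
				       "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   whereas DLSYM_OPT (openacc.exec, openacc_parallel) is an expression
   that stores the symbol if found, and otherwise records the name in
   last_missing and evaluates to false.  */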
2388 DLSYM (version);
2389 if (device->version_func () != GOMP_VERSION)
2391 err = "plugin version mismatch";
2392 goto fail;
2395 DLSYM (get_name);
2396 DLSYM (get_caps);
2397 DLSYM (get_type);
2398 DLSYM (get_num_devices);
2399 DLSYM (init_device);
2400 DLSYM (fini_device);
2401 DLSYM (load_image);
2402 DLSYM (unload_image);
2403 DLSYM (alloc);
2404 DLSYM (free);
2405 DLSYM (dev2host);
2406 DLSYM (host2dev);
2407 device->capabilities = device->get_caps_func ();
2408 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2410 DLSYM (run);
2411 DLSYM (async_run);
2412 DLSYM_OPT (can_run, can_run);
2413 DLSYM (dev2dev);
2415 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2417 if (!DLSYM_OPT (openacc.exec, openacc_parallel)
2418 || !DLSYM_OPT (openacc.register_async_cleanup,
2419 openacc_register_async_cleanup)
2420 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2421 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2422 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2423 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2424 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2425 || !DLSYM_OPT (openacc.async_wait_all_async,
2426 openacc_async_wait_all_async)
2427 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2428 || !DLSYM_OPT (openacc.create_thread_data,
2429 openacc_create_thread_data)
2430 || !DLSYM_OPT (openacc.destroy_thread_data,
2431 openacc_destroy_thread_data))
2433 /* Require all the OpenACC handlers if we have
2434 GOMP_OFFLOAD_CAP_OPENACC_200. */
2435 err = "plugin missing OpenACC handler function";
2436 goto fail;
2439 unsigned cuda = 0;
2440 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2441 openacc_get_current_cuda_device);
2442 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2443 openacc_get_current_cuda_context);
2444 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
2445 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
2446 if (cuda && cuda != 4)
2448 /* Make sure all the CUDA functions are there if any of them are. */
2449 err = "plugin missing OpenACC CUDA handler function";
2450 goto fail;
2453 #undef DLSYM
2454 #undef DLSYM_OPT
2456 return true;
2458 dl_fail:
2459 err = dlerror ();
2460 fail:
2461 gomp_error ("while loading %s: %s", plugin_name, err);
2462 if (last_missing)
2463 gomp_error ("missing function was %s", last_missing);
2464 if (plugin_handle)
2465 dlclose (plugin_handle);
2467 return false;
2470 /* This function finalizes all initialized devices. */
2472 static void
2473 gomp_target_fini (void)
2475 int i;
2476 for (i = 0; i < num_devices; i++)
2478 struct gomp_device_descr *devicep = &devices[i];
2479 gomp_mutex_lock (&devicep->lock);
2480 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2482 devicep->fini_device_func (devicep->target_id);
2483 devicep->state = GOMP_DEVICE_FINALIZED;
2485 gomp_mutex_unlock (&devicep->lock);
2489 /* This function initializes the runtime needed for offloading.
2490 It parses the list of offload targets and tries to load the plugins for
2491 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2492 will be set, and the array DEVICES initialized, containing descriptors for
2493 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
2494 by the others. */
2496 static void
2497 gomp_target_init (void)
2499 const char *prefix = "libgomp-plugin-";
2500 const char *suffix = SONAME_SUFFIX (1);
2501 const char *cur, *next;
2502 char *plugin_name;
2503 int i, new_num_devices;
2505 num_devices = 0;
2506 devices = NULL;
2508 cur = OFFLOAD_TARGETS;
2509 if (*cur)
2510 do
2512 struct gomp_device_descr current_device;
2514 next = strchr (cur, ',');
2516 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
2517 + strlen (prefix) + strlen (suffix));
2518 if (!plugin_name)
2520 num_devices = 0;
2521 break;
2524 strcpy (plugin_name, prefix);
2525 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
2526 strcat (plugin_name, suffix);
2528 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2530 new_num_devices = current_device.get_num_devices_func ();
2531 if (new_num_devices >= 1)
2533 /* Augment DEVICES and NUM_DEVICES. */
2535 devices = realloc (devices, (num_devices + new_num_devices)
2536 * sizeof (struct gomp_device_descr));
2537 if (!devices)
2539 num_devices = 0;
2540 free (plugin_name);
2541 break;
2544 current_device.name = current_device.get_name_func ();
2545 /* current_device.capabilities has already been set. */
2546 current_device.type = current_device.get_type_func ();
2547 current_device.mem_map.root = NULL;
2548 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2549 current_device.openacc.data_environ = NULL;
2550 for (i = 0; i < new_num_devices; i++)
2552 current_device.target_id = i;
2553 devices[num_devices] = current_device;
2554 gomp_mutex_init (&devices[num_devices].lock);
2555 num_devices++;
2560 free (plugin_name);
2561 cur = next + 1;
2563 while (next);
2565 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2566 NUM_DEVICES_OPENMP. */
2567 struct gomp_device_descr *devices_s
2568 = malloc (num_devices * sizeof (struct gomp_device_descr));
2569 if (!devices_s)
2571 num_devices = 0;
2572 free (devices);
2573 devices = NULL;
2575 num_devices_openmp = 0;
2576 for (i = 0; i < num_devices; i++)
2577 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2578 devices_s[num_devices_openmp++] = devices[i];
2579 int num_devices_after_openmp = num_devices_openmp;
2580 for (i = 0; i < num_devices; i++)
2581 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2582 devices_s[num_devices_after_openmp++] = devices[i];
2583 free (devices);
2584 devices = devices_s;
2586 for (i = 0; i < num_devices; i++)
2588 /* The 'devices' array can be moved (by the realloc call) until we have
2589 found all the plugins, so registering with the OpenACC runtime (which
2590 takes a copy of the pointer argument) must be delayed until now. */
2591 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2592 goacc_register (&devices[i]);
2595 if (atexit (gomp_target_fini) != 0)
2596 gomp_fatal ("atexit failed");
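/* For example (a sketch; the exact target names and SONAME suffix are
   configure-time details): with OFFLOAD_TARGETS set to "nvptx,intelmic"
   the loop above tries to dlopen "libgomp-plugin-nvptx.so.1" and then
   "libgomp-plugin-intelmic.so.1", appending each plugin's devices to
   DEVICES as it goes.  */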
2599 #else /* PLUGIN_SUPPORT */
2600 /* If dlfcn.h is unavailable, we always fall back to host execution.
2601 The GOMP_target* routines are just stubs in this case. */
2602 static void
2603 gomp_target_init (void)
2606 #endif /* PLUGIN_SUPPORT */