libgomp/target.c

/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (!devices[device_id].is_initialized)
    gomp_init_device (&devices[device_id]);
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
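
/* Illustrative sketch (not part of libgomp): how a device number reaches
   resolve_device.  GOMP_DEVICE_ICV defers to the default-device-var ICV,
   e.g. as set by a prior omp_set_default_device call; an out-of-range id
   yields NULL, which callers treat as host fallback.  */
#if 0
omp_set_default_device (1);  /* Hypothetical second offload device.  */
struct gomp_device_descr *d = resolve_device (GOMP_DEVICE_ICV);
/* d now points at devices[1], lazily initialized on first use.  */
#endif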

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
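
/* Illustrative sketch (not part of libgomp): looking up a zero-length
   array section.  When host_start == host_end, gomp_map_lookup also
   accepts a mapping that merely contains the byte before the address,
   whereas gomp_map_0len_lookup only probes the single byte at the
   address itself.  PTR below is a hypothetical host pointer.  */
#if 0
struct splay_tree_key_s key;
key.host_start = (uintptr_t) ptr;
key.host_end = key.host_start;  /* Zero-length section.  */
splay_tree_key n = gomp_map_0len_lookup (&devicep->mem_map, &key);
#endif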

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    devicep->host2dev_func (devicep->target_id,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start);
  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
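
/* Illustrative sketch (not part of libgomp): decoding a map kind.  With
   short_mapkind the low 8 bits select the GOMP_MAP_* type and the upper
   bits hold log2 of the alignment; the old 8-bit encoding splits 3/5
   (typemask 0x7, rshift 3) instead.  */
#if 0
unsigned short kinds[] = { GOMP_MAP_TOFROM | (3 << 8) };
int kind = get_kind (true, kinds, 0);
int type = kind & 0xff;			   /* GOMP_MAP_TOFROM.  */
size_t align = (size_t) 1 << (kind >> 8); /* 1 << 3 == 8 bytes.  */
#endif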

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      devicep->host2dev_func (devicep->target_id,
			      (void *) (tgt->tgt_start + target_offset),
			      (void *) &cur_node.tgt_offset,
			      sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is the target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  devicep->host2dev_func (devicep->target_id,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *));
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
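
/* Note (added for exposition): when tgt->list[i].key is NULL, the offset
   field above doubles as a sentinel.  ~(uintptr_t) 0 passes the host
   address through unchanged (firstprivate-int, use_device_ptr),
   ~(uintptr_t) 1 yields a NULL device address (unmapped zero-length
   sections, always-pointer placeholders), and ~(uintptr_t) 2 marks a
   GOMP_MAP_STRUCT entry whose device address is derived from the entry
   that follows it.  */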

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first]
			  - (uintptr_t) hostaddrs[i];
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		tgt->list[i].key = NULL;
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
				      sizes, kinds);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
				kind & typemask);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		devicep->host2dev_func (devicep->target_id,
					(void *) (tgt->tgt_start + tgt_size),
					(void *) hostaddrs[i], len);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
					    sizes, kinds);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		devicep->host2dev_func (devicep->target_id,
					(void *) (n->tgt->tgt_start
						  + n->tgt_offset
						  + cur_node.host_start
						  - n->host_start),
					(void *) &cur_node.tgt_offset,
					sizeof (void *));
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n)
	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
				      kind & typemask);
	    else
	      {
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->async_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    /* FIXME: Perhaps add some smarts, like if copying
		       several adjacent fields from host to target, use some
		       host buffer to avoid sending each var individually.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i]);
		    break;
		  case GOMP_MAP_TO_PSET:
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j]);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));

		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    sizeof (void *));
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  /* FIXME: see above FIXME comment.  */
	  devicep->host2dev_func (devicep->target_id,
				  (void *) (tgt->tgt_start
					    + i * sizeof (void *)),
				  (void *) &cur_node.tgt_offset,
				  sizeof (void *));
	}
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
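
/* Illustrative sketch (not part of libgomp): the arrays a compiler would
   pass for a hypothetical "#pragma omp target map(tofrom: a)" with int a.
   For GOMP_MAP_VARS_TARGET the device block is prefixed with mapnum
   device addresses, which is why run_func is later handed tgt_start.  */
#if 0
void *hostaddrs[] = { &a };
size_t sizes[] = { sizeof (a) };
unsigned short kinds[] = { GOMP_MAP_TOFROM | (2 << 8) };  /* 4-byte align.  */
struct target_mem_desc *tgt
  = gomp_map_vars (devicep, 1, hostaddrs, NULL, sizes, kinds, true,
		   GOMP_MAP_VARS_TARGET);
#endif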

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i].key == NULL)
      ;
    else if (tgt->list[i].key->refcount > 1)
      {
	tgt->list[i].key->refcount--;
	tgt->list[i].key->async_refcount++;
      }
    else
      {
	splay_tree_key k = tgt->list[i].key;
	if (tgt->list[i].copy_from)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  if (k->async_refcount > 0)
	    k->async_refcount--;
	  else
	    {
	      k->refcount--;
	      do_unmap = true;
	    }
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	devicep->dev2host_func (devicep->target_id,
				(void *) (k->host_start + tgt->list[i].offset),
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + tgt->list[i].offset),
				tgt->list[i].length);
      if (do_unmap)
	{
	  splay_tree_remove (&devicep->mem_map, k);
	  if (k->tgt->refcount > 1)
	    k->tgt->refcount--;
	  else
	    gomp_unmap_tgt (k->tgt);
	}
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
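
/* Note (added for exposition): k->refcount counts the live target_mem_descs
   that reference a mapping, with REFCOUNT_INFINITY reserved for global
   image data that is never unmapped.  async_refcount, taken in
   gomp_copy_from_async above, keeps the final decrement from happening
   until a queued device-to-host copy has drained.  */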

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }
	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      devicep->host2dev_func (devicep->target_id,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      (void *) cur_node.host_start,
				      cur_node.host_end - cur_node.host_start);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      devicep->dev2host_func (devicep->target_id,
				      (void *) cur_node.host_start,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      cur_node.host_end - cur_node.host_start);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->async_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
	  != (uintptr_t) host_var_table[i * 2 + 1])
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Can't map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = REFCOUNT_INFINITY;
      k->async_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
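
/* Illustrative sketch (not part of libgomp): the layout behind HOST_TABLE.
   The compiler emits four pointers bracketing the host tables; the var
   table stores (address, size) pairs, hence the division by 2 above.
   foo, bar and x are hypothetical.  */
#if 0
void *funcs[] = { (void *) foo, (void *) bar };
void *vars[] = { (void *) &x, (void *) sizeof (x) };
void *host_table[] = { &funcs[0], &funcs[2], &vars[0], &vars[2] };
#endif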

/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  unsigned j;
  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  devicep->unload_image_func (devicep->target_id, version, target_data);

  /* Remove mappings from splay tree.  */
  for (j = 0; j < num_funcs; j++)
    {
      k.host_start = (uintptr_t) host_func_table[j];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  for (j = 0; j < num_vars; j++)
    {
      k.host_start = (uintptr_t) host_var_table[j * 2];
      k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
      splay_tree_remove (&devicep->mem_map, &k);
    }

  if (node)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image when it is
   loaded.  It gets the descriptor of the host func and var tables
   HOST_TABLE, the TARGET_TYPE of the target, and the TARGET_DATA needed
   by the target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image when it is
   unloaded.  It gets the descriptor of the host func and var tables
   HOST_TABLE, the TARGET_TYPE of the target, and the TARGET_DATA needed
   by the target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->is_initialized = true;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    {
      unsigned i;

      /* Unload from device all images registered so far.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
   locked on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* This function de-initializes the target device, specified by DEVICEP.
   DEVICEP must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_fini_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    devicep->fini_device_func (devicep->target_id);

  devicep->is_initialized = false;
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Host fallback with firstprivate map-type handling.  */

static void
gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
				   void **hostaddrs, size_t *sizes,
				   unsigned short *kinds)
{
  size_t i, tgt_align = 0, tgt_size = 0;
  char *tgt = NULL;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (tgt_align < align)
	  tgt_align = align;
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	tgt_size += sizes[i];
      }
  if (tgt_align)
    {
      tgt = gomp_alloca (tgt_size + tgt_align - 1);
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  gomp_target_fallback (fn, hostaddrs);
}

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	gomp_fatal ("Target function wasn't mapped");

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, use the default-device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than the last available hw device), use host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return gomp_target_fallback (fn, hostaddrs);

  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_unmap_vars (tgt_vars, true);
}
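
/* Illustrative sketch (not part of libgomp): roughly what a compiler
   emits for "#pragma omp target map(tofrom: x)" with int x under this
   old 8-bit-kind ABI; fn is the hypothetical outlined region body.  */
#if 0
void *hostaddrs[] = { &x };
size_t sizes[] = { sizeof (x) };
unsigned char kinds[] = { GOMP_MAP_TOFROM | (2 << 3) };  /* 4-byte align.  */
GOMP_target (GOMP_DEVICE_ICV, fn, NULL, 1, hostaddrs, sizes, kinds);
#endif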

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.
   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, int num_teams,
		 int thread_limit)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  (void) num_teams;
  (void) thread_limit;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
      return;
    }

  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_unmap_vars (tgt_vars, true);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, we need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    devicep->dev2host_func (devicep->target_id,
				    (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
					     ttask->hostaddrs, ttask->sizes,
					     ttask->kinds);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
      ttask->tgt
	= gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
			 ttask->sizes, ttask->kinds, true,
			 GOMP_MAP_VARS_TARGET);
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr,
			       (void *) ttask->tgt->tgt_start, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  devicep->free_func (devicep->target_id, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
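
/* Illustrative sketch (not part of libgomp): pairing the two OpenMP 4.5
   entry points above.  Device number 0 is a hypothetical offload device;
   without offload capability both calls degrade to malloc/free.  */
#if 0
void *p = omp_target_alloc (1024, 0);
if (p != NULL)
  omp_target_free (p, 0);
#endif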

int
omp_target_is_present (void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
1881 omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
1882 size_t src_offset, int dst_device_num, int src_device_num)
1884 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
1886 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
1888 if (dst_device_num < 0)
1889 return EINVAL;
1891 dst_devicep = resolve_device (dst_device_num);
1892 if (dst_devicep == NULL)
1893 return EINVAL;
1895 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1896 dst_devicep = NULL;
1898 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
1900 if (src_device_num < 0)
1901 return EINVAL;
1903 src_devicep = resolve_device (src_device_num);
1904 if (src_devicep == NULL)
1905 return EINVAL;
1907 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1908 src_devicep = NULL;
1910 if (src_devicep == NULL && dst_devicep == NULL)
1912 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
1913 return 0;
1915 if (src_devicep == NULL)
1917 gomp_mutex_lock (&dst_devicep->lock);
1918 dst_devicep->host2dev_func (dst_devicep->target_id,
1919 (char *) dst + dst_offset,
1920 (char *) src + src_offset, length);
1921 gomp_mutex_unlock (&dst_devicep->lock);
1922 return 0;
1924 if (dst_devicep == NULL)
1926 gomp_mutex_lock (&src_devicep->lock);
1927 src_devicep->dev2host_func (src_devicep->target_id,
1928 (char *) dst + dst_offset,
1929 (char *) src + src_offset, length);
1930 gomp_mutex_unlock (&src_devicep->lock);
1931 return 0;
1933 if (src_devicep == dst_devicep)
1935 gomp_mutex_lock (&src_devicep->lock);
1936 src_devicep->dev2dev_func (src_devicep->target_id,
1937 (char *) dst + dst_offset,
1938 (char *) src + src_offset, length);
1939 gomp_mutex_unlock (&src_devicep->lock);
1940 return 0;
1942 return EINVAL;
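/* Usage sketch (illustrative only): copy a host buffer into memory
   obtained from omp_target_alloc.  Assumes omp_get_initial_device ()
   yields GOMP_DEVICE_HOST_FALLBACK, so SRC is treated as a host pointer
   and the host2dev path above is taken.

     int host_buf[256];
     int dev = omp_get_default_device ();
     void *dev_buf = omp_target_alloc (sizeof (host_buf), dev);
     int err = omp_target_memcpy (dev_buf, host_buf, sizeof (host_buf),
                                  0, 0, dev, omp_get_initial_device ());
*/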
1945 static int
1946 omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
1947 int num_dims, const size_t *volume,
1948 const size_t *dst_offsets,
1949 const size_t *src_offsets,
1950 const size_t *dst_dimensions,
1951 const size_t *src_dimensions,
1952 struct gomp_device_descr *dst_devicep,
1953 struct gomp_device_descr *src_devicep)
1955 size_t dst_slice = element_size;
1956 size_t src_slice = element_size;
1957 size_t j, dst_off, src_off, length;
1958 int i, ret;
1960 if (num_dims == 1)
1962 if (__builtin_mul_overflow (element_size, volume[0], &length)
1963 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
1964 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
1965 return EINVAL;
1966 if (dst_devicep == NULL && src_devicep == NULL)
1967 memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
1968 else if (src_devicep == NULL)
1969 dst_devicep->host2dev_func (dst_devicep->target_id,
1970 (char *) dst + dst_off,
1971 (char *) src + src_off, length);
1972 else if (dst_devicep == NULL)
1973 src_devicep->dev2host_func (src_devicep->target_id,
1974 (char *) dst + dst_off,
1975 (char *) src + src_off, length);
1976 else if (src_devicep == dst_devicep)
1977 src_devicep->dev2dev_func (src_devicep->target_id,
1978 (char *) dst + dst_off,
1979 (char *) src + src_off, length);
1980 else
1981 return EINVAL;
1982 return 0;
1985 /* FIXME: it would be nice to have some plugin function to handle
1986 num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
1987 counts can be handled by the generic recursion below; for host-to-host
1988 copies the recursion should be used even for num_dims >= 2.  */
1990 for (i = 1; i < num_dims; i++)
1991 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
1992 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
1993 return EINVAL;
1994 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
1995 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
1996 return EINVAL;
1997 for (j = 0; j < volume[0]; j++)
1999 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2000 (char *) src + src_off,
2001 element_size, num_dims - 1,
2002 volume + 1, dst_offsets + 1,
2003 src_offsets + 1, dst_dimensions + 1,
2004 src_dimensions + 1, dst_devicep,
2005 src_devicep);
2006 if (ret)
2007 return ret;
2008 dst_off += dst_slice;
2009 src_off += src_slice;
2011 return 0;
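/* Worked example of the slice arithmetic above (illustrative only): for
   a 2-D copy with element_size == 4 and dst_dimensions == {X, Y}, the
   loop computes dst_slice == 4 * Y, the size in bytes of one destination
   row; dst_off == dst_slice * dst_offsets[0] skips the leading rows, and
   each 1-D recursive call then copies volume[1] * 4 bytes starting
   dst_offsets[1] elements into the row, advancing DST_OFF by one row per
   iteration.  */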
2014 int
2015 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2016 int num_dims, const size_t *volume,
2017 const size_t *dst_offsets,
2018 const size_t *src_offsets,
2019 const size_t *dst_dimensions,
2020 const size_t *src_dimensions,
2021 int dst_device_num, int src_device_num)
2023 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2025 if (!dst && !src)
2026 return INT_MAX;  /* OpenMP: query for the maximum supported num_dims.  */
2028 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2030 if (dst_device_num < 0)
2031 return EINVAL;
2033 dst_devicep = resolve_device (dst_device_num);
2034 if (dst_devicep == NULL)
2035 return EINVAL;
2037 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2038 dst_devicep = NULL;
2040 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2042 if (src_device_num < 0)
2043 return EINVAL;
2045 src_devicep = resolve_device (src_device_num);
2046 if (src_devicep == NULL)
2047 return EINVAL;
2049 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2050 src_devicep = NULL;
2053 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2054 return EINVAL;
2056 if (src_devicep)
2057 gomp_mutex_lock (&src_devicep->lock);
2058 else if (dst_devicep)
2059 gomp_mutex_lock (&dst_devicep->lock);
2060 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2061 volume, dst_offsets, src_offsets,
2062 dst_dimensions, src_dimensions,
2063 dst_devicep, src_devicep);
2064 if (src_devicep)
2065 gomp_mutex_unlock (&src_devicep->lock);
2066 else if (dst_devicep)
2067 gomp_mutex_unlock (&dst_devicep->lock);
2068 return ret;
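/* Usage sketch (illustrative only): copy the top-left 2x3 corner of a
   4x5 host matrix of doubles into an equally shaped device matrix.
   Same host-device assumption as for omp_target_memcpy above.

     double src[4][5];
     int dev = omp_get_default_device ();
     void *dst = omp_target_alloc (sizeof (src), dev);
     size_t volume[2] = { 2, 3 };
     size_t offsets[2] = { 0, 0 };
     size_t dims[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2,
                                       volume, offsets, offsets, dims,
                                       dims, dev,
                                       omp_get_initial_device ());
*/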
2071 int
2072 omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2073 size_t device_offset, int device_num)
2075 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2076 return EINVAL;
2078 if (device_num < 0)
2079 return EINVAL;
2081 struct gomp_device_descr *devicep = resolve_device (device_num);
2082 if (devicep == NULL)
2083 return EINVAL;
2085 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2086 return EINVAL;
2088 gomp_mutex_lock (&devicep->lock);
2090 struct splay_tree_s *mem_map = &devicep->mem_map;
2091 struct splay_tree_key_s cur_node;
2092 int ret = EINVAL;
2094 cur_node.host_start = (uintptr_t) host_ptr;
2095 cur_node.host_end = cur_node.host_start + size;
2096 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2097 if (n)
2099 if (n->tgt->tgt_start + n->tgt_offset
2100 == (uintptr_t) device_ptr + device_offset
2101 && n->host_start <= cur_node.host_start
2102 && n->host_end >= cur_node.host_end)
2103 ret = 0;
2105 else
2107 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2108 tgt->array = gomp_malloc (sizeof (*tgt->array));
2109 tgt->refcount = 1;
2110 tgt->tgt_start = 0;
2111 tgt->tgt_end = 0;
2112 tgt->to_free = NULL;
2113 tgt->prev = NULL;
2114 tgt->list_count = 0;
2115 tgt->device_descr = devicep;
2116 splay_tree_node array = tgt->array;
2117 splay_tree_key k = &array->key;
2118 k->host_start = cur_node.host_start;
2119 k->host_end = cur_node.host_end;
2120 k->tgt = tgt;
2121 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2122 k->refcount = REFCOUNT_INFINITY;
2123 k->async_refcount = 0;
2124 array->left = NULL;
2125 array->right = NULL;
2126 splay_tree_insert (&devicep->mem_map, array);
2127 ret = 0;
2129 gomp_mutex_unlock (&devicep->lock);
2130 return ret;
2133 int
2134 omp_target_disassociate_ptr (void *ptr, int device_num)
2136 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2137 return EINVAL;
2139 if (device_num < 0)
2140 return EINVAL;
2142 struct gomp_device_descr *devicep = resolve_device (device_num);
2143 if (devicep == NULL)
2144 return EINVAL;
2146 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2147 return EINVAL;
2149 gomp_mutex_lock (&devicep->lock);
2151 struct splay_tree_s *mem_map = &devicep->mem_map;
2152 struct splay_tree_key_s cur_node;
2153 int ret = EINVAL;
2155 cur_node.host_start = (uintptr_t) ptr;
2156 cur_node.host_end = cur_node.host_start;
2157 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2158 if (n
2159 && n->host_start == cur_node.host_start
2160 && n->refcount == REFCOUNT_INFINITY
2161 && n->tgt->tgt_start == 0
2162 && n->tgt->to_free == NULL
2163 && n->tgt->refcount == 1
2164 && n->tgt->list_count == 0)
2166 splay_tree_remove (&devicep->mem_map, n);
2167 gomp_unmap_tgt (n->tgt);
2168 ret = 0;
2171 gomp_mutex_unlock (&devicep->lock);
2172 return ret;
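/* Usage sketch (illustrative only): let an existing device allocation
   serve as the device copy of a host object, then sever the link.  The
   association is entered with REFCOUNT_INFINITY above, so ordinary
   unmapping never removes it; only the disassociate call does.

     int host_obj;
     int dev = omp_get_default_device ();
     void *dev_mem = omp_target_alloc (sizeof (host_obj), dev);
     if (omp_target_associate_ptr (&host_obj, dev_mem,
                                   sizeof (host_obj), 0, dev) == 0)
       omp_target_disassociate_ptr (&host_obj, dev);
     omp_target_free (dev_mem, dev);
*/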
2175 #ifdef PLUGIN_SUPPORT
2177 /* This function tries to load a plugin for DEVICE.  The name of the
2178 plugin is passed in PLUGIN_NAME.
2179 The handles of the found functions are stored in the corresponding fields
2180 of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
2182 static bool
2183 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2184 const char *plugin_name)
2186 const char *err = NULL, *last_missing = NULL;
2188 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2189 if (!plugin_handle)
2190 goto dl_fail;
2192 /* Check if all required functions are available in the plugin and store
2193 their handles.  None of the symbols can legitimately be NULL,
2194 so we don't need to check dlerror all the time.  */
2195 #define DLSYM(f) \
2196 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2197 goto dl_fail
2198 /* Similar, but missing functions are not an error.  Evaluates to false
2199 if the lookup failed, true otherwise.  */
2200 #define DLSYM_OPT(f, n) \
2201 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2202 || (last_missing = #n, 0))
2204 DLSYM (version);
2205 if (device->version_func () != GOMP_VERSION)
2207 err = "plugin version mismatch";
2208 goto fail;
2211 DLSYM (get_name);
2212 DLSYM (get_caps);
2213 DLSYM (get_type);
2214 DLSYM (get_num_devices);
2215 DLSYM (init_device);
2216 DLSYM (fini_device);
2217 DLSYM (load_image);
2218 DLSYM (unload_image);
2219 DLSYM (alloc);
2220 DLSYM (free);
2221 DLSYM (dev2host);
2222 DLSYM (host2dev);
2223 device->capabilities = device->get_caps_func ();
2224 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2226 DLSYM (run);
2227 DLSYM (async_run);
2228 DLSYM (dev2dev);
2230 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2232 if (!DLSYM_OPT (openacc.exec, openacc_parallel)
2233 || !DLSYM_OPT (openacc.register_async_cleanup,
2234 openacc_register_async_cleanup)
2235 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2236 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2237 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2238 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2239 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2240 || !DLSYM_OPT (openacc.async_wait_all_async,
2241 openacc_async_wait_all_async)
2242 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2243 || !DLSYM_OPT (openacc.create_thread_data,
2244 openacc_create_thread_data)
2245 || !DLSYM_OPT (openacc.destroy_thread_data,
2246 openacc_destroy_thread_data))
2248 /* Require all the OpenACC handlers if we have
2249 GOMP_OFFLOAD_CAP_OPENACC_200. */
2250 err = "plugin missing OpenACC handler function";
2251 goto fail;
2254 unsigned cuda = 0;
2255 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2256 openacc_get_current_cuda_device);
2257 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2258 openacc_get_current_cuda_context);
2259 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
2260 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
2261 if (cuda && cuda != 4)
2263 /* Make sure all the CUDA functions are there if any of them are. */
2264 err = "plugin missing OpenACC CUDA handler function";
2265 goto fail;
2268 #undef DLSYM
2269 #undef DLSYM_OPT
2271 return true;
2273 dl_fail:
2274 err = dlerror ();
2275 fail:
2276 gomp_error ("while loading %s: %s", plugin_name, err);
2277 if (last_missing)
2278 gomp_error ("missing function was %s", last_missing);
2279 if (plugin_handle)
2280 dlclose (plugin_handle);
2282 return false;
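/* Taken together, the DLSYM calls above mean a minimal OpenMP-capable
   plugin (a sketch of the contract, not a complete specification) must
   export at least:

     GOMP_OFFLOAD_version, GOMP_OFFLOAD_get_name, GOMP_OFFLOAD_get_caps,
     GOMP_OFFLOAD_get_type, GOMP_OFFLOAD_get_num_devices,
     GOMP_OFFLOAD_init_device, GOMP_OFFLOAD_fini_device,
     GOMP_OFFLOAD_load_image, GOMP_OFFLOAD_unload_image,
     GOMP_OFFLOAD_alloc, GOMP_OFFLOAD_free,
     GOMP_OFFLOAD_dev2host, GOMP_OFFLOAD_host2dev,

   plus GOMP_OFFLOAD_run, GOMP_OFFLOAD_async_run and GOMP_OFFLOAD_dev2dev
   when get_caps reports GOMP_OFFLOAD_CAP_OPENMP_400.  */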
2285 /* This function initializes the runtime needed for offloading.
2286 It parses the list of offload targets and tries to load the plugins for
2287 these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2288 will be set, and the array DEVICES initialized, containing descriptors for
2289 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
2290 followed by the others.  */
2292 static void
2293 gomp_target_init (void)
2295 const char *prefix = "libgomp-plugin-";
2296 const char *suffix = SONAME_SUFFIX (1);
2297 const char *cur, *next;
2298 char *plugin_name;
2299 int i, new_num_devices;
2301 num_devices = 0;
2302 devices = NULL;
2304 cur = OFFLOAD_TARGETS;
2305 if (*cur)
2308 struct gomp_device_descr current_device;
2310 next = strchr (cur, ',');
2312 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
2313 + strlen (prefix) + strlen (suffix));
2314 if (!plugin_name)
2316 num_devices = 0;
2317 break;
2320 strcpy (plugin_name, prefix);
2321 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
2322 strcat (plugin_name, suffix);
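/* For a hypothetical OFFLOAD_TARGETS entry "foo" this builds
   "libgomp-plugin-foo" followed by the configured SONAME suffix from
   plugin-suffix.h, e.g. "libgomp-plugin-foo.so.1" on typical ELF
   targets.  */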
2324 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2326 new_num_devices = current_device.get_num_devices_func ();
2327 if (new_num_devices >= 1)
2329 /* Augment DEVICES and NUM_DEVICES. */
2331 devices = realloc (devices, (num_devices + new_num_devices)
2332 * sizeof (struct gomp_device_descr));
2333 if (!devices)
2335 num_devices = 0;
2336 free (plugin_name);
2337 break;
2340 current_device.name = current_device.get_name_func ();
2341 /* current_device.capabilities has already been set. */
2342 current_device.type = current_device.get_type_func ();
2343 current_device.mem_map.root = NULL;
2344 current_device.is_initialized = false;
2345 current_device.openacc.data_environ = NULL;
2346 for (i = 0; i < new_num_devices; i++)
2348 current_device.target_id = i;
2349 devices[num_devices] = current_device;
2350 gomp_mutex_init (&devices[num_devices].lock);
2351 num_devices++;
2356 free (plugin_name);
2357 cur = next + 1;
2359 while (next);
2361 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2362 NUM_DEVICES_OPENMP. */
2363 struct gomp_device_descr *devices_s
2364 = malloc (num_devices * sizeof (struct gomp_device_descr));
2365 if (!devices_s)
2367 num_devices = 0;
2368 free (devices);
2369 devices = NULL;
2371 num_devices_openmp = 0;
2372 for (i = 0; i < num_devices; i++)
2373 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2374 devices_s[num_devices_openmp++] = devices[i];
2375 int num_devices_after_openmp = num_devices_openmp;
2376 for (i = 0; i < num_devices; i++)
2377 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2378 devices_s[num_devices_after_openmp++] = devices[i];
2379 free (devices);
2380 devices = devices_s;
2382 for (i = 0; i < num_devices; i++)
2384 /* The 'devices' array can be moved (by the realloc call) until we have
2385 found all the plugins, so registering with the OpenACC runtime (which
2386 takes a copy of the pointer argument) must be delayed until now. */
2387 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2388 goacc_register (&devices[i]);
2392 #else /* PLUGIN_SUPPORT */
2393 /* If dlfcn.h is unavailable we always fall back to host execution.
2394 GOMP_target* routines are just stubs for this case.  */
2395 static void
2396 gomp_target_init (void)
2399 #endif /* PLUGIN_SUPPORT */