svn merge -r 217483:217500 svn+ssh://gcc.gnu.org/svn/gcc/trunk
[official-gcc.git] / libgomp / target.c
blob93fd4f48fbb96184cbc2b273db22a435b5d47062
1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU OpenMP Library (libgomp).
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 /* This file contains the support of offloading. */
27 #include "config.h"
28 #include "libgomp.h"
29 #include "libgomp_target.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <assert.h>
40 #ifdef PLUGIN_SUPPORT
41 #include <dlfcn.h>
42 #endif
44 static void gomp_target_init (void);
46 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
48 /* This structure describes an offload image.
49 It contains type of the target device, pointer to host table descriptor, and
50 pointer to target data. */
51 struct offload_image_descr {
52 enum offload_target_type type;
53 void *host_table;
54 void *target_data;
57 /* Array of descriptors of offload images. */
58 static struct offload_image_descr *offload_images;
60 /* Total number of offload images. */
61 static int num_offload_images;
63 /* Array of descriptors for all available devices. */
64 static struct gomp_device_descr *devices;
66 /* Total number of available devices. */
67 static int num_devices;
69 /* The comparison function. */
71 attribute_hidden int
72 splay_compare (splay_tree_key x, splay_tree_key y)
74 if (x->host_start == x->host_end
75 && y->host_start == y->host_end)
76 return 0;
77 if (x->host_end <= y->host_start)
78 return -1;
79 if (x->host_start >= y->host_end)
80 return 1;
81 return 0;
84 #include "splay-tree.h"
86 attribute_hidden void
87 gomp_init_targets_once (void)
89 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
92 attribute_hidden int
93 gomp_get_num_devices (void)
95 gomp_init_targets_once ();
96 return num_devices;
99 static struct gomp_device_descr *
100 resolve_device (int device_id)
102 if (device_id == -1)
104 struct gomp_task_icv *icv = gomp_icv (false);
105 device_id = icv->default_device_var;
108 if (device_id < 0 || device_id >= gomp_get_num_devices ())
109 return NULL;
111 return &devices[device_id];
115 /* Handle the case where splay_tree_lookup found oldn for newn.
116 Helper function of gomp_map_vars. */
118 static inline void
119 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
120 unsigned char kind)
122 if ((!(kind & _GOMP_MAP_FLAG_SPECIAL)
123 && (kind & _GOMP_MAP_FLAG_FORCE))
124 || oldn->host_start > newn->host_start
125 || oldn->host_end < newn->host_end)
126 gomp_fatal ("Trying to map into device [%p..%p) object when "
127 "[%p..%p) is already mapped",
128 (void *) newn->host_start, (void *) newn->host_end,
129 (void *) oldn->host_start, (void *) oldn->host_end);
130 oldn->refcount++;
133 static int
134 get_kind (bool is_openacc, void *kinds, int idx)
136 return is_openacc ? ((unsigned short *) kinds)[idx]
137 : ((unsigned char *) kinds)[idx];
140 attribute_hidden struct target_mem_desc *
141 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
142 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
143 bool is_openacc, bool is_target)
145 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
146 const int rshift = is_openacc ? 8 : 3;
147 const int typemask = is_openacc ? 0xff : 0x7;
148 struct gomp_memory_mapping *mm = &devicep->mem_map;
149 struct splay_tree_key_s cur_node;
150 struct target_mem_desc *tgt
151 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
152 tgt->list_count = mapnum;
153 tgt->refcount = 1;
154 tgt->device_descr = devicep;
155 tgt->mem_map = mm;
157 /* From gcc/fortran/trans-types.c */
158 struct descriptor_dimension
160 long stride;
161 long lbound;
162 long ubound;
165 struct gfc_array_descriptor
167 void *data;
168 long offset;
169 long dtype;
170 struct descriptor_dimension dimension[];
173 #define GFC_DTYPE_RANK_MASK 0x07
174 #define GFC_DTYPE_TYPE_MASK 0x38
175 #define GFC_DTYPE_TYPE_SHIFT 3
176 #define GFC_DTYPE_SIZE_SHIFT 6
178 if (mapnum == 0)
179 return tgt;
181 tgt_align = sizeof (void *);
182 tgt_size = 0;
183 if (is_target)
185 size_t align = 4 * sizeof (void *);
186 tgt_align = align;
187 tgt_size = mapnum * sizeof (void *);
189 gomp_mutex_lock (&mm->lock);
190 for (i = 0; i < mapnum; i++)
192 int kind = get_kind (is_openacc, kinds, i);
193 if (hostaddrs[i] == NULL)
195 tgt->list[i] = NULL;
196 continue;
198 cur_node.host_start = (uintptr_t) hostaddrs[i];
199 if (!GOMP_MAP_POINTER_P (kind & typemask))
200 cur_node.host_end = cur_node.host_start + sizes[i];
201 else
202 cur_node.host_end = cur_node.host_start + sizeof (void *);
203 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
204 if (n)
206 tgt->list[i] = n;
207 gomp_map_vars_existing (n, &cur_node, kind & typemask);
209 else
211 tgt->list[i] = NULL;
213 if ((kind & typemask) == GOMP_MAP_TO_PSET)
215 struct gfc_array_descriptor *gad;
216 size_t rank;
217 int j;
218 bool alloc_arrays = true;
220 for (j = i - 1; j >= 0; j--)
222 if (hostaddrs[j] == *(void**)hostaddrs[i])
224 alloc_arrays = false;
225 break;
229 gad = (struct gfc_array_descriptor *) cur_node.host_start;
230 rank = gad->dtype & GFC_DTYPE_RANK_MASK;
232 cur_node.host_start = (uintptr_t)gad->data;
233 cur_node.host_end = cur_node.host_start +
234 sizeof (struct gfc_array_descriptor) +
235 (sizeof (struct descriptor_dimension) * rank);
237 if (alloc_arrays)
239 size_t tsize;
241 tsize = gad->dtype >> GFC_DTYPE_SIZE_SHIFT;
243 for (j = 0; j < rank; j++)
245 cur_node.host_end += tsize *
246 (gad->dimension[j].ubound -
247 gad->dimension[j].lbound + 1);
252 size_t align = (size_t) 1 << (kind >> rshift);
253 not_found_cnt++;
254 if (tgt_align < align)
255 tgt_align = align;
256 tgt_size = (tgt_size + align - 1) & ~(align - 1);
257 tgt_size += cur_node.host_end - cur_node.host_start;
258 if ((kind & typemask) == GOMP_MAP_TO_PSET)
260 size_t j;
261 for (j = i + 1; j < mapnum; j++)
262 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
263 & typemask))
264 break;
265 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
266 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
267 > cur_node.host_end))
268 break;
269 else
271 tgt->list[j] = NULL;
272 i++;
278 if (devaddrs)
280 if (mapnum != 1)
281 gomp_fatal ("unexpected aggregation");
282 tgt->to_free = devaddrs[0];
283 tgt->tgt_start = (uintptr_t) tgt->to_free;
284 tgt->tgt_end = tgt->tgt_start + sizes[0];
286 else if (not_found_cnt || is_target)
288 /* Allocate tgt_align aligned tgt_size block of memory. */
289 /* FIXME: Perhaps change interface to allocate properly aligned
290 memory. */
291 tgt->to_free = devicep->alloc_func (devicep->target_id,
292 tgt_size + tgt_align - 1);
293 tgt->tgt_start = (uintptr_t) tgt->to_free;
294 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
295 tgt->tgt_end = tgt->tgt_start + tgt_size;
297 else
299 tgt->to_free = NULL;
300 tgt->tgt_start = 0;
301 tgt->tgt_end = 0;
304 tgt_size = 0;
305 if (is_target)
306 tgt_size = mapnum * sizeof (void *);
308 tgt->array = NULL;
309 if (not_found_cnt)
311 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
312 splay_tree_node array = tgt->array;
313 size_t j;
315 for (i = 0; i < mapnum; i++)
316 if (tgt->list[i] == NULL)
318 int kind = get_kind (is_openacc, kinds, i);
319 if (hostaddrs[i] == NULL)
320 continue;
321 splay_tree_key k = &array->key;
322 k->host_start = (uintptr_t) hostaddrs[i];
323 if (!GOMP_MAP_POINTER_P (kind & typemask))
324 k->host_end = k->host_start + sizes[i];
325 else
326 k->host_end = k->host_start + sizeof (void *);
327 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
328 if (n)
330 tgt->list[i] = n;
331 gomp_map_vars_existing (n, k, kind & typemask);
333 else
335 size_t align = (size_t) 1 << (kind >> rshift);
336 tgt->list[i] = k;
337 tgt_size = (tgt_size + align - 1) & ~(align - 1);
338 k->tgt = tgt;
339 k->tgt_offset = tgt_size;
340 tgt_size += k->host_end - k->host_start;
341 k->copy_from = GOMP_MAP_COPYFROM_P (kind & typemask)
342 || GOMP_MAP_TOFROM_P (kind & typemask);
343 k->refcount = 1;
344 k->async_refcount = 0;
345 tgt->refcount++;
346 array->left = NULL;
347 array->right = NULL;
349 splay_tree_insert (&mm->splay_tree, array);
351 switch (kind & typemask)
353 case GOMP_MAP_ALLOC:
354 case GOMP_MAP_ALLOC_FROM:
355 case GOMP_MAP_FORCE_ALLOC:
356 case GOMP_MAP_FORCE_FROM:
357 break;
358 case GOMP_MAP_ALLOC_TO:
359 case GOMP_MAP_ALLOC_TOFROM:
360 case GOMP_MAP_FORCE_TO:
361 case GOMP_MAP_FORCE_TOFROM:
362 /* Copy from host to device memory. */
363 /* FIXME: Perhaps add some smarts, like if copying
364 several adjacent fields from host to target, use some
365 host buffer to avoid sending each var individually. */
366 devicep->host2dev_func (devicep->target_id,
367 (void *) (tgt->tgt_start
368 + k->tgt_offset),
369 (void *) k->host_start,
370 k->host_end - k->host_start);
371 break;
372 case GOMP_MAP_POINTER:
373 cur_node.host_start
374 = (uintptr_t) *(void **) k->host_start;
375 if (cur_node.host_start == (uintptr_t) NULL)
377 cur_node.tgt_offset = (uintptr_t) NULL;
378 /* Copy from host to device memory. */
379 /* FIXME: see above FIXME comment. */
380 devicep->host2dev_func (devicep->target_id,
381 (void *) (tgt->tgt_start
382 + k->tgt_offset),
383 (void *) &cur_node.tgt_offset,
384 sizeof (void *));
385 break;
387 /* Add bias to the pointer value. */
388 cur_node.host_start += sizes[i];
389 cur_node.host_end = cur_node.host_start + 1;
390 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
391 if (n == NULL)
393 /* Could be possibly zero size array section. */
394 cur_node.host_end--;
395 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
396 if (n == NULL)
398 cur_node.host_start--;
399 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
400 cur_node.host_start++;
403 if (n == NULL)
404 gomp_fatal ("Pointer target of array section "
405 "wasn't mapped");
407 cur_node.host_start -= n->host_start;
408 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
409 + cur_node.host_start;
410 /* At this point tgt_offset is target address of the
411 array section. Now subtract bias to get what we want
412 to initialize the pointer with. */
413 cur_node.tgt_offset -= sizes[i];
414 /* Copy from host to device memory. */
415 /* FIXME: see above FIXME comment. */
416 devicep->host2dev_func (devicep->target_id,
417 (void *) (tgt->tgt_start
418 + k->tgt_offset),
419 (void *) &cur_node.tgt_offset,
420 sizeof (void *));
421 break;
422 case GOMP_MAP_TO_PSET:
424 /* Copy from host to device memory. */
425 /* FIXME: see above FIXME comment. */
426 devicep->host2dev_func (devicep->target_id,
427 (void *) (tgt->tgt_start
428 + k->tgt_offset),
429 (void *) k->host_start,
430 (k->host_end - k->host_start));
431 devicep->host2dev_func (devicep->target_id,
432 (void *) (tgt->tgt_start
433 + k->tgt_offset),
434 (void *) &tgt->tgt_start,
435 sizeof (void *));
437 for (j = i + 1; j < mapnum; j++)
438 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
439 & typemask))
440 break;
441 else if ((uintptr_t) hostaddrs[j] < k->host_start
442 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
443 > k->host_end))
444 break;
445 else
447 tgt->list[j] = k;
448 k->refcount++;
449 cur_node.host_start
450 = (uintptr_t) *(void **) hostaddrs[j];
451 if (cur_node.host_start == (uintptr_t) NULL)
453 cur_node.tgt_offset = (uintptr_t) NULL;
454 /* Copy from host to device memory. */
455 /* FIXME: see above FIXME comment. */
456 devicep->host2dev_func (devicep->target_id,
457 (void *) (tgt->tgt_start
458 + k->tgt_offset
459 + ((uintptr_t) hostaddrs[j]
460 - k->host_start)),
461 (void *) &cur_node.tgt_offset,
462 sizeof (void *));
463 i++;
464 continue;
466 /* Add bias to the pointer value. */
467 cur_node.host_start += sizes[j];
468 cur_node.host_end = cur_node.host_start + 1;
469 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
470 if (n == NULL)
472 /* Could be possibly zero size array
473 section. */
474 cur_node.host_end--;
475 n = splay_tree_lookup (&mm->splay_tree,
476 &cur_node);
477 if (n == NULL)
479 cur_node.host_start--;
480 n = splay_tree_lookup (&mm->splay_tree,
481 &cur_node);
482 cur_node.host_start++;
485 if (n == NULL)
486 gomp_fatal ("Pointer target of array section "
487 "wasn't mapped");
488 cur_node.host_start -= n->host_start;
489 cur_node.tgt_offset = n->tgt->tgt_start
490 + n->tgt_offset
491 + cur_node.host_start;
492 /* At this point tgt_offset is target address of the
493 array section. Now subtract bias to get what we
494 want to initialize the pointer with. */
495 cur_node.tgt_offset -= sizes[j];
496 /* Copy from host to device memory. */
497 /* FIXME: see above FIXME comment. */
499 devicep->host2dev_func (devicep->target_id,
500 (void *) (tgt->tgt_start
501 + k->tgt_offset
502 + ((uintptr_t) hostaddrs[j]
503 - k->host_start)),
504 (void *) &cur_node.tgt_offset,
505 sizeof (void *));
506 i++;
508 break;
510 case GOMP_MAP_FORCE_PRESENT:
512 /* We already looked up the memory region above and it
513 was missing. */
514 size_t size = k->host_end - k->host_start;
515 gomp_fatal ("present clause: !acc_is_present (%p, "
516 "%zd (0x%zx))", (void *) k->host_start,
517 size, size);
519 break;
520 case GOMP_MAP_FORCE_DEVICEPTR:
521 assert (k->host_end - k->host_start == sizeof (void *));
523 devicep->host2dev_func (devicep->target_id,
524 (void *) (tgt->tgt_start
525 + k->tgt_offset),
526 (void *) k->host_start,
527 sizeof (void *));
528 break;
529 case GOMP_MAP_FORCE_PRIVATE:
530 abort ();
531 case GOMP_MAP_FORCE_FIRSTPRIVATE:
532 abort ();
533 default:
534 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
535 kind);
537 array++;
542 #undef GFC_DTYPE_RANK_MASK
543 #undef GFC_DTYPE_TYPE_MASK
544 #undef GFC_DTYPE_TYPE_SHIFT
545 #undef GFC_DTYPE_SIZE_SHIFT
547 if (is_target)
549 for (i = 0; i < mapnum; i++)
551 if (tgt->list[i] == NULL)
552 cur_node.tgt_offset = (uintptr_t) NULL;
553 else
554 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
555 + tgt->list[i]->tgt_offset;
556 /* Copy from host to device memory. */
557 /* FIXME: see above FIXME comment. */
558 devicep->host2dev_func (devicep->target_id,
559 (void *) (tgt->tgt_start
560 + i * sizeof (void *)),
561 (void *) &cur_node.tgt_offset,
562 sizeof (void *));
566 gomp_mutex_unlock (&mm->lock);
567 return tgt;
570 static void
571 gomp_unmap_tgt (struct target_mem_desc *tgt)
573 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
574 if (tgt->tgt_end)
575 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
577 free (tgt->array);
578 free (tgt);
581 /* Decrease the refcount for a set of mapped variables, and queue asychronous
582 copies from the device back to the host after any work that has been issued.
583 Because the regions are still "live", increment an asynchronous reference
584 count to indicate that they should not be unmapped from host-side data
585 structures until the asynchronous copy has completed. */
587 attribute_hidden void
588 gomp_copy_from_async (struct target_mem_desc *tgt)
590 struct gomp_device_descr *devicep = tgt->device_descr;
591 struct gomp_memory_mapping *mm = tgt->mem_map;
592 size_t i;
594 gomp_mutex_lock (&mm->lock);
596 for (i = 0; i < tgt->list_count; i++)
597 if (tgt->list[i] == NULL)
599 else if (tgt->list[i]->refcount > 1)
601 tgt->list[i]->refcount--;
602 tgt->list[i]->async_refcount++;
604 else
606 splay_tree_key k = tgt->list[i];
607 if (k->copy_from)
608 /* Copy from device to host memory. */
609 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
610 (void *) (k->tgt->tgt_start + k->tgt_offset),
611 k->host_end - k->host_start);
614 gomp_mutex_unlock (&mm->lock);
617 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
618 variables back from device to host: if it is false, it is assumed that this
619 has been done already, i.e. by gomp_copy_from_async above. */
621 attribute_hidden void
622 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
624 struct gomp_device_descr *devicep = tgt->device_descr;
625 struct gomp_memory_mapping *mm = tgt->mem_map;
627 if (tgt->list_count == 0)
629 free (tgt);
630 return;
633 size_t i;
634 gomp_mutex_lock (&mm->lock);
635 for (i = 0; i < tgt->list_count; i++)
636 if (tgt->list[i] == NULL)
638 else if (tgt->list[i]->refcount > 1)
639 tgt->list[i]->refcount--;
640 else if (tgt->list[i]->async_refcount > 0)
641 tgt->list[i]->async_refcount--;
642 else
644 splay_tree_key k = tgt->list[i];
645 if (k->copy_from && do_copyfrom)
646 /* Copy from device to host memory. */
647 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
648 (void *) (k->tgt->tgt_start + k->tgt_offset),
649 k->host_end - k->host_start);
650 splay_tree_remove (&mm->splay_tree, k);
651 if (k->tgt->refcount > 1)
652 k->tgt->refcount--;
653 else
654 gomp_unmap_tgt (k->tgt);
657 if (tgt->refcount > 1)
658 tgt->refcount--;
659 else
660 gomp_unmap_tgt (tgt);
661 gomp_mutex_unlock (&mm->lock);
664 static void
665 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
666 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
667 bool is_openacc)
669 size_t i;
670 struct splay_tree_key_s cur_node;
671 const int typemask = is_openacc ? 0xff : 0x7;
673 if (!devicep)
674 return;
676 if (mapnum == 0)
677 return;
679 gomp_mutex_lock (&mm->lock);
680 for (i = 0; i < mapnum; i++)
681 if (sizes[i])
683 cur_node.host_start = (uintptr_t) hostaddrs[i];
684 cur_node.host_end = cur_node.host_start + sizes[i];
685 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
686 &cur_node);
687 if (n)
689 int kind = get_kind (is_openacc, kinds, i);
690 if (n->host_start > cur_node.host_start
691 || n->host_end < cur_node.host_end)
692 gomp_fatal ("Trying to update [%p..%p) object when"
693 "only [%p..%p) is mapped",
694 (void *) cur_node.host_start,
695 (void *) cur_node.host_end,
696 (void *) n->host_start,
697 (void *) n->host_end);
698 if (GOMP_MAP_COPYTO_P (kind & typemask))
699 /* Copy from host to device memory. */
700 devicep->host2dev_func (devicep->target_id,
701 (void *) (n->tgt->tgt_start
702 + n->tgt_offset
703 + cur_node.host_start
704 - n->host_start),
705 (void *) cur_node.host_start,
706 cur_node.host_end - cur_node.host_start);
707 else if (GOMP_MAP_COPYFROM_P (kind & typemask))
708 /* Copy from device to host memory. */
709 devicep->dev2host_func (devicep->target_id,
710 (void *) cur_node.host_start,
711 (void *) (n->tgt->tgt_start
712 + n->tgt_offset
713 + cur_node.host_start
714 - n->host_start),
715 cur_node.host_end - cur_node.host_start);
717 else
718 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
719 (void *) cur_node.host_start,
720 (void *) cur_node.host_end);
722 gomp_mutex_unlock (&mm->lock);
725 static void gomp_register_image_for_device (struct gomp_device_descr *device,
726 struct offload_image_descr *image);
728 /* This function should be called from every offload image.
729 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
730 the target, and TARGET_DATA needed by target plugin. */
732 void
733 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
734 void *target_data)
736 offload_images = gomp_realloc (offload_images,
737 (num_offload_images + 1)
738 * sizeof (struct offload_image_descr));
740 if (offload_images == NULL)
741 return;
743 offload_images[num_offload_images].type = target_type;
744 offload_images[num_offload_images].host_table = host_table;
745 offload_images[num_offload_images].target_data = target_data;
747 num_offload_images++;
750 /* This function initializes the target device, specified by DEVICEP. */
752 attribute_hidden void
753 gomp_init_device (struct gomp_device_descr *devicep)
755 /* Initialize the target device. */
756 devicep->init_device_func (devicep->target_id);
758 devicep->is_initialized = true;
761 attribute_hidden void
762 gomp_init_tables (const struct gomp_device_descr *devicep,
763 struct gomp_memory_mapping *mm)
765 /* Get address mapping table for device. */
766 struct mapping_table *table = NULL;
767 int i, num_entries = devicep->get_table_func (devicep->target_id, &table);
769 /* Insert host-target address mapping into dev_splay_tree. */
770 for (i = 0; i < num_entries; i++)
772 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
773 tgt->refcount = 1;
774 tgt->array = gomp_malloc (sizeof (*tgt->array));
775 tgt->tgt_start = table[i].tgt_start;
776 tgt->tgt_end = table[i].tgt_end;
777 tgt->to_free = NULL;
778 tgt->list_count = 0;
779 tgt->device_descr = (struct gomp_device_descr *) devicep;
780 splay_tree_node node = tgt->array;
781 splay_tree_key k = &node->key;
782 k->host_start = table[i].host_start;
783 k->host_end = table[i].host_end;
784 k->tgt_offset = 0;
785 k->refcount = 1;
786 k->copy_from = false;
787 k->tgt = tgt;
788 node->left = NULL;
789 node->right = NULL;
790 splay_tree_insert (&mm->splay_tree, node);
793 free (table);
794 mm->is_initialized = true;
797 static void
798 gomp_init_dev_tables (struct gomp_device_descr *devicep)
800 gomp_init_device (devicep);
801 gomp_init_tables (devicep, &devicep->mem_map);
805 attribute_hidden void
806 gomp_free_memmap (struct gomp_device_descr *devicep)
808 struct gomp_memory_mapping *mm = &devicep->mem_map;
810 while (mm->splay_tree.root)
812 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
814 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
815 free (tgt->array);
816 free (tgt);
819 mm->is_initialized = false;
822 attribute_hidden void
823 gomp_fini_device (struct gomp_device_descr *devicep)
825 if (devicep->is_initialized)
826 devicep->fini_device_func (devicep->target_id);
828 devicep->is_initialized = false;
831 /* Called when encountering a target directive. If DEVICE
832 is -1, it means use device-var ICV. If it is -2 (or any other value
833 larger than last available hw device, use host fallback.
834 FN is address of host code, OFFLOAD_TABLE contains value of the
835 __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes
836 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
837 with MAPNUM entries, with addresses of the host objects,
838 sizes of the host objects (resp. for pointer kind pointer bias
839 and assumed sizeof (void *) size) and kinds. */
841 void
842 GOMP_target (int device, void (*fn) (void *), const void *offload_table,
843 size_t mapnum, void **hostaddrs, size_t *sizes,
844 unsigned char *kinds)
846 struct gomp_device_descr *devicep = resolve_device (device);
847 struct gomp_memory_mapping *mm = &devicep->mem_map;
849 if (devicep != NULL && !devicep->is_initialized)
850 gomp_init_dev_tables (devicep);
852 if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
854 /* Host fallback. */
855 struct gomp_thread old_thr, *thr = gomp_thread ();
856 old_thr = *thr;
857 memset (thr, '\0', sizeof (*thr));
858 if (gomp_places_list)
860 thr->place = old_thr.place;
861 thr->ts.place_partition_len = gomp_places_list_len;
863 fn (hostaddrs);
864 gomp_free_thread (thr);
865 *thr = old_thr;
866 return;
869 void *fn_addr;
871 if (devicep->capabilities & TARGET_CAP_NATIVE_EXEC)
872 fn_addr = (void *) fn;
873 else
875 gomp_mutex_lock (&mm->lock);
876 if (!devicep->is_initialized)
877 gomp_init_dev_tables (devicep);
878 struct splay_tree_key_s k;
879 k.host_start = (uintptr_t) fn;
880 k.host_end = k.host_start + 1;
881 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map.splay_tree,
882 &k);
883 if (tgt_fn == NULL)
884 gomp_fatal ("Target function wasn't mapped");
885 gomp_mutex_unlock (&mm->lock);
887 fn_addr = (void *) tgt_fn->tgt->tgt_start;
890 struct target_mem_desc *tgt_vars
891 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
892 true);
893 struct gomp_thread old_thr, *thr = gomp_thread ();
894 old_thr = *thr;
895 memset (thr, '\0', sizeof (*thr));
896 if (gomp_places_list)
898 thr->place = old_thr.place;
899 thr->ts.place_partition_len = gomp_places_list_len;
901 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
902 gomp_free_thread (thr);
903 *thr = old_thr;
904 gomp_unmap_vars (tgt_vars, true);
907 void
908 GOMP_target_data (int device, const void *offload_table, size_t mapnum,
909 void **hostaddrs, size_t *sizes, unsigned char *kinds)
911 struct gomp_device_descr *devicep = resolve_device (device);
912 struct gomp_memory_mapping *mm = &devicep->mem_map;
914 if (devicep != NULL && !devicep->is_initialized)
915 gomp_init_dev_tables (devicep);
917 if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
919 /* Host fallback. */
920 struct gomp_task_icv *icv = gomp_icv (false);
921 if (icv->target_data)
923 /* Even when doing a host fallback, if there are any active
924 #pragma omp target data constructs, need to remember the
925 new #pragma omp target data, otherwise GOMP_target_end_data
926 would get out of sync. */
927 struct target_mem_desc *tgt
928 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
929 tgt->prev = icv->target_data;
930 icv->target_data = tgt;
932 return;
935 gomp_mutex_lock (&mm->lock);
936 if (!devicep->is_initialized)
937 gomp_init_dev_tables (devicep);
938 gomp_mutex_unlock (&mm->lock);
940 struct target_mem_desc *tgt
941 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
942 false);
943 struct gomp_task_icv *icv = gomp_icv (true);
944 tgt->prev = icv->target_data;
945 icv->target_data = tgt;
948 void
949 GOMP_target_end_data (void)
951 struct gomp_task_icv *icv = gomp_icv (false);
952 if (icv->target_data)
954 struct target_mem_desc *tgt = icv->target_data;
955 icv->target_data = tgt->prev;
956 gomp_unmap_vars (tgt, true);
960 void
961 GOMP_target_update (int device, const void *offload_table, size_t mapnum,
962 void **hostaddrs, size_t *sizes, unsigned char *kinds)
964 struct gomp_device_descr *devicep = resolve_device (device);
965 struct gomp_memory_mapping *mm = &devicep->mem_map;
967 gomp_mutex_lock (&mm->lock);
968 if (devicep != NULL && !devicep->is_initialized)
969 gomp_init_device (devicep);
970 gomp_mutex_unlock (&mm->lock);
972 if (devicep != NULL && !(devicep->capabilities & TARGET_CAP_OPENMP_400))
973 return;
975 gomp_update (devicep, &devicep->mem_map, mapnum, hostaddrs, sizes, kinds,
976 false);
979 void
980 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
982 if (thread_limit)
984 struct gomp_task_icv *icv = gomp_icv (true);
985 icv->thread_limit_var
986 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
988 (void) num_teams;
991 #ifdef PLUGIN_SUPPORT
993 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
994 in PLUGIN_NAME.
995 The handles of the found functions are stored in the corresponding fields
996 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
998 static bool
999 gomp_load_plugin_for_device (struct gomp_device_descr *device,
1000 const char *plugin_name)
1002 char *err = NULL, *last_missing = NULL;
1003 int optional_present, optional_total;
1005 /* Clear any existing error. */
1006 dlerror ();
1008 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1009 if (!plugin_handle)
1011 err = dlerror ();
1012 goto out;
1015 /* Check if all required functions are available in the plugin and store
1016 their handlers. */
1017 #define DLSYM(f) \
1018 do \
1020 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
1021 err = dlerror (); \
1022 if (err != NULL) \
1023 goto out; \
1025 while (0)
1026 /* Similar, but missing functions are not an error. */
1027 #define DLSYM_OPT(f, n) \
1028 do \
1030 char *tmp_err; \
1031 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
1032 tmp_err = dlerror (); \
1033 if (tmp_err == NULL) \
1034 optional_present++; \
1035 else \
1036 last_missing = #n; \
1037 optional_total++; \
1039 while (0)
1041 DLSYM (get_name);
1042 DLSYM (get_caps);
1043 DLSYM (get_type);
1044 DLSYM (get_num_devices);
1045 DLSYM (register_image);
1046 DLSYM (init_device);
1047 DLSYM (fini_device);
1048 DLSYM (get_table);
1049 DLSYM (alloc);
1050 DLSYM (free);
1051 DLSYM (dev2host);
1052 DLSYM (host2dev);
1053 device->capabilities = device->get_caps_func ();
1054 if (device->capabilities & TARGET_CAP_OPENMP_400)
1055 DLSYM (run);
1056 if (device->capabilities & TARGET_CAP_OPENACC_200)
1058 optional_present = optional_total = 0;
1059 DLSYM_OPT (openacc.exec, openacc_parallel);
1060 DLSYM_OPT (openacc.open_device, openacc_open_device);
1061 DLSYM_OPT (openacc.close_device, openacc_close_device);
1062 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
1063 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
1064 DLSYM_OPT (openacc.register_async_cleanup,
1065 openacc_register_async_cleanup);
1066 DLSYM_OPT (openacc.async_test, openacc_async_test);
1067 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
1068 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
1069 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1070 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1071 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1072 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1073 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1074 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1075 /* Require all the OpenACC handlers if we have TARGET_CAP_OPENACC_200. */
1076 if (optional_present != optional_total)
1078 err = "plugin missing OpenACC handler function";
1079 goto out;
1081 optional_present = optional_total = 0;
1082 DLSYM_OPT (openacc.cuda.get_current_device,
1083 openacc_get_current_cuda_device);
1084 DLSYM_OPT (openacc.cuda.get_current_context,
1085 openacc_get_current_cuda_context);
1086 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1087 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1088 /* Make sure all the CUDA functions are there if any of them are. */
1089 if (optional_present && optional_present != optional_total)
1091 err = "plugin missing OpenACC CUDA handler function";
1092 goto out;
1095 #undef DLSYM
1096 #undef DLSYM_OPT
1098 out:
1099 if (err != NULL)
1101 gomp_error ("while loading %s: %s", plugin_name, err);
1102 if (last_missing)
1103 gomp_error ("missing function was %s", last_missing);
1104 if (plugin_handle)
1105 dlclose (plugin_handle);
1107 return err == NULL;
1110 /* This function adds a compatible offload image IMAGE to an accelerator device
1111 DEVICE. */
1113 static void
1114 gomp_register_image_for_device (struct gomp_device_descr *device,
1115 struct offload_image_descr *image)
1117 if (!device->offload_regions_registered
1118 && (device->type == image->type
1119 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1121 device->register_image_func (image->host_table, image->target_data);
1122 device->offload_regions_registered = true;
1126 /* This function initializes the runtime needed for offloading.
1127 It parses the list of offload targets and tries to load the plugins for these
1128 targets. Result of the function is properly initialized variable NUM_DEVICES
1129 and array DEVICES, containing descriptors for corresponding devices. */
1131 static void
1132 gomp_target_init (void)
1134 const char *prefix ="libgomp-plugin-";
1135 const char *suffix = ".so.1";
1136 const char *cur, *next;
1137 char *plugin_name;
1138 int i, new_num_devices;
1140 num_devices = 0;
1141 devices = NULL;
1143 cur = OFFLOAD_TARGETS;
1144 if (*cur)
1147 struct gomp_device_descr current_device;
1149 next = strchr (cur, ',');
1151 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1152 + strlen (prefix) + strlen (suffix));
1153 if (!plugin_name)
1155 num_devices = 0;
1156 break;
1159 strcpy (plugin_name, prefix);
1160 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1161 strcat (plugin_name, suffix);
1163 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1165 new_num_devices = current_device.get_num_devices_func ();
1166 if (new_num_devices >= 1)
1168 devices = realloc (devices, (num_devices + new_num_devices)
1169 * sizeof (struct gomp_device_descr));
1170 if (!devices)
1172 num_devices = 0;
1173 free (plugin_name);
1174 break;
1177 current_device.type = current_device.get_type_func ();
1178 current_device.name = current_device.get_name_func ();
1179 current_device.is_initialized = false;
1180 current_device.offload_regions_registered = false;
1181 current_device.mem_map.splay_tree.root = NULL;
1182 current_device.mem_map.is_initialized = false;
1183 current_device.target_data = NULL;
1184 current_device.openacc.data_environ = NULL;
1185 for (i = 0; i < new_num_devices; i++)
1187 current_device.id = num_devices + 1;
1188 current_device.target_id = i;
1189 devices[num_devices] = current_device;
1190 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1191 num_devices++;
1196 free (plugin_name);
1197 cur = next + 1;
1199 while (next);
1201 /* Prefer a device with TARGET_CAP_OPENMP_400 for ICV default-device-var. */
1202 if (num_devices > 1)
1204 int d = gomp_icv (false)->default_device_var;
1206 if (!(devices[d].capabilities & TARGET_CAP_OPENMP_400))
1208 for (i = 0; i < num_devices; i++)
1210 if (devices[i].capabilities & TARGET_CAP_OPENMP_400)
1212 struct gomp_device_descr device_tmp = devices[d];
1213 devices[d] = devices[i];
1214 devices[d].id = d + 1;
1215 devices[i] = device_tmp;
1216 devices[i].id = i + 1;
1218 break;
1224 for (i = 0; i < num_devices; i++)
1226 int j;
1228 for (j = 0; j < num_offload_images; j++)
1229 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1231 /* The 'devices' array can be moved (by the realloc call) until we have
1232 found all the plugins, so registering with the OpenACC runtime (which
1233 takes a copy of the pointer argument) must be delayed until now. */
1234 if (devices[i].capabilities & TARGET_CAP_OPENACC_200)
1235 ACC_register (&devices[i]);
1238 free (offload_images);
1239 offload_images = NULL;
1240 num_offload_images = 0;
1243 #else /* PLUGIN_SUPPORT */
1244 /* If dlfcn.h is unavailable we always fallback to host execution.
1245 GOMP_target* routines are just stubs for this case. */
1246 static void
1247 gomp_target_init (void)
1250 #endif /* PLUGIN_SUPPORT */