/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#ifdef PLUGIN_SUPPORT
# include <dlfcn.h>
# include "plugin-suffix.h"
#endif
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
52 /* Mutex for offload image registration. */
53 static gomp_mutex_t register_lock
;
55 /* This structure describes an offload image.
56 It contains type of the target device, pointer to host table descriptor, and
57 pointer to target data. */
58 struct offload_image_descr
{
59 enum offload_target_type type
;
64 /* Array of descriptors of offload images. */
65 static struct offload_image_descr
*offload_images
;
67 /* Total number of offload images. */
68 static int num_offload_images
;
70 /* Array of descriptors for all available devices. */
71 static struct gomp_device_descr
*devices
;
73 /* Total number of available devices. */
74 static int num_devices
;
76 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
77 static int num_devices_openmp
;
79 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82 gomp_realloc_unlock (void *old
, size_t size
)
84 void *ret
= realloc (old
, size
);
87 gomp_mutex_unlock (®ister_lock
);
88 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
93 /* The comparison function. */
96 splay_compare (splay_tree_key x
, splay_tree_key y
)
98 if (x
->host_start
== x
->host_end
99 && y
->host_start
== y
->host_end
)
101 if (x
->host_end
<= y
->host_start
)
103 if (x
->host_start
>= y
->host_end
)
108 #include "splay-tree.h"
110 attribute_hidden
void
111 gomp_init_targets_once (void)
113 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
117 gomp_get_num_devices (void)
119 gomp_init_targets_once ();
120 return num_devices_openmp
;
123 static struct gomp_device_descr
*
124 resolve_device (int device_id
)
126 if (device_id
== GOMP_DEVICE_ICV
)
128 struct gomp_task_icv
*icv
= gomp_icv (false);
129 device_id
= icv
->default_device_var
;
132 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
135 return &devices
[device_id
];
139 /* Handle the case where splay_tree_lookup found oldn for newn.
140 Helper function of gomp_map_vars. */
143 gomp_map_vars_existing (struct gomp_device_descr
*devicep
, splay_tree_key oldn
,
144 splay_tree_key newn
, unsigned char kind
)
146 if ((kind
& GOMP_MAP_FLAG_FORCE
)
147 || oldn
->host_start
> newn
->host_start
148 || oldn
->host_end
< newn
->host_end
)
150 gomp_mutex_unlock (&devicep
->lock
);
151 gomp_fatal ("Trying to map into device [%p..%p) object when "
152 "[%p..%p) is already mapped",
153 (void *) newn
->host_start
, (void *) newn
->host_end
,
154 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
/* Fetch the mapping kind for entry IDX from KINDS.  OpenACC encodes kinds as
   unsigned shorts, OpenMP as unsigned chars.  */

static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  return is_openacc ? ((unsigned short *) kinds)[idx]
		    : ((unsigned char *) kinds)[idx];
}
166 attribute_hidden
struct target_mem_desc
*
167 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
168 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
169 bool is_openacc
, bool is_target
)
171 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
172 const int rshift
= is_openacc
? 8 : 3;
173 const int typemask
= is_openacc
? 0xff : 0x7;
174 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
175 struct splay_tree_key_s cur_node
;
176 struct target_mem_desc
*tgt
177 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
178 tgt
->list_count
= mapnum
;
180 tgt
->device_descr
= devicep
;
181 tgt
->mem_map
= mem_map
;
186 tgt_align
= sizeof (void *);
190 size_t align
= 4 * sizeof (void *);
192 tgt_size
= mapnum
* sizeof (void *);
195 gomp_mutex_lock (&devicep
->lock
);
197 for (i
= 0; i
< mapnum
; i
++)
199 int kind
= get_kind (is_openacc
, kinds
, i
);
200 if (hostaddrs
[i
] == NULL
)
205 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
206 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
207 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
209 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
210 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
214 gomp_map_vars_existing (devicep
, n
, &cur_node
, kind
& typemask
);
220 size_t align
= (size_t) 1 << (kind
>> rshift
);
222 if (tgt_align
< align
)
224 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
225 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
226 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
229 for (j
= i
+ 1; j
< mapnum
; j
++)
230 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc
, kinds
, j
)
233 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
234 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
235 > cur_node
.host_end
))
250 gomp_mutex_unlock (&devicep
->lock
);
251 gomp_fatal ("unexpected aggregation");
253 tgt
->to_free
= devaddrs
[0];
254 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
255 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
257 else if (not_found_cnt
|| is_target
)
259 /* Allocate tgt_align aligned tgt_size block of memory. */
260 /* FIXME: Perhaps change interface to allocate properly aligned
262 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
263 tgt_size
+ tgt_align
- 1);
264 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
265 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
266 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
277 tgt_size
= mapnum
* sizeof (void *);
282 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
283 splay_tree_node array
= tgt
->array
;
286 for (i
= 0; i
< mapnum
; i
++)
287 if (tgt
->list
[i
] == NULL
)
289 int kind
= get_kind (is_openacc
, kinds
, i
);
290 if (hostaddrs
[i
] == NULL
)
292 splay_tree_key k
= &array
->key
;
293 k
->host_start
= (uintptr_t) hostaddrs
[i
];
294 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
295 k
->host_end
= k
->host_start
+ sizes
[i
];
297 k
->host_end
= k
->host_start
+ sizeof (void *);
298 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
302 gomp_map_vars_existing (devicep
, n
, k
, kind
& typemask
);
306 size_t align
= (size_t) 1 << (kind
>> rshift
);
308 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
310 k
->tgt_offset
= tgt_size
;
311 tgt_size
+= k
->host_end
- k
->host_start
;
312 k
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
314 k
->async_refcount
= 0;
318 splay_tree_insert (mem_map
, array
);
319 switch (kind
& typemask
)
323 case GOMP_MAP_FORCE_ALLOC
:
324 case GOMP_MAP_FORCE_FROM
:
327 case GOMP_MAP_TOFROM
:
328 case GOMP_MAP_FORCE_TO
:
329 case GOMP_MAP_FORCE_TOFROM
:
330 /* FIXME: Perhaps add some smarts, like if copying
331 several adjacent fields from host to target, use some
332 host buffer to avoid sending each var individually. */
333 devicep
->host2dev_func (devicep
->target_id
,
334 (void *) (tgt
->tgt_start
336 (void *) k
->host_start
,
337 k
->host_end
- k
->host_start
);
339 case GOMP_MAP_POINTER
:
341 = (uintptr_t) *(void **) k
->host_start
;
342 if (cur_node
.host_start
== (uintptr_t) NULL
)
344 cur_node
.tgt_offset
= (uintptr_t) NULL
;
345 /* FIXME: see above FIXME comment. */
346 devicep
->host2dev_func (devicep
->target_id
,
347 (void *) (tgt
->tgt_start
349 (void *) &cur_node
.tgt_offset
,
353 /* Add bias to the pointer value. */
354 cur_node
.host_start
+= sizes
[i
];
355 cur_node
.host_end
= cur_node
.host_start
+ 1;
356 n
= splay_tree_lookup (mem_map
, &cur_node
);
359 /* Could be possibly zero size array section. */
361 n
= splay_tree_lookup (mem_map
, &cur_node
);
364 cur_node
.host_start
--;
365 n
= splay_tree_lookup (mem_map
, &cur_node
);
366 cur_node
.host_start
++;
371 gomp_mutex_unlock (&devicep
->lock
);
372 gomp_fatal ("Pointer target of array section "
375 cur_node
.host_start
-= n
->host_start
;
376 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
377 + cur_node
.host_start
;
378 /* At this point tgt_offset is target address of the
379 array section. Now subtract bias to get what we want
380 to initialize the pointer with. */
381 cur_node
.tgt_offset
-= sizes
[i
];
382 /* FIXME: see above FIXME comment. */
383 devicep
->host2dev_func (devicep
->target_id
,
384 (void *) (tgt
->tgt_start
386 (void *) &cur_node
.tgt_offset
,
389 case GOMP_MAP_TO_PSET
:
390 /* FIXME: see above FIXME comment. */
391 devicep
->host2dev_func (devicep
->target_id
,
392 (void *) (tgt
->tgt_start
394 (void *) k
->host_start
,
395 k
->host_end
- k
->host_start
);
397 for (j
= i
+ 1; j
< mapnum
; j
++)
398 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc
, kinds
, j
)
401 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
402 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
410 = (uintptr_t) *(void **) hostaddrs
[j
];
411 if (cur_node
.host_start
== (uintptr_t) NULL
)
413 cur_node
.tgt_offset
= (uintptr_t) NULL
;
414 /* FIXME: see above FIXME comment. */
415 devicep
->host2dev_func (devicep
->target_id
,
416 (void *) (tgt
->tgt_start
+ k
->tgt_offset
417 + ((uintptr_t) hostaddrs
[j
]
419 (void *) &cur_node
.tgt_offset
,
424 /* Add bias to the pointer value. */
425 cur_node
.host_start
+= sizes
[j
];
426 cur_node
.host_end
= cur_node
.host_start
+ 1;
427 n
= splay_tree_lookup (mem_map
, &cur_node
);
430 /* Could be possibly zero size array section. */
432 n
= splay_tree_lookup (mem_map
, &cur_node
);
435 cur_node
.host_start
--;
436 n
= splay_tree_lookup (mem_map
, &cur_node
);
437 cur_node
.host_start
++;
442 gomp_mutex_unlock (&devicep
->lock
);
443 gomp_fatal ("Pointer target of array section "
446 cur_node
.host_start
-= n
->host_start
;
447 cur_node
.tgt_offset
= n
->tgt
->tgt_start
449 + cur_node
.host_start
;
450 /* At this point tgt_offset is target address of the
451 array section. Now subtract bias to get what we
452 want to initialize the pointer with. */
453 cur_node
.tgt_offset
-= sizes
[j
];
454 /* FIXME: see above FIXME comment. */
455 devicep
->host2dev_func (devicep
->target_id
,
456 (void *) (tgt
->tgt_start
+ k
->tgt_offset
457 + ((uintptr_t) hostaddrs
[j
]
459 (void *) &cur_node
.tgt_offset
,
464 case GOMP_MAP_FORCE_PRESENT
:
466 /* We already looked up the memory region above and it
468 size_t size
= k
->host_end
- k
->host_start
;
469 gomp_mutex_unlock (&devicep
->lock
);
470 #ifdef HAVE_INTTYPES_H
471 gomp_fatal ("present clause: !acc_is_present (%p, "
472 "%"PRIu64
" (0x%"PRIx64
"))",
473 (void *) k
->host_start
,
474 (uint64_t) size
, (uint64_t) size
);
476 gomp_fatal ("present clause: !acc_is_present (%p, "
477 "%lu (0x%lx))", (void *) k
->host_start
,
478 (unsigned long) size
, (unsigned long) size
);
482 case GOMP_MAP_FORCE_DEVICEPTR
:
483 assert (k
->host_end
- k
->host_start
== sizeof (void *));
485 devicep
->host2dev_func (devicep
->target_id
,
486 (void *) (tgt
->tgt_start
488 (void *) k
->host_start
,
492 gomp_mutex_unlock (&devicep
->lock
);
493 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
503 for (i
= 0; i
< mapnum
; i
++)
505 if (tgt
->list
[i
] == NULL
)
506 cur_node
.tgt_offset
= (uintptr_t) NULL
;
508 cur_node
.tgt_offset
= tgt
->list
[i
]->tgt
->tgt_start
509 + tgt
->list
[i
]->tgt_offset
;
510 /* FIXME: see above FIXME comment. */
511 devicep
->host2dev_func (devicep
->target_id
,
512 (void *) (tgt
->tgt_start
513 + i
* sizeof (void *)),
514 (void *) &cur_node
.tgt_offset
,
519 gomp_mutex_unlock (&devicep
->lock
);
524 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
526 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
528 tgt
->device_descr
->free_func (tgt
->device_descr
->target_id
, tgt
->to_free
);
534 /* Decrease the refcount for a set of mapped variables, and queue asychronous
535 copies from the device back to the host after any work that has been issued.
536 Because the regions are still "live", increment an asynchronous reference
537 count to indicate that they should not be unmapped from host-side data
538 structures until the asynchronous copy has completed. */
540 attribute_hidden
void
541 gomp_copy_from_async (struct target_mem_desc
*tgt
)
543 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
546 gomp_mutex_lock (&devicep
->lock
);
548 for (i
= 0; i
< tgt
->list_count
; i
++)
549 if (tgt
->list
[i
] == NULL
)
551 else if (tgt
->list
[i
]->refcount
> 1)
553 tgt
->list
[i
]->refcount
--;
554 tgt
->list
[i
]->async_refcount
++;
558 splay_tree_key k
= tgt
->list
[i
];
560 devicep
->dev2host_func (devicep
->target_id
, (void *) k
->host_start
,
561 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
562 k
->host_end
- k
->host_start
);
565 gomp_mutex_unlock (&devicep
->lock
);
568 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
569 variables back from device to host: if it is false, it is assumed that this
570 has been done already, i.e. by gomp_copy_from_async above. */
572 attribute_hidden
void
573 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
575 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
577 if (tgt
->list_count
== 0)
583 gomp_mutex_lock (&devicep
->lock
);
586 for (i
= 0; i
< tgt
->list_count
; i
++)
587 if (tgt
->list
[i
] == NULL
)
589 else if (tgt
->list
[i
]->refcount
> 1)
590 tgt
->list
[i
]->refcount
--;
591 else if (tgt
->list
[i
]->async_refcount
> 0)
592 tgt
->list
[i
]->async_refcount
--;
595 splay_tree_key k
= tgt
->list
[i
];
596 if (k
->copy_from
&& do_copyfrom
)
597 devicep
->dev2host_func (devicep
->target_id
, (void *) k
->host_start
,
598 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
599 k
->host_end
- k
->host_start
);
600 splay_tree_remove (tgt
->mem_map
, k
);
601 if (k
->tgt
->refcount
> 1)
604 gomp_unmap_tgt (k
->tgt
);
607 if (tgt
->refcount
> 1)
610 gomp_unmap_tgt (tgt
);
612 gomp_mutex_unlock (&devicep
->lock
);
616 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
617 size_t *sizes
, void *kinds
, bool is_openacc
)
620 struct splay_tree_key_s cur_node
;
621 const int typemask
= is_openacc
? 0xff : 0x7;
629 gomp_mutex_lock (&devicep
->lock
);
630 for (i
= 0; i
< mapnum
; i
++)
633 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
634 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
635 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
638 int kind
= get_kind (is_openacc
, kinds
, i
);
639 if (n
->host_start
> cur_node
.host_start
640 || n
->host_end
< cur_node
.host_end
)
642 gomp_mutex_unlock (&devicep
->lock
);
643 gomp_fatal ("Trying to update [%p..%p) object when "
644 "only [%p..%p) is mapped",
645 (void *) cur_node
.host_start
,
646 (void *) cur_node
.host_end
,
647 (void *) n
->host_start
,
648 (void *) n
->host_end
);
650 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
651 devicep
->host2dev_func (devicep
->target_id
,
652 (void *) (n
->tgt
->tgt_start
654 + cur_node
.host_start
656 (void *) cur_node
.host_start
,
657 cur_node
.host_end
- cur_node
.host_start
);
658 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
659 devicep
->dev2host_func (devicep
->target_id
,
660 (void *) cur_node
.host_start
,
661 (void *) (n
->tgt
->tgt_start
663 + cur_node
.host_start
665 cur_node
.host_end
- cur_node
.host_start
);
669 gomp_mutex_unlock (&devicep
->lock
);
670 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
671 (void *) cur_node
.host_start
,
672 (void *) cur_node
.host_end
);
675 gomp_mutex_unlock (&devicep
->lock
);
678 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
679 And insert to splay tree the mapping between addresses from HOST_TABLE and
680 from loaded target image. */
683 gomp_offload_image_to_device (struct gomp_device_descr
*devicep
,
684 void *host_table
, void *target_data
,
685 bool is_register_lock
)
687 void **host_func_table
= ((void ***) host_table
)[0];
688 void **host_funcs_end
= ((void ***) host_table
)[1];
689 void **host_var_table
= ((void ***) host_table
)[2];
690 void **host_vars_end
= ((void ***) host_table
)[3];
692 /* The func table contains only addresses, the var table contains addresses
693 and corresponding sizes. */
694 int num_funcs
= host_funcs_end
- host_func_table
;
695 int num_vars
= (host_vars_end
- host_var_table
) / 2;
697 /* Load image to device and get target addresses for the image. */
698 struct addr_pair
*target_table
= NULL
;
699 int i
, num_target_entries
700 = devicep
->load_image_func (devicep
->target_id
, target_data
, &target_table
);
702 if (num_target_entries
!= num_funcs
+ num_vars
)
704 gomp_mutex_unlock (&devicep
->lock
);
705 if (is_register_lock
)
706 gomp_mutex_unlock (®ister_lock
);
707 gomp_fatal ("Can't map target functions or variables");
710 /* Insert host-target address mapping into splay tree. */
711 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
712 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
719 tgt
->device_descr
= devicep
;
720 splay_tree_node array
= tgt
->array
;
722 for (i
= 0; i
< num_funcs
; i
++)
724 splay_tree_key k
= &array
->key
;
725 k
->host_start
= (uintptr_t) host_func_table
[i
];
726 k
->host_end
= k
->host_start
+ 1;
728 k
->tgt_offset
= target_table
[i
].start
;
730 k
->async_refcount
= 0;
731 k
->copy_from
= false;
734 splay_tree_insert (&devicep
->mem_map
, array
);
738 for (i
= 0; i
< num_vars
; i
++)
740 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
741 if (target_var
->end
- target_var
->start
742 != (uintptr_t) host_var_table
[i
* 2 + 1])
744 gomp_mutex_unlock (&devicep
->lock
);
745 if (is_register_lock
)
746 gomp_mutex_unlock (®ister_lock
);
747 gomp_fatal ("Can't map target variables (size mismatch)");
750 splay_tree_key k
= &array
->key
;
751 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
752 k
->host_end
= k
->host_start
+ (uintptr_t) host_var_table
[i
* 2 + 1];
754 k
->tgt_offset
= target_var
->start
;
756 k
->async_refcount
= 0;
757 k
->copy_from
= false;
760 splay_tree_insert (&devicep
->mem_map
, array
);
767 /* This function should be called from every offload image while loading.
768 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
769 the target, and TARGET_DATA needed by target plugin. */
772 GOMP_offload_register (void *host_table
, enum offload_target_type target_type
,
776 gomp_mutex_lock (®ister_lock
);
778 /* Load image to all initialized devices. */
779 for (i
= 0; i
< num_devices
; i
++)
781 struct gomp_device_descr
*devicep
= &devices
[i
];
782 gomp_mutex_lock (&devicep
->lock
);
783 if (devicep
->type
== target_type
&& devicep
->is_initialized
)
784 gomp_offload_image_to_device (devicep
, host_table
, target_data
, true);
785 gomp_mutex_unlock (&devicep
->lock
);
788 /* Insert image to array of pending images. */
790 = gomp_realloc_unlock (offload_images
,
791 (num_offload_images
+ 1)
792 * sizeof (struct offload_image_descr
));
793 offload_images
[num_offload_images
].type
= target_type
;
794 offload_images
[num_offload_images
].host_table
= host_table
;
795 offload_images
[num_offload_images
].target_data
= target_data
;
797 num_offload_images
++;
798 gomp_mutex_unlock (®ister_lock
);
801 /* This function should be called from every offload image while unloading.
802 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
803 the target, and TARGET_DATA needed by target plugin. */
806 GOMP_offload_unregister (void *host_table
, enum offload_target_type target_type
,
809 void **host_func_table
= ((void ***) host_table
)[0];
810 void **host_funcs_end
= ((void ***) host_table
)[1];
811 void **host_var_table
= ((void ***) host_table
)[2];
812 void **host_vars_end
= ((void ***) host_table
)[3];
815 /* The func table contains only addresses, the var table contains addresses
816 and corresponding sizes. */
817 int num_funcs
= host_funcs_end
- host_func_table
;
818 int num_vars
= (host_vars_end
- host_var_table
) / 2;
820 gomp_mutex_lock (®ister_lock
);
822 /* Unload image from all initialized devices. */
823 for (i
= 0; i
< num_devices
; i
++)
826 struct gomp_device_descr
*devicep
= &devices
[i
];
827 gomp_mutex_lock (&devicep
->lock
);
828 if (devicep
->type
!= target_type
|| !devicep
->is_initialized
)
830 gomp_mutex_unlock (&devicep
->lock
);
834 devicep
->unload_image_func (devicep
->target_id
, target_data
);
836 /* Remove mapping from splay tree. */
837 struct splay_tree_key_s k
;
838 splay_tree_key node
= NULL
;
841 k
.host_start
= (uintptr_t) host_func_table
[0];
842 k
.host_end
= k
.host_start
+ 1;
843 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
845 else if (num_vars
> 0)
847 k
.host_start
= (uintptr_t) host_var_table
[0];
848 k
.host_end
= k
.host_start
+ (uintptr_t) host_var_table
[1];
849 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
852 for (j
= 0; j
< num_funcs
; j
++)
854 k
.host_start
= (uintptr_t) host_func_table
[j
];
855 k
.host_end
= k
.host_start
+ 1;
856 splay_tree_remove (&devicep
->mem_map
, &k
);
859 for (j
= 0; j
< num_vars
; j
++)
861 k
.host_start
= (uintptr_t) host_var_table
[j
* 2];
862 k
.host_end
= k
.host_start
+ (uintptr_t) host_var_table
[j
* 2 + 1];
863 splay_tree_remove (&devicep
->mem_map
, &k
);
872 gomp_mutex_unlock (&devicep
->lock
);
875 /* Remove image from array of pending images. */
876 for (i
= 0; i
< num_offload_images
; i
++)
877 if (offload_images
[i
].target_data
== target_data
)
879 offload_images
[i
] = offload_images
[--num_offload_images
];
883 gomp_mutex_unlock (®ister_lock
);
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_offload_image_to_device (devicep, image->host_table,
				      image->target_data, false);
    }

  devicep->is_initialized = true;
}
907 /* Free address mapping tables. MM must be locked on entry, and remains locked
910 attribute_hidden
void
911 gomp_free_memmap (struct splay_tree_s
*mem_map
)
913 while (mem_map
->root
)
915 struct target_mem_desc
*tgt
= mem_map
->root
->key
.tgt
;
917 splay_tree_remove (mem_map
, &mem_map
->root
->key
);
923 /* This function de-initializes the target device, specified by DEVICEP.
924 DEVICEP must be locked on entry, and remains locked on return. */
926 attribute_hidden
void
927 gomp_fini_device (struct gomp_device_descr
*devicep
)
929 if (devicep
->is_initialized
)
930 devicep
->fini_device_func (devicep
->target_id
);
932 devicep
->is_initialized
= false;
935 /* Called when encountering a target directive. If DEVICE
936 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
937 GOMP_DEVICE_HOST_FALLBACK (or any value
938 larger than last available hw device), use host fallback.
939 FN is address of host code, UNUSED is part of the current ABI, but
940 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
941 with MAPNUM entries, with addresses of the host objects,
942 sizes of the host objects (resp. for pointer kind pointer bias
943 and assumed sizeof (void *) size) and kinds. */
946 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
947 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
948 unsigned char *kinds
)
950 struct gomp_device_descr
*devicep
= resolve_device (device
);
953 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
956 struct gomp_thread old_thr
, *thr
= gomp_thread ();
958 memset (thr
, '\0', sizeof (*thr
));
959 if (gomp_places_list
)
961 thr
->place
= old_thr
.place
;
962 thr
->ts
.place_partition_len
= gomp_places_list_len
;
965 gomp_free_thread (thr
);
970 gomp_mutex_lock (&devicep
->lock
);
971 if (!devicep
->is_initialized
)
972 gomp_init_device (devicep
);
973 gomp_mutex_unlock (&devicep
->lock
);
977 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
978 fn_addr
= (void *) fn
;
981 gomp_mutex_lock (&devicep
->lock
);
982 struct splay_tree_key_s k
;
983 k
.host_start
= (uintptr_t) fn
;
984 k
.host_end
= k
.host_start
+ 1;
985 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
988 gomp_mutex_unlock (&devicep
->lock
);
989 gomp_fatal ("Target function wasn't mapped");
991 gomp_mutex_unlock (&devicep
->lock
);
993 fn_addr
= (void *) tgt_fn
->tgt_offset
;
996 struct target_mem_desc
*tgt_vars
997 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
999 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1001 memset (thr
, '\0', sizeof (*thr
));
1002 if (gomp_places_list
)
1004 thr
->place
= old_thr
.place
;
1005 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1007 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
);
1008 gomp_free_thread (thr
);
1010 gomp_unmap_vars (tgt_vars
, true);
1014 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
1015 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1017 struct gomp_device_descr
*devicep
= resolve_device (device
);
1020 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1022 /* Host fallback. */
1023 struct gomp_task_icv
*icv
= gomp_icv (false);
1024 if (icv
->target_data
)
1026 /* Even when doing a host fallback, if there are any active
1027 #pragma omp target data constructs, need to remember the
1028 new #pragma omp target data, otherwise GOMP_target_end_data
1029 would get out of sync. */
1030 struct target_mem_desc
*tgt
1031 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false, false);
1032 tgt
->prev
= icv
->target_data
;
1033 icv
->target_data
= tgt
;
1038 gomp_mutex_lock (&devicep
->lock
);
1039 if (!devicep
->is_initialized
)
1040 gomp_init_device (devicep
);
1041 gomp_mutex_unlock (&devicep
->lock
);
1043 struct target_mem_desc
*tgt
1044 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1046 struct gomp_task_icv
*icv
= gomp_icv (true);
1047 tgt
->prev
= icv
->target_data
;
1048 icv
->target_data
= tgt
;
1052 GOMP_target_end_data (void)
1054 struct gomp_task_icv
*icv
= gomp_icv (false);
1055 if (icv
->target_data
)
1057 struct target_mem_desc
*tgt
= icv
->target_data
;
1058 icv
->target_data
= tgt
->prev
;
1059 gomp_unmap_vars (tgt
, true);
1064 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
1065 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1067 struct gomp_device_descr
*devicep
= resolve_device (device
);
1070 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1073 gomp_mutex_lock (&devicep
->lock
);
1074 if (!devicep
->is_initialized
)
1075 gomp_init_device (devicep
);
1076 gomp_mutex_unlock (&devicep
->lock
);
1078 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
1082 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
1086 struct gomp_task_icv
*icv
= gomp_icv (true);
1087 icv
->thread_limit_var
1088 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
1093 #ifdef PLUGIN_SUPPORT
1095 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
1097 The handles of the found functions are stored in the corresponding fields
1098 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
1101 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
1102 const char *plugin_name
)
1104 const char *err
= NULL
, *last_missing
= NULL
;
1105 int optional_present
, optional_total
;
1107 /* Clear any existing error. */
1110 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
1117 /* Check if all required functions are available in the plugin and store
1122 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
1128 /* Similar, but missing functions are not an error. */
1129 #define DLSYM_OPT(f, n) \
1132 const char *tmp_err; \
1133 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
1134 tmp_err = dlerror (); \
1135 if (tmp_err == NULL) \
1136 optional_present++; \
1138 last_missing = #n; \
1146 DLSYM (get_num_devices
);
1147 DLSYM (init_device
);
1148 DLSYM (fini_device
);
1150 DLSYM (unload_image
);
1155 device
->capabilities
= device
->get_caps_func ();
1156 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1158 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
1160 optional_present
= optional_total
= 0;
1161 DLSYM_OPT (openacc
.exec
, openacc_parallel
);
1162 DLSYM_OPT (openacc
.open_device
, openacc_open_device
);
1163 DLSYM_OPT (openacc
.close_device
, openacc_close_device
);
1164 DLSYM_OPT (openacc
.get_device_num
, openacc_get_device_num
);
1165 DLSYM_OPT (openacc
.set_device_num
, openacc_set_device_num
);
1166 DLSYM_OPT (openacc
.register_async_cleanup
,
1167 openacc_register_async_cleanup
);
1168 DLSYM_OPT (openacc
.async_test
, openacc_async_test
);
1169 DLSYM_OPT (openacc
.async_test_all
, openacc_async_test_all
);
1170 DLSYM_OPT (openacc
.async_wait
, openacc_async_wait
);
1171 DLSYM_OPT (openacc
.async_wait_async
, openacc_async_wait_async
);
1172 DLSYM_OPT (openacc
.async_wait_all
, openacc_async_wait_all
);
1173 DLSYM_OPT (openacc
.async_wait_all_async
, openacc_async_wait_all_async
);
1174 DLSYM_OPT (openacc
.async_set_async
, openacc_async_set_async
);
1175 DLSYM_OPT (openacc
.create_thread_data
, openacc_create_thread_data
);
1176 DLSYM_OPT (openacc
.destroy_thread_data
, openacc_destroy_thread_data
);
1177 /* Require all the OpenACC handlers if we have
1178 GOMP_OFFLOAD_CAP_OPENACC_200. */
1179 if (optional_present
!= optional_total
)
1181 err
= "plugin missing OpenACC handler function";
1184 optional_present
= optional_total
= 0;
1185 DLSYM_OPT (openacc
.cuda
.get_current_device
,
1186 openacc_get_current_cuda_device
);
1187 DLSYM_OPT (openacc
.cuda
.get_current_context
,
1188 openacc_get_current_cuda_context
);
1189 DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_get_cuda_stream
);
1190 DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_set_cuda_stream
);
1191 /* Make sure all the CUDA functions are there if any of them are. */
1192 if (optional_present
&& optional_present
!= optional_total
)
1194 err
= "plugin missing OpenACC CUDA handler function";
1204 gomp_error ("while loading %s: %s", plugin_name
, err
);
1206 gomp_error ("missing function was %s", last_missing
);
1208 dlclose (plugin_handle
);
1213 /* This function initializes the runtime needed for offloading.
1214 It parses the list of offload targets and tries to load the plugins for
1215 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1216 will be set, and the array DEVICES initialized, containing descriptors for
1217 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1221 gomp_target_init (void)
1223 const char *prefix
="libgomp-plugin-";
1224 const char *suffix
= SONAME_SUFFIX (1);
1225 const char *cur
, *next
;
1227 int i
, new_num_devices
;
1232 cur
= OFFLOAD_TARGETS
;
1236 struct gomp_device_descr current_device
;
1238 next
= strchr (cur
, ',');
1240 plugin_name
= (char *) malloc (1 + (next
? next
- cur
: strlen (cur
))
1241 + strlen (prefix
) + strlen (suffix
));
1248 strcpy (plugin_name
, prefix
);
1249 strncat (plugin_name
, cur
, next
? next
- cur
: strlen (cur
));
1250 strcat (plugin_name
, suffix
);
1252 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
1254 new_num_devices
= current_device
.get_num_devices_func ();
1255 if (new_num_devices
>= 1)
1257 /* Augment DEVICES and NUM_DEVICES. */
1259 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
1260 * sizeof (struct gomp_device_descr
));
1268 current_device
.name
= current_device
.get_name_func ();
1269 /* current_device.capabilities has already been set. */
1270 current_device
.type
= current_device
.get_type_func ();
1271 current_device
.mem_map
.root
= NULL
;
1272 current_device
.is_initialized
= false;
1273 current_device
.openacc
.data_environ
= NULL
;
1274 current_device
.openacc
.target_data
= NULL
;
1275 for (i
= 0; i
< new_num_devices
; i
++)
1277 current_device
.target_id
= i
;
1278 devices
[num_devices
] = current_device
;
1279 gomp_mutex_init (&devices
[num_devices
].lock
);
1290 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1291 NUM_DEVICES_OPENMP. */
1292 struct gomp_device_descr
*devices_s
1293 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
1300 num_devices_openmp
= 0;
1301 for (i
= 0; i
< num_devices
; i
++)
1302 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1303 devices_s
[num_devices_openmp
++] = devices
[i
];
1304 int num_devices_after_openmp
= num_devices_openmp
;
1305 for (i
= 0; i
< num_devices
; i
++)
1306 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1307 devices_s
[num_devices_after_openmp
++] = devices
[i
];
1309 devices
= devices_s
;
1311 for (i
= 0; i
< num_devices
; i
++)
1313 /* The 'devices' array can be moved (by the realloc call) until we have
1314 found all the plugins, so registering with the OpenACC runtime (which
1315 takes a copy of the pointer argument) must be delayed until now. */
1316 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
1317 goacc_register (&devices
[i
]);
1321 #else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fallback to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
1328 #endif /* PLUGIN_SUPPORT */