/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr
{
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
  unsigned version;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;
/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
/* The comparison function.  */

static inline int
splay_compare (splay_tree_key x, splay_tree_key y)
{
  if (x->host_start == x->host_end
      && y->host_start == y->host_end)
    return 0;
  if (x->host_end <= y->host_start)
    return -1;
  if (x->host_start >= y->host_end)
    return 1;
  return 0;
}

#include "splay-tree.h"
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  return &devices[device_id];
}
/* Handle the case where splay_tree_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, unsigned char kind)
{
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }
  oldn->refcount++;
}
static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  return is_openacc ? ((unsigned short *) kinds)[idx]
                    : ((unsigned char *) kinds)[idx];
}
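/* Editorial note (not part of the original sources): each map kind packs the
   map type into its low bits and the requested alignment, as a log2 value,
   into its high bits.  gomp_map_vars below decodes a kind as

     int type = kind & typemask;                     (typemask: 0x7 OpenMP, 0xff OpenACC)
     size_t align = (size_t) 1 << (kind >> rshift);  (rshift: 3 OpenMP, 8 OpenACC)

   which is why get_kind above reads 8-bit kinds for OpenMP and 16-bit kinds
   for OpenACC.  */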
/* Store at TGT's offset TARGET_OFFSET the device address that corresponds to
   the host pointer HOST_PTR, adjusted by BIAS.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
                  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      devicep->host2dev_func (devicep->target_id,
                              (void *) (tgt->tgt_start + target_offset),
                              (void *) &cur_node.tgt_offset,
                              sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start + 1;
  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      /* Could be possibly zero size array section.  */
      cur_node.host_end--;
      n = splay_tree_lookup (mem_map, &cur_node);
      if (n == NULL)
        {
          cur_node.host_start--;
          n = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
        }
    }
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  devicep->host2dev_func (devicep->target_id,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *));
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool is_openacc, bool is_target)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  const int rshift = is_openacc ? 8 : 3;
  const int typemask = is_openacc ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    return tgt;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (is_target)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (is_openacc, kinds, i);
      if (hostaddrs[i] == NULL)
        {
          tgt->list[i] = NULL;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
        {
          tgt->list[i] = n;
          gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
        }
      else
        {
          tgt->list[i] = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j] = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || is_target)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (is_target)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt)
    {
      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i] == NULL)
          {
            int kind = get_kind (is_openacc, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n)
              {
                tgt->list[i] = n;
                gomp_map_vars_existing (devicep, n, k, kind & typemask);
              }
            else
              {
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i] = k;
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                k->tgt = tgt;
                k->tgt_offset = tgt_size;
                tgt_size += k->host_end - k->host_start;
                k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                k->refcount = 1;
                k->async_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i]);
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j] = k;
                          k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j]);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));

                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            sizeof (void *));
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }
                array++;
              }
          }
    }

  if (is_target)
    {
      for (i = 0; i < mapnum; i++)
        {
          if (tgt->list[i] == NULL)
            cur_node.tgt_offset = (uintptr_t) NULL;
          else
            cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
                                  + tgt->list[i]->tgt_offset;
          /* FIXME: see above FIXME comment.  */
          devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start
                                            + i * sizeof (void *)),
                                  (void *) &cur_node.tgt_offset,
                                  sizeof (void *));
        }
    }

  gomp_mutex_unlock (&devicep->lock);

  return tgt;
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      {
        tgt->list[i]->refcount--;
        tgt->list[i]->async_refcount++;
      }
    else
      {
        splay_tree_key k = tgt->list[i];
        if (k->copy_from)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      tgt->list[i]->refcount--;
    else if (tgt->list[i]->async_refcount > 0)
      tgt->list[i]->async_refcount--;
    else
      {
        splay_tree_key k = tgt->list[i];
        if (k->copy_from && do_copyfrom)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
        splay_tree_remove (&devicep->mem_map, k);
        if (k->tgt->refcount > 1)
          k->tgt->refcount--;
        else
          gomp_unmap_tgt (k->tgt);
      }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool is_openacc)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = is_openacc ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (is_openacc, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }
            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              devicep->host2dev_func (devicep->target_id,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      (void *) cur_node.host_start,
                                      cur_node.host_end - cur_node.host_start);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              devicep->dev2host_func (devicep->target_id,
                                      (void *) cur_node.host_start,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      cur_node.host_end - cur_node.host_start);
          }
        else
          {
            gomp_mutex_unlock (&devicep->lock);
            gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
                        (void *) cur_node.host_start,
                        (void *) cur_node.host_end);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compiler
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = 1;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
          != (uintptr_t) host_var_table[i * 2 + 1])
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Can't map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  int j;
  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  devicep->unload_image_func (devicep->target_id, version, target_data);

  /* Remove mappings from splay tree.  */
  for (j = 0; j < num_funcs; j++)
    {
      k.host_start = (uintptr_t) host_func_table[j];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  for (j = 0; j < num_vars; j++)
    {
      k.host_start = (uintptr_t) host_var_table[j * 2];
      k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
      splay_tree_remove (&devicep->mem_map, &k);
    }

  if (node)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
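/* Editorial sketch (not part of the original sources): an offload image is
   expected to register itself at load time, conceptually from a constructor
   emitted by the offloading tool chain, roughly

     static void __attribute__ ((constructor))
     register_this_image (void)
     {
       GOMP_offload_register_ver (0, &__host_table, TARGET_TYPE, &image_data);
     }

   __host_table, TARGET_TYPE and image_data are placeholder names here for
   the host table descriptor, the offload target type and the plugin-specific
   target data described in the comment above.  */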
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  devicep->is_initialized = true;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    {
      int i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}
/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* This function de-initializes the target device, specified by DEVICEP.
   DEVICEP must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_fini_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    devicep->fini_device_func (devicep->target_id);

  devicep->is_initialized = false;
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  */
      struct gomp_thread old_thr, *thr = gomp_thread ();
      old_thr = *thr;
      memset (thr, '\0', sizeof (*thr));
      if (gomp_places_list)
        {
          thr->place = old_thr.place;
          thr->ts.place_partition_len = gomp_places_list_len;
        }
      fn (hostaddrs);
      gomp_free_thread (thr);
      *thr = old_thr;
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  void *fn_addr;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    fn_addr = (void *) fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      if (tgt_fn == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Target function wasn't mapped");
        }
      gomp_mutex_unlock (&devicep->lock);

      fn_addr = (void *) tgt_fn->tgt_offset;
    }

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     true);
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  gomp_unmap_vars (tgt_vars, true);
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  */
      struct gomp_task_icv *icv = gomp_icv (false);
      if (icv->target_data)
        {
          /* Even when doing a host fallback, if there are any active
             #pragma omp target data constructs, need to remember the
             new #pragma omp target data, otherwise GOMP_target_end_data
             would get out of sync.  */
          struct target_mem_desc *tgt
            = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
          tgt->prev = icv->target_data;
          icv->target_data = tgt;
        }
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     false);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f) \
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n) \
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
   || (last_missing = #n, 0))
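/* Editorial note: with the macros as reconstructed above, a required lookup
   such as

     DLSYM (init_device);

   expands to

     if (!(device->init_device_func = dlsym (plugin_handle,
                                              "GOMP_OFFLOAD_init_device")))
       goto dl_fail;

   whereas DLSYM_OPT (openacc.async_test, openacc_async_test) merely records
   the missing entry point name in last_missing and evaluates to false.  */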
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    DLSYM (run);
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_parallel)
          || !DLSYM_OPT (openacc.register_async_cleanup,
                         openacc_register_async_cleanup)
          || !DLSYM_OPT (openacc.async_test, openacc_async_test)
          || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
          || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
          || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
          || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
          || !DLSYM_OPT (openacc.async_wait_all_async,
                         openacc_async_wait_all_async)
          || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_get_current_cuda_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_get_current_cuda_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_TARGETS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;

        next = strchr (cur, ',');

        plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
                                       + strlen (prefix) + strlen (suffix));
        if (!plugin_name)
          {
            num_devices = 0;
            break;
          }

        strcpy (plugin_name, prefix);
        strncat (plugin_name, cur, next ? next - cur : strlen (cur));
        strcat (plugin_name, suffix);

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devices = current_device.get_num_devices_func ();
            if (new_num_devices >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devices = realloc (devices, (num_devices + new_num_devices)
                                   * sizeof (struct gomp_device_descr));
                if (!devices)
                  {
                    num_devices = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.is_initialized = false;
                current_device.openacc.data_environ = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
    }
}
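/* Editorial example (assumed values, not from the original sources): if the
   configure-time OFFLOAD_TARGETS list contained a single entry "nvptx", the
   loop above would build the plugin file name as

     prefix + "nvptx" + suffix

   i.e. "libgomp-plugin-nvptx" followed by SONAME_SUFFIX (1), which on typical
   ELF systems ends up as something like "libgomp-plugin-nvptx.so.1"; the
   exact suffix comes from plugin-suffix.h and is platform dependent.  */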
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */