/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  enum offload_target_type type;
  void *host_table;
  void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;
/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
/* The comparison function for the splay tree keys: it orders the half-open
   host address ranges [host_start, host_end).  */

attribute_hidden int
splay_compare (splay_tree_key x, splay_tree_key y)
{
  if (x->host_start == x->host_end
      && y->host_start == y->host_end)
    return 0;
  if (x->host_end <= y->host_start)
    return -1;
  if (x->host_start >= y->host_end)
    return 1;
  return 0;
}

#include "splay-tree.h"
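
/* Illustrative note (not part of the original sources): with the comparison
   above, a zero-sized key such as [p, p) compares equal to any mapped range
   [a, b) with a < p < b, which is what the pointer-target probing in
   gomp_map_vars below relies on.  For example, assuming [0x1000, 0x1100) is
   mapped, a lookup with the key [0x1040, 0x1040) finds that mapping, while
   [0x1100, 0x1100) does not (it compares greater).  */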
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  return &devices[device_id];
}
/* Handle the case where splay_tree_lookup found OLDN for NEWN.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, unsigned char kind)
{
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }
  oldn->refcount++;
}
static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  return is_openacc ? ((unsigned short *) kinds)[idx]
                    : ((unsigned char *) kinds)[idx];
}
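
/* Illustrative note (not part of the original sources): each kinds[] entry
   packs the map kind in its low bits and the log2 alignment of the object in
   the remaining bits.  gomp_map_vars below decodes it roughly as

     int kind = get_kind (is_openacc, kinds, i);
     size_t align = (size_t) 1 << (kind >> rshift);  /* rshift is 3 or 8.  */
     switch (kind & typemask)                        /* typemask 0x7 or 0xff.  */
       ...

   so an OpenMP entry of 0x22, for instance, means kind value 2 with
   2^4 = 16-byte alignment; the symbolic kind values themselves are defined
   in gomp-constants.h.  */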
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool is_openacc, bool is_target)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  const int rshift = is_openacc ? 8 : 3;
  const int typemask = is_openacc ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    return tgt;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (is_target)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (is_openacc, kinds, i);
      if (hostaddrs[i] == NULL)
        {
          tgt->list[i] = NULL;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
        {
          tgt->list[i] = n;
          gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
        }
      else
        {
          tgt->list[i] = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j] = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || is_target)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (is_target)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt)
    {
      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j;
      for (i = 0; i < mapnum; i++)
        if (tgt->list[i] == NULL)
          {
            int kind = get_kind (is_openacc, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n)
              {
                tgt->list[i] = n;
                gomp_map_vars_existing (devicep, n, k, kind & typemask);
              }
            else
              {
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i] = k;
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                k->tgt = tgt;
                k->tgt_offset = tgt_size;
                tgt_size += k->host_end - k->host_start;
                k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                k->refcount = 1;
                k->async_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
                    cur_node.host_start
                      = (uintptr_t) *(void **) k->host_start;
                    if (cur_node.host_start == (uintptr_t) NULL)
                      {
                        cur_node.tgt_offset = (uintptr_t) NULL;
                        /* FIXME: see above FIXME comment.  */
                        devicep->host2dev_func (devicep->target_id,
                                                (void *) (tgt->tgt_start
                                                          + k->tgt_offset),
                                                (void *) &cur_node.tgt_offset,
                                                sizeof (void *));
                        break;
                      }
                    /* Add bias to the pointer value.  */
                    cur_node.host_start += sizes[i];
                    cur_node.host_end = cur_node.host_start + 1;
                    n = splay_tree_lookup (mem_map, &cur_node);
                    if (n == NULL)
                      {
                        /* Could possibly be a zero size array section.  */
                        cur_node.host_end--;
                        n = splay_tree_lookup (mem_map, &cur_node);
                        if (n == NULL)
                          {
                            cur_node.host_start--;
                            n = splay_tree_lookup (mem_map, &cur_node);
                            cur_node.host_start++;
                          }
                      }
                    if (n == NULL)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("Pointer target of array section "
                                    "wasn't mapped");
                      }
                    cur_node.host_start -= n->host_start;
                    cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                          + cur_node.host_start;
                    /* At this point tgt_offset is target address of the
                       array section.  Now subtract bias to get what we want
                       to initialize the pointer with.  */
                    cur_node.tgt_offset -= sizes[i];
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) &cur_node.tgt_offset,
                                            sizeof (void *));
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);
                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j] = k;
                          k->refcount++;
                          cur_node.host_start
                            = (uintptr_t) *(void **) hostaddrs[j];
                          if (cur_node.host_start == (uintptr_t) NULL)
                            {
                              cur_node.tgt_offset = (uintptr_t) NULL;
                              /* FIXME: see above FIXME comment.  */
                              devicep->host2dev_func (devicep->target_id,
                                (void *) (tgt->tgt_start + k->tgt_offset
                                          + ((uintptr_t) hostaddrs[j]
                                             - k->host_start)),
                                (void *) &cur_node.tgt_offset,
                                sizeof (void *));
                              i++;
                              continue;
                            }
                          /* Add bias to the pointer value.  */
                          cur_node.host_start += sizes[j];
                          cur_node.host_end = cur_node.host_start + 1;
                          n = splay_tree_lookup (mem_map, &cur_node);
                          if (n == NULL)
                            {
                              /* Could possibly be a zero size array
                                 section.  */
                              cur_node.host_end--;
                              n = splay_tree_lookup (mem_map, &cur_node);
                              if (n == NULL)
                                {
                                  cur_node.host_start--;
                                  n = splay_tree_lookup (mem_map, &cur_node);
                                  cur_node.host_start++;
                                }
                            }
                          if (n == NULL)
                            {
                              gomp_mutex_unlock (&devicep->lock);
                              gomp_fatal ("Pointer target of array section "
                                          "wasn't mapped");
                            }
                          cur_node.host_start -= n->host_start;
                          cur_node.tgt_offset = n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start;
                          /* At this point tgt_offset is target address of the
                             array section.  Now subtract bias to get what we
                             want to initialize the pointer with.  */
                          cur_node.tgt_offset -= sizes[j];
                          /* FIXME: see above FIXME comment.  */
                          devicep->host2dev_func (devicep->target_id,
                            (void *) (tgt->tgt_start + k->tgt_offset
                                      + ((uintptr_t) hostaddrs[j]
                                         - k->host_start)),
                            (void *) &cur_node.tgt_offset,
                            sizeof (void *));
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));

                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            sizeof (void *));
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }
                array++;
              }
          }
    }
  if (is_target)
    {
      for (i = 0; i < mapnum; i++)
        {
          if (tgt->list[i] == NULL)
            cur_node.tgt_offset = (uintptr_t) NULL;
          else
            cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
                                  + tgt->list[i]->tgt_offset;
          /* FIXME: see above FIXME comment.  */
          devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start
                                            + i * sizeof (void *)),
                                  (void *) &cur_node.tgt_offset,
                                  sizeof (void *));
        }
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
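
/* Illustrative sketch (not part of the original sources) of the device block
   gomp_map_vars lays out for a target region (is_target == true), assuming
   two newly mapped objects A and B and one already-mapped object C:

     tgt_start                                                    tgt_end
     | devaddr[0] | devaddr[1] | devaddr[2] | ..align.. | A | ..align.. | B |

   The leading mapnum pointer slots are filled by the final loop above with
   the device addresses of the corresponding list entries (C's slot refers to
   its previously mapped copy), and tgt_start is what GOMP_target below hands
   to the plugin's run hook.  */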
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      {
        tgt->list[i]->refcount--;
        tgt->list[i]->async_refcount++;
      }
    else
      {
        splay_tree_key k = tgt->list[i];
        if (k->copy_from)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      tgt->list[i]->refcount--;
    else if (tgt->list[i]->async_refcount > 0)
      tgt->list[i]->async_refcount--;
    else
      {
        splay_tree_key k = tgt->list[i];
        if (k->copy_from && do_copyfrom)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
        splay_tree_remove (&devicep->mem_map, k);
        if (k->tgt->refcount > 1)
          k->tgt->refcount--;
        else
          gomp_unmap_tgt (k->tgt);
      }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool is_openacc)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = is_openacc ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (is_openacc, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }
            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              devicep->host2dev_func (devicep->target_id,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      (void *) cur_node.host_start,
                                      cur_node.host_end - cur_node.host_start);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              devicep->dev2host_func (devicep->target_id,
                                      (void *) cur_node.host_start,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      cur_node.host_end - cur_node.host_start);
          }
        else
          {
            gomp_mutex_unlock (&devicep->lock);
            gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
                        (void *) cur_node.host_start,
                        (void *) cur_node.host_end);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  */

static void
gomp_offload_image_to_device (struct gomp_device_descr *devicep,
                              void *host_table, void *target_data,
                              bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries
    = devicep->load_image_func (devicep->target_id, target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Can't map target functions or variables");
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
          != (uintptr_t) host_var_table[i * 2 + 1])
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Can't map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
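
/* Illustrative sketch (not part of the original sources) of the HOST_TABLE
   layout decoded above.  The compiler emits four pointers delimiting two
   tables; the variable table stores address/size pairs:

     void *host_table[] = { func_table_begin, func_table_end,
                            var_table_begin,  var_table_end };
     // func table:  { &foo, &bar, ... }
     // var table:   { &x, (void *) sizeof (x), &y, (void *) sizeof (y), ... }

   The names shown are placeholders; the real tables are generated per offload
   image by the compiler and linker.  */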
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_register (void *host_table, enum offload_target_type target_type,
                       void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
        gomp_offload_image_to_device (devicep, host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
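
/* Usage sketch (hypothetical, not part of the original sources): a program
   with offloaded regions ends up registering each image from a constructor,
   roughly equivalent to

     GOMP_offload_register (host_table, OFFLOAD_TARGET_TYPE_NVIDIA_PTX,
                            target_image_data);

   where host_table is the four-pointer table described above and the last
   argument is whatever blob the matching plugin's load_image hook expects;
   the names here are placeholders.  */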
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
                         void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];
  int i, j;

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type != target_type || !devicep->is_initialized)
        {
          gomp_mutex_unlock (&devicep->lock);
          continue;
        }

      devicep->unload_image_func (devicep->target_id, target_data);

      /* Remove mapping from splay tree.  */
      struct splay_tree_key_s k;
      splay_tree_key node = NULL;
      if (num_funcs > 0)
        {
          k.host_start = (uintptr_t) host_func_table[0];
          k.host_end = k.host_start + 1;
          node = splay_tree_lookup (&devicep->mem_map, &k);
        }
      else if (num_vars > 0)
        {
          k.host_start = (uintptr_t) host_var_table[0];
          k.host_end = k.host_start + (uintptr_t) host_var_table[1];
          node = splay_tree_lookup (&devicep->mem_map, &k);
        }

      for (j = 0; j < num_funcs; j++)
        {
          k.host_start = (uintptr_t) host_func_table[j];
          k.host_end = k.host_start + 1;
          splay_tree_remove (&devicep->mem_map, &k);
        }

      for (j = 0; j < num_vars; j++)
        {
          k.host_start = (uintptr_t) host_var_table[j * 2];
          k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
          splay_tree_remove (&devicep->mem_map, &k);
        }

      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;

  devicep->init_device_func (devicep->target_id);

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_offload_image_to_device (devicep, image->host_table,
                                      image->target_data, false);
    }

  devicep->is_initialized = true;
}
/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* This function de-initializes the target device, specified by DEVICEP.
   DEVICEP must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_fini_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    devicep->fini_device_func (devicep->target_id);

  devicep->is_initialized = false;
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  */
      struct gomp_thread old_thr, *thr = gomp_thread ();
      old_thr = *thr;
      memset (thr, '\0', sizeof (*thr));
      if (gomp_places_list)
        {
          thr->place = old_thr.place;
          thr->ts.place_partition_len = gomp_places_list_len;
        }
      fn (hostaddrs);
      gomp_free_thread (thr);
      *thr = old_thr;
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  void *fn_addr;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    fn_addr = (void *) fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      if (tgt_fn == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Target function wasn't mapped");
        }
      gomp_mutex_unlock (&devicep->lock);

      fn_addr = (void *) tgt_fn->tgt_offset;
    }

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     true);
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  gomp_unmap_vars (tgt_vars, true);
}
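
/* Illustrative example (not part of the original sources): for a construct
   such as

     #pragma omp target map(to: a) map(tofrom: b)
     { b += a; }

   the compiler outlines the body into a function and emits a call roughly
   equivalent to

     GOMP_target (GOMP_DEVICE_ICV, outlined_fn, NULL, 2,
                  hostaddrs, sizes, kinds);

   with hostaddrs = { &a, &b }, sizes = { sizeof (a), sizeof (b) } and kinds
   encoding to/tofrom plus alignment as described earlier.  The outlined
   function name is a placeholder; the exact kind values come from
   gomp-constants.h.  */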
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  */
      struct gomp_task_icv *icv = gomp_icv (false);
      if (icv->target_data)
        {
          /* Even when doing a host fallback, if there are any active
             #pragma omp target data constructs, need to remember the
             new #pragma omp target data, otherwise GOMP_target_end_data
             would get out of sync.  */
          struct target_mem_desc *tgt
            = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
          tgt->prev = icv->target_data;
          icv->target_data = tgt;
        }
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     false);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
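
/* Illustrative example (not part of the original sources): a data region such
   as

     #pragma omp target data map(alloc: buf[0:n])
     { ... }

   becomes a GOMP_target_data call on entry and a GOMP_target_end_data call on
   exit, and the mapping established here stays on the device for any target
   regions executed in between.  */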
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
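
/* Illustrative example (not part of the original sources):

     #pragma omp target update to(a) from(b)

   lowers to a GOMP_target_update call whose kinds array requests a
   host-to-device copy for a and a device-to-host copy for b; gomp_update
   above then issues the host2dev/dev2host transfers for the already-mapped
   ranges.  */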
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;
  int optional_present, optional_total;

  /* Clear any existing error.  */
  dlerror ();

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto out;

  /* Check if all required functions are available in the plugin and store
     their handlers.  */
#define DLSYM(f) \
  do \
    { \
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
      err = dlerror (); \
      if (err != NULL) \
        goto out; \
    } \
  while (0)
  /* Similar, but missing functions are not an error.  */
#define DLSYM_OPT(f, n) \
  do \
    { \
      const char *tmp_err; \
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
      tmp_err = dlerror (); \
      if (tmp_err == NULL) \
        optional_present++; \
      else \
        last_missing = #n; \
      optional_total++; \
    } \
  while (0)

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    DLSYM (run);
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.exec, openacc_parallel);
      DLSYM_OPT (openacc.register_async_cleanup,
                 openacc_register_async_cleanup);
      DLSYM_OPT (openacc.async_test, openacc_async_test);
      DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
      DLSYM_OPT (openacc.async_wait, openacc_async_wait);
      DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
      DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
      DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
      DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
      DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
      DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
      /* Require all the OpenACC handlers if we have
         GOMP_OFFLOAD_CAP_OPENACC_200.  */
      if (optional_present != optional_total)
        {
          err = "plugin missing OpenACC handler function";
          goto out;
        }
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.cuda.get_current_device,
                 openacc_get_current_cuda_device);
      DLSYM_OPT (openacc.cuda.get_current_context,
                 openacc_get_current_cuda_context);
      DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
      DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
      /* Make sure all the CUDA functions are there if any of them are.  */
      if (optional_present && optional_present != optional_total)
        {
          err = "plugin missing OpenACC CUDA handler function";
          goto out;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return true;

 out:
  if (err == NULL)
    err = dlerror ();
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);
  return false;
}
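
/* Illustrative sketch (not part of the original sources): a minimal plugin
   exports the GOMP_OFFLOAD_* entry points resolved above, for example

     const char *GOMP_OFFLOAD_get_name (void) { return "mydev"; }
     unsigned int GOMP_OFFLOAD_get_caps (void)
       { return GOMP_OFFLOAD_CAP_OPENMP_400; }
     int GOMP_OFFLOAD_get_num_devices (void) { return 1; }
     void GOMP_OFFLOAD_init_device (int n) { ... }
     ...

   The exact prototypes are fixed by the plugin interface; the bodies shown
   here are only placeholders.  */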
/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_TARGETS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;

        next = strchr (cur, ',');

        plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
                                       + strlen (prefix) + strlen (suffix));
        if (!plugin_name)
          {
            num_devices = 0;
            break;
          }

        strcpy (plugin_name, prefix);
        strncat (plugin_name, cur, next ? next - cur : strlen (cur));
        strcat (plugin_name, suffix);

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devices = current_device.get_num_devices_func ();
            if (new_num_devices >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devices = realloc (devices, (num_devices + new_num_devices)
                                   * sizeof (struct gomp_device_descr));
                if (!devices)
                  {
                    num_devices = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.is_initialized = false;
                current_device.openacc.data_environ = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
    }
}
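
/* Illustrative example (not part of the original sources): if OFFLOAD_TARGETS
   were the comma-separated list "foo,bar", the loop above would try to dlopen
   "libgomp-plugin-foo" + SONAME_SUFFIX (1) (e.g. ".so.1") and then the same
   for "bar"; the actual target names and suffix depend entirely on how the
   toolchain was configured.  */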
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */