1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
30 #include "libgomp_target.h"
31 #include "oacc-plugin.h"
33 #include "gomp-constants.h"
45 static void gomp_target_init (void);
47 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
49 /* This structure describes an offload image.
50 It contains type of the target device, pointer to host table descriptor, and
51 pointer to target data. */
52 struct offload_image_descr
{
53 enum offload_target_type type
;
58 /* Array of descriptors of offload images. */
59 static struct offload_image_descr
*offload_images
;
61 /* Total number of offload images. */
62 static int num_offload_images
;
64 /* Array of descriptors for all available devices. */
65 static struct gomp_device_descr
*devices
;
67 /* Total number of available devices. */
68 static int num_devices
;
70 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
71 static int num_devices_openmp
;
73 /* The comparison function. */
76 splay_compare (splay_tree_key x
, splay_tree_key y
)
78 if (x
->host_start
== x
->host_end
79 && y
->host_start
== y
->host_end
)
81 if (x
->host_end
<= y
->host_start
)
83 if (x
->host_start
>= y
->host_end
)
88 #include "splay-tree.h"
91 gomp_init_targets_once (void)
93 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
97 gomp_get_num_devices (void)
99 gomp_init_targets_once ();
100 return num_devices_openmp
;
103 static struct gomp_device_descr
*
104 resolve_device (int device_id
)
106 if (device_id
== GOMP_DEVICE_ICV
)
108 struct gomp_task_icv
*icv
= gomp_icv (false);
109 device_id
= icv
->default_device_var
;
112 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
115 return &devices
[device_id
];
119 /* Handle the case where splay_tree_lookup found oldn for newn.
120 Helper function of gomp_map_vars. */
123 gomp_map_vars_existing (splay_tree_key oldn
, splay_tree_key newn
,
126 if ((kind
& GOMP_MAP_FLAG_FORCE
)
127 || oldn
->host_start
> newn
->host_start
128 || oldn
->host_end
< newn
->host_end
)
129 gomp_fatal ("Trying to map into device [%p..%p) object when "
130 "[%p..%p) is already mapped",
131 (void *) newn
->host_start
, (void *) newn
->host_end
,
132 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
137 get_kind (bool is_openacc
, void *kinds
, int idx
)
139 return is_openacc
? ((unsigned short *) kinds
)[idx
]
140 : ((unsigned char *) kinds
)[idx
];
143 attribute_hidden
struct target_mem_desc
*
144 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
145 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
146 bool is_openacc
, bool is_target
)
148 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
149 const int rshift
= is_openacc
? 8 : 3;
150 const int typemask
= is_openacc
? 0xff : 0x7;
151 struct gomp_memory_mapping
*mm
= &devicep
->mem_map
;
152 struct splay_tree_key_s cur_node
;
153 struct target_mem_desc
*tgt
154 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
155 tgt
->list_count
= mapnum
;
157 tgt
->device_descr
= devicep
;
163 tgt_align
= sizeof (void *);
167 size_t align
= 4 * sizeof (void *);
169 tgt_size
= mapnum
* sizeof (void *);
171 gomp_mutex_lock (&mm
->lock
);
172 for (i
= 0; i
< mapnum
; i
++)
174 int kind
= get_kind (is_openacc
, kinds
, i
);
175 if (hostaddrs
[i
] == NULL
)
180 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
181 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
182 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
184 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
185 splay_tree_key n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
189 gomp_map_vars_existing (n
, &cur_node
, kind
& typemask
);
195 size_t align
= (size_t) 1 << (kind
>> rshift
);
197 if (tgt_align
< align
)
199 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
200 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
201 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
204 for (j
= i
+ 1; j
< mapnum
; j
++)
205 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc
, kinds
, j
)
208 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
209 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
210 > cur_node
.host_end
))
224 gomp_fatal ("unexpected aggregation");
225 tgt
->to_free
= devaddrs
[0];
226 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
227 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
229 else if (not_found_cnt
|| is_target
)
231 /* Allocate tgt_align aligned tgt_size block of memory. */
232 /* FIXME: Perhaps change interface to allocate properly aligned
234 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
235 tgt_size
+ tgt_align
- 1);
236 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
237 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
238 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
249 tgt_size
= mapnum
* sizeof (void *);
254 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
255 splay_tree_node array
= tgt
->array
;
258 for (i
= 0; i
< mapnum
; i
++)
259 if (tgt
->list
[i
] == NULL
)
261 int kind
= get_kind (is_openacc
, kinds
, i
);
262 if (hostaddrs
[i
] == NULL
)
264 splay_tree_key k
= &array
->key
;
265 k
->host_start
= (uintptr_t) hostaddrs
[i
];
266 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
267 k
->host_end
= k
->host_start
+ sizes
[i
];
269 k
->host_end
= k
->host_start
+ sizeof (void *);
270 splay_tree_key n
= splay_tree_lookup (&mm
->splay_tree
, k
);
274 gomp_map_vars_existing (n
, k
, kind
& typemask
);
278 size_t align
= (size_t) 1 << (kind
>> rshift
);
280 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
282 k
->tgt_offset
= tgt_size
;
283 tgt_size
+= k
->host_end
- k
->host_start
;
284 k
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
286 k
->async_refcount
= 0;
291 splay_tree_insert (&mm
->splay_tree
, array
);
293 switch (kind
& typemask
)
297 case GOMP_MAP_FORCE_ALLOC
:
298 case GOMP_MAP_FORCE_FROM
:
301 case GOMP_MAP_TOFROM
:
302 case GOMP_MAP_FORCE_TO
:
303 case GOMP_MAP_FORCE_TOFROM
:
304 /* Copy from host to device memory. */
305 /* FIXME: Perhaps add some smarts, like if copying
306 several adjacent fields from host to target, use some
307 host buffer to avoid sending each var individually. */
308 devicep
->host2dev_func (devicep
->target_id
,
309 (void *) (tgt
->tgt_start
311 (void *) k
->host_start
,
312 k
->host_end
- k
->host_start
);
314 case GOMP_MAP_POINTER
:
316 = (uintptr_t) *(void **) k
->host_start
;
317 if (cur_node
.host_start
== (uintptr_t) NULL
)
319 cur_node
.tgt_offset
= (uintptr_t) NULL
;
320 /* Copy from host to device memory. */
321 /* FIXME: see above FIXME comment. */
322 devicep
->host2dev_func (devicep
->target_id
,
323 (void *) (tgt
->tgt_start
325 (void *) &cur_node
.tgt_offset
,
329 /* Add bias to the pointer value. */
330 cur_node
.host_start
+= sizes
[i
];
331 cur_node
.host_end
= cur_node
.host_start
+ 1;
332 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
335 /* Could be possibly zero size array section. */
337 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
340 cur_node
.host_start
--;
341 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
342 cur_node
.host_start
++;
346 gomp_fatal ("Pointer target of array section "
349 cur_node
.host_start
-= n
->host_start
;
350 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
351 + cur_node
.host_start
;
352 /* At this point tgt_offset is target address of the
353 array section. Now subtract bias to get what we want
354 to initialize the pointer with. */
355 cur_node
.tgt_offset
-= sizes
[i
];
356 /* Copy from host to device memory. */
357 /* FIXME: see above FIXME comment. */
358 devicep
->host2dev_func (devicep
->target_id
,
359 (void *) (tgt
->tgt_start
361 (void *) &cur_node
.tgt_offset
,
364 case GOMP_MAP_TO_PSET
:
365 /* Copy from host to device memory. */
366 /* FIXME: see above FIXME comment. */
367 devicep
->host2dev_func (devicep
->target_id
,
368 (void *) (tgt
->tgt_start
370 (void *) k
->host_start
,
371 k
->host_end
- k
->host_start
);
373 for (j
= i
+ 1; j
< mapnum
; j
++)
374 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc
, kinds
, j
)
377 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
378 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
386 = (uintptr_t) *(void **) hostaddrs
[j
];
387 if (cur_node
.host_start
== (uintptr_t) NULL
)
389 cur_node
.tgt_offset
= (uintptr_t) NULL
;
390 /* Copy from host to device memory. */
391 /* FIXME: see above FIXME comment. */
392 devicep
->host2dev_func (devicep
->target_id
,
393 (void *) (tgt
->tgt_start
+ k
->tgt_offset
394 + ((uintptr_t) hostaddrs
[j
]
396 (void *) &cur_node
.tgt_offset
,
401 /* Add bias to the pointer value. */
402 cur_node
.host_start
+= sizes
[j
];
403 cur_node
.host_end
= cur_node
.host_start
+ 1;
404 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
407 /* Could be possibly zero size array section. */
409 n
= splay_tree_lookup (&mm
->splay_tree
,
413 cur_node
.host_start
--;
414 n
= splay_tree_lookup (&mm
->splay_tree
,
416 cur_node
.host_start
++;
420 gomp_fatal ("Pointer target of array section "
422 cur_node
.host_start
-= n
->host_start
;
423 cur_node
.tgt_offset
= n
->tgt
->tgt_start
425 + cur_node
.host_start
;
426 /* At this point tgt_offset is target address of the
427 array section. Now subtract bias to get what we
428 want to initialize the pointer with. */
429 cur_node
.tgt_offset
-= sizes
[j
];
430 /* Copy from host to device memory. */
431 /* FIXME: see above FIXME comment. */
432 devicep
->host2dev_func (devicep
->target_id
,
433 (void *) (tgt
->tgt_start
+ k
->tgt_offset
434 + ((uintptr_t) hostaddrs
[j
]
436 (void *) &cur_node
.tgt_offset
,
441 case GOMP_MAP_FORCE_PRESENT
:
443 /* We already looked up the memory region above and it
445 size_t size
= k
->host_end
- k
->host_start
;
446 gomp_fatal ("present clause: !acc_is_present (%p, "
447 "%zd (0x%zx))", (void *) k
->host_start
,
451 case GOMP_MAP_FORCE_DEVICEPTR
:
452 assert (k
->host_end
- k
->host_start
== sizeof (void *));
454 devicep
->host2dev_func (devicep
->target_id
,
455 (void *) (tgt
->tgt_start
457 (void *) k
->host_start
,
461 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
471 for (i
= 0; i
< mapnum
; i
++)
473 if (tgt
->list
[i
] == NULL
)
474 cur_node
.tgt_offset
= (uintptr_t) NULL
;
476 cur_node
.tgt_offset
= tgt
->list
[i
]->tgt
->tgt_start
477 + tgt
->list
[i
]->tgt_offset
;
478 /* Copy from host to device memory. */
479 /* FIXME: see above FIXME comment. */
480 devicep
->host2dev_func (devicep
->target_id
,
481 (void *) (tgt
->tgt_start
482 + i
* sizeof (void *)),
483 (void *) &cur_node
.tgt_offset
,
488 gomp_mutex_unlock (&mm
->lock
);
493 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
495 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
497 tgt
->device_descr
->free_func (tgt
->device_descr
->target_id
, tgt
->to_free
);
503 /* Decrease the refcount for a set of mapped variables, and queue asychronous
504 copies from the device back to the host after any work that has been issued.
505 Because the regions are still "live", increment an asynchronous reference
506 count to indicate that they should not be unmapped from host-side data
507 structures until the asynchronous copy has completed. */
509 attribute_hidden
void
510 gomp_copy_from_async (struct target_mem_desc
*tgt
)
512 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
513 struct gomp_memory_mapping
*mm
= tgt
->mem_map
;
516 gomp_mutex_lock (&mm
->lock
);
518 for (i
= 0; i
< tgt
->list_count
; i
++)
519 if (tgt
->list
[i
] == NULL
)
521 else if (tgt
->list
[i
]->refcount
> 1)
523 tgt
->list
[i
]->refcount
--;
524 tgt
->list
[i
]->async_refcount
++;
528 splay_tree_key k
= tgt
->list
[i
];
530 /* Copy from device to host memory. */
531 devicep
->dev2host_func (devicep
->target_id
, (void *) k
->host_start
,
532 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
533 k
->host_end
- k
->host_start
);
536 gomp_mutex_unlock (&mm
->lock
);
539 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
540 variables back from device to host: if it is false, it is assumed that this
541 has been done already, i.e. by gomp_copy_from_async above. */
543 attribute_hidden
void
544 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
546 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
547 struct gomp_memory_mapping
*mm
= tgt
->mem_map
;
549 if (tgt
->list_count
== 0)
556 gomp_mutex_lock (&mm
->lock
);
557 for (i
= 0; i
< tgt
->list_count
; i
++)
558 if (tgt
->list
[i
] == NULL
)
560 else if (tgt
->list
[i
]->refcount
> 1)
561 tgt
->list
[i
]->refcount
--;
562 else if (tgt
->list
[i
]->async_refcount
> 0)
563 tgt
->list
[i
]->async_refcount
--;
566 splay_tree_key k
= tgt
->list
[i
];
567 if (k
->copy_from
&& do_copyfrom
)
568 /* Copy from device to host memory. */
569 devicep
->dev2host_func (devicep
->target_id
, (void *) k
->host_start
,
570 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
571 k
->host_end
- k
->host_start
);
572 splay_tree_remove (&mm
->splay_tree
, k
);
573 if (k
->tgt
->refcount
> 1)
576 gomp_unmap_tgt (k
->tgt
);
579 if (tgt
->refcount
> 1)
582 gomp_unmap_tgt (tgt
);
583 gomp_mutex_unlock (&mm
->lock
);
587 gomp_update (struct gomp_device_descr
*devicep
, struct gomp_memory_mapping
*mm
,
588 size_t mapnum
, void **hostaddrs
, size_t *sizes
, void *kinds
,
592 struct splay_tree_key_s cur_node
;
593 const int typemask
= is_openacc
? 0xff : 0x7;
601 gomp_mutex_lock (&mm
->lock
);
602 for (i
= 0; i
< mapnum
; i
++)
605 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
606 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
607 splay_tree_key n
= splay_tree_lookup (&mm
->splay_tree
,
611 int kind
= get_kind (is_openacc
, kinds
, i
);
612 if (n
->host_start
> cur_node
.host_start
613 || n
->host_end
< cur_node
.host_end
)
614 gomp_fatal ("Trying to update [%p..%p) object when"
615 "only [%p..%p) is mapped",
616 (void *) cur_node
.host_start
,
617 (void *) cur_node
.host_end
,
618 (void *) n
->host_start
,
619 (void *) n
->host_end
);
620 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
621 /* Copy from host to device memory. */
622 devicep
->host2dev_func (devicep
->target_id
,
623 (void *) (n
->tgt
->tgt_start
625 + cur_node
.host_start
627 (void *) cur_node
.host_start
,
628 cur_node
.host_end
- cur_node
.host_start
);
629 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
630 /* Copy from device to host memory. */
631 devicep
->dev2host_func (devicep
->target_id
,
632 (void *) cur_node
.host_start
,
633 (void *) (n
->tgt
->tgt_start
635 + cur_node
.host_start
637 cur_node
.host_end
- cur_node
.host_start
);
640 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
641 (void *) cur_node
.host_start
,
642 (void *) cur_node
.host_end
);
644 gomp_mutex_unlock (&mm
->lock
);
647 static void gomp_register_image_for_device (struct gomp_device_descr
*device
,
648 struct offload_image_descr
*image
);
650 /* This function should be called from every offload image.
651 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
652 the target, and TARGET_DATA needed by target plugin. */
655 GOMP_offload_register (void *host_table
, enum offload_target_type target_type
,
658 offload_images
= gomp_realloc (offload_images
,
659 (num_offload_images
+ 1)
660 * sizeof (struct offload_image_descr
));
662 if (offload_images
== NULL
)
665 offload_images
[num_offload_images
].type
= target_type
;
666 offload_images
[num_offload_images
].host_table
= host_table
;
667 offload_images
[num_offload_images
].target_data
= target_data
;
669 num_offload_images
++;
672 /* This function initializes the target device, specified by DEVICEP. */
674 attribute_hidden
void
675 gomp_init_device (struct gomp_device_descr
*devicep
)
677 /* Initialize the target device. */
678 devicep
->init_device_func (devicep
->target_id
);
680 devicep
->is_initialized
= true;
683 attribute_hidden
void
684 gomp_init_tables (struct gomp_device_descr
*devicep
,
685 struct gomp_memory_mapping
*mm
)
687 /* Get address mapping table for device. */
688 struct mapping_table
*table
= NULL
;
689 int i
, num_entries
= devicep
->get_table_func (devicep
->target_id
, &table
);
691 /* Insert host-target address mapping into dev_splay_tree. */
692 for (i
= 0; i
< num_entries
; i
++)
694 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
696 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
697 tgt
->tgt_start
= table
[i
].tgt_start
;
698 tgt
->tgt_end
= table
[i
].tgt_end
;
701 tgt
->device_descr
= devicep
;
702 splay_tree_node node
= tgt
->array
;
703 splay_tree_key k
= &node
->key
;
704 k
->host_start
= table
[i
].host_start
;
705 k
->host_end
= table
[i
].host_end
;
708 k
->copy_from
= false;
712 splay_tree_insert (&mm
->splay_tree
, node
);
716 mm
->is_initialized
= true;
720 gomp_init_dev_tables (struct gomp_device_descr
*devicep
)
722 gomp_init_device (devicep
);
723 gomp_init_tables (devicep
, &devicep
->mem_map
);
727 attribute_hidden
void
728 gomp_free_memmap (struct gomp_memory_mapping
*mm
)
730 while (mm
->splay_tree
.root
)
732 struct target_mem_desc
*tgt
= mm
->splay_tree
.root
->key
.tgt
;
734 splay_tree_remove (&mm
->splay_tree
, &mm
->splay_tree
.root
->key
);
739 mm
->is_initialized
= false;
742 attribute_hidden
void
743 gomp_fini_device (struct gomp_device_descr
*devicep
)
745 if (devicep
->is_initialized
)
746 devicep
->fini_device_func (devicep
->target_id
);
748 devicep
->is_initialized
= false;
751 /* Called when encountering a target directive. If DEVICE
752 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
753 GOMP_DEVICE_HOST_FALLBACK (or any value
754 larger than last available hw device), use host fallback.
755 FN is address of host code, OFFLOAD_TABLE contains value of the
756 __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes
757 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
758 with MAPNUM entries, with addresses of the host objects,
759 sizes of the host objects (resp. for pointer kind pointer bias
760 and assumed sizeof (void *) size) and kinds. */
763 GOMP_target (int device
, void (*fn
) (void *), const void *offload_table
,
764 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
765 unsigned char *kinds
)
767 struct gomp_device_descr
*devicep
= resolve_device (device
);
769 if (devicep
!= NULL
&& !devicep
->is_initialized
)
770 gomp_init_dev_tables (devicep
);
773 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
776 struct gomp_thread old_thr
, *thr
= gomp_thread ();
778 memset (thr
, '\0', sizeof (*thr
));
779 if (gomp_places_list
)
781 thr
->place
= old_thr
.place
;
782 thr
->ts
.place_partition_len
= gomp_places_list_len
;
785 gomp_free_thread (thr
);
792 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
793 fn_addr
= (void *) fn
;
796 struct gomp_memory_mapping
*mm
= &devicep
->mem_map
;
797 gomp_mutex_lock (&mm
->lock
);
798 if (!devicep
->is_initialized
)
799 gomp_init_dev_tables (devicep
);
800 struct splay_tree_key_s k
;
801 k
.host_start
= (uintptr_t) fn
;
802 k
.host_end
= k
.host_start
+ 1;
803 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
.splay_tree
,
806 gomp_fatal ("Target function wasn't mapped");
807 gomp_mutex_unlock (&mm
->lock
);
809 fn_addr
= (void *) tgt_fn
->tgt
->tgt_start
;
812 struct target_mem_desc
*tgt_vars
813 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
815 struct gomp_thread old_thr
, *thr
= gomp_thread ();
817 memset (thr
, '\0', sizeof (*thr
));
818 if (gomp_places_list
)
820 thr
->place
= old_thr
.place
;
821 thr
->ts
.place_partition_len
= gomp_places_list_len
;
823 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
);
824 gomp_free_thread (thr
);
826 gomp_unmap_vars (tgt_vars
, true);
830 GOMP_target_data (int device
, const void *offload_table
, size_t mapnum
,
831 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
833 struct gomp_device_descr
*devicep
= resolve_device (device
);
835 if (devicep
!= NULL
&& !devicep
->is_initialized
)
836 gomp_init_dev_tables (devicep
);
839 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
842 struct gomp_task_icv
*icv
= gomp_icv (false);
843 if (icv
->target_data
)
845 /* Even when doing a host fallback, if there are any active
846 #pragma omp target data constructs, need to remember the
847 new #pragma omp target data, otherwise GOMP_target_end_data
848 would get out of sync. */
849 struct target_mem_desc
*tgt
850 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false, false);
851 tgt
->prev
= icv
->target_data
;
852 icv
->target_data
= tgt
;
857 struct gomp_memory_mapping
*mm
= &devicep
->mem_map
;
858 gomp_mutex_lock (&mm
->lock
);
859 if (!devicep
->is_initialized
)
860 gomp_init_dev_tables (devicep
);
861 gomp_mutex_unlock (&mm
->lock
);
863 struct target_mem_desc
*tgt
864 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
866 struct gomp_task_icv
*icv
= gomp_icv (true);
867 tgt
->prev
= icv
->target_data
;
868 icv
->target_data
= tgt
;
872 GOMP_target_end_data (void)
874 struct gomp_task_icv
*icv
= gomp_icv (false);
875 if (icv
->target_data
)
877 struct target_mem_desc
*tgt
= icv
->target_data
;
878 icv
->target_data
= tgt
->prev
;
879 gomp_unmap_vars (tgt
, true);
884 GOMP_target_update (int device
, const void *offload_table
, size_t mapnum
,
885 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
887 struct gomp_device_descr
*devicep
= resolve_device (device
);
892 struct gomp_memory_mapping
*mm
= &devicep
->mem_map
;
893 gomp_mutex_lock (&mm
->lock
);
894 if (!devicep
->is_initialized
)
895 gomp_init_dev_tables (devicep
);
896 gomp_mutex_unlock (&mm
->lock
);
898 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
901 gomp_update (devicep
, &devicep
->mem_map
, mapnum
, hostaddrs
, sizes
, kinds
,
906 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
910 struct gomp_task_icv
*icv
= gomp_icv (true);
911 icv
->thread_limit_var
912 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
917 #ifdef PLUGIN_SUPPORT
919 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
921 The handles of the found functions are stored in the corresponding fields
922 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
925 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
926 const char *plugin_name
)
928 char *err
= NULL
, *last_missing
= NULL
;
929 int optional_present
, optional_total
;
931 /* Clear any existing error. */
934 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
941 /* Check if all required functions are available in the plugin and store
946 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
952 /* Similar, but missing functions are not an error. */
953 #define DLSYM_OPT(f, n) \
957 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
958 tmp_err = dlerror (); \
959 if (tmp_err == NULL) \
960 optional_present++; \
970 DLSYM (get_num_devices
);
971 DLSYM (register_image
);
979 device
->capabilities
= device
->get_caps_func ();
980 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
982 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
984 optional_present
= optional_total
= 0;
985 DLSYM_OPT (openacc
.exec
, openacc_parallel
);
986 DLSYM_OPT (openacc
.open_device
, openacc_open_device
);
987 DLSYM_OPT (openacc
.close_device
, openacc_close_device
);
988 DLSYM_OPT (openacc
.get_device_num
, openacc_get_device_num
);
989 DLSYM_OPT (openacc
.set_device_num
, openacc_set_device_num
);
990 DLSYM_OPT (openacc
.register_async_cleanup
,
991 openacc_register_async_cleanup
);
992 DLSYM_OPT (openacc
.async_test
, openacc_async_test
);
993 DLSYM_OPT (openacc
.async_test_all
, openacc_async_test_all
);
994 DLSYM_OPT (openacc
.async_wait
, openacc_async_wait
);
995 DLSYM_OPT (openacc
.async_wait_async
, openacc_async_wait_async
);
996 DLSYM_OPT (openacc
.async_wait_all
, openacc_async_wait_all
);
997 DLSYM_OPT (openacc
.async_wait_all_async
, openacc_async_wait_all_async
);
998 DLSYM_OPT (openacc
.async_set_async
, openacc_async_set_async
);
999 DLSYM_OPT (openacc
.create_thread_data
, openacc_create_thread_data
);
1000 DLSYM_OPT (openacc
.destroy_thread_data
, openacc_destroy_thread_data
);
1001 /* Require all the OpenACC handlers if we have
1002 GOMP_OFFLOAD_CAP_OPENACC_200. */
1003 if (optional_present
!= optional_total
)
1005 err
= "plugin missing OpenACC handler function";
1008 optional_present
= optional_total
= 0;
1009 DLSYM_OPT (openacc
.cuda
.get_current_device
,
1010 openacc_get_current_cuda_device
);
1011 DLSYM_OPT (openacc
.cuda
.get_current_context
,
1012 openacc_get_current_cuda_context
);
1013 DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_get_cuda_stream
);
1014 DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_set_cuda_stream
);
1015 /* Make sure all the CUDA functions are there if any of them are. */
1016 if (optional_present
&& optional_present
!= optional_total
)
1018 err
= "plugin missing OpenACC CUDA handler function";
1028 gomp_error ("while loading %s: %s", plugin_name
, err
);
1030 gomp_error ("missing function was %s", last_missing
);
1032 dlclose (plugin_handle
);
1037 /* This function adds a compatible offload image IMAGE to an accelerator device
1041 gomp_register_image_for_device (struct gomp_device_descr
*device
,
1042 struct offload_image_descr
*image
)
1044 if (!device
->offload_regions_registered
1045 && (device
->type
== image
->type
1046 || device
->type
== OFFLOAD_TARGET_TYPE_HOST
))
1048 device
->register_image_func (image
->host_table
, image
->target_data
);
1049 device
->offload_regions_registered
= true;
1053 /* This function initializes the runtime needed for offloading.
1054 It parses the list of offload targets and tries to load the plugins for
1055 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1056 will be set, and the array DEVICES initialized, containing descriptors for
1057 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1061 gomp_target_init (void)
1063 const char *prefix
="libgomp-plugin-";
1064 const char *suffix
= ".so.1";
1065 const char *cur
, *next
;
1067 int i
, new_num_devices
;
1072 cur
= OFFLOAD_TARGETS
;
1076 struct gomp_device_descr current_device
;
1078 next
= strchr (cur
, ',');
1080 plugin_name
= (char *) malloc (1 + (next
? next
- cur
: strlen (cur
))
1081 + strlen (prefix
) + strlen (suffix
));
1088 strcpy (plugin_name
, prefix
);
1089 strncat (plugin_name
, cur
, next
? next
- cur
: strlen (cur
));
1090 strcat (plugin_name
, suffix
);
1092 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
1094 new_num_devices
= current_device
.get_num_devices_func ();
1095 if (new_num_devices
>= 1)
1097 /* Augment DEVICES and NUM_DEVICES. */
1099 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
1100 * sizeof (struct gomp_device_descr
));
1108 current_device
.type
= current_device
.get_type_func ();
1109 current_device
.name
= current_device
.get_name_func ();
1110 current_device
.is_initialized
= false;
1111 current_device
.offload_regions_registered
= false;
1112 current_device
.mem_map
.splay_tree
.root
= NULL
;
1113 current_device
.mem_map
.is_initialized
= false;
1114 current_device
.openacc
.data_environ
= NULL
;
1115 current_device
.openacc
.target_data
= NULL
;
1116 for (i
= 0; i
< new_num_devices
; i
++)
1118 current_device
.target_id
= i
;
1119 devices
[num_devices
] = current_device
;
1120 gomp_mutex_init (&devices
[num_devices
].mem_map
.lock
);
1131 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1132 NUM_DEVICES_OPENMP. */
1133 struct gomp_device_descr
*devices_s
1134 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
1141 num_devices_openmp
= 0;
1142 for (i
= 0; i
< num_devices
; i
++)
1143 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1144 devices_s
[num_devices_openmp
++] = devices
[i
];
1145 int num_devices_after_openmp
= num_devices_openmp
;
1146 for (i
= 0; i
< num_devices
; i
++)
1147 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1148 devices_s
[num_devices_after_openmp
++] = devices
[i
];
1150 devices
= devices_s
;
1152 for (i
= 0; i
< num_devices
; i
++)
1156 for (j
= 0; j
< num_offload_images
; j
++)
1157 gomp_register_image_for_device (&devices
[i
], &offload_images
[j
]);
1159 /* The 'devices' array can be moved (by the realloc call) until we have
1160 found all the plugins, so registering with the OpenACC runtime (which
1161 takes a copy of the pointer argument) must be delayed until now. */
1162 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
1163 goacc_register (&devices
[i
]);
1166 free (offload_images
);
1167 offload_images
= NULL
;
1168 num_offload_images
= 0;
1171 #else /* PLUGIN_SUPPORT */
1172 /* If dlfcn.h is unavailable we always fallback to host execution.
1173 GOMP_target* routines are just stubs for this case. */
1175 gomp_target_init (void)
1178 #endif /* PLUGIN_SUPPORT */