1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU OpenMP Library (libgomp).
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 /* This file handles the maintainence of threads in response to team
26 creation and termination. */
29 #include "oacc-plugin.h"
30 #include "gomp-constants.h"
40 static void gomp_target_init (void);
42 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
44 #include "splay-tree.h"
46 /* This structure describes an offload image.
47 It contains type of the target, pointer to host table descriptor, and pointer
49 struct offload_image_descr
{
55 /* Array of descriptors of offload images. */
56 static struct offload_image_descr
*offload_images
;
58 /* Total number of offload images. */
59 static int num_offload_images
;
61 /* Array of descriptors of all available devices. */
62 static struct gomp_device_descr
*devices
;
64 /* Total number of available devices. */
65 static int num_devices
;
67 /* The comparison function. */
70 splay_compare (splay_tree_key x
, splay_tree_key y
)
72 if (x
->host_start
== x
->host_end
73 && y
->host_start
== y
->host_end
)
75 if (x
->host_end
<= y
->host_start
)
77 if (x
->host_start
>= y
->host_end
)
85 gomp_init_targets_once (void)
87 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
91 gomp_get_num_devices (void)
93 gomp_init_targets_once ();
97 static struct gomp_device_descr
*
98 resolve_device (int device_id
)
102 struct gomp_task_icv
*icv
= gomp_icv (false);
103 device_id
= icv
->default_device_var
;
106 || device_id
>= gomp_get_num_devices ())
109 return &devices
[device_id
];
112 __attribute__((used
)) static void
113 dump_mappings (FILE *f
, splay_tree_node node
)
117 splay_tree_key k
= &node
->key
;
122 fprintf (f
, "key %p: host_start %p, host_end %p, tgt_offset %p, refcount %d, "
123 "copy_from %s\n", k
, (void *) k
->host_start
,
124 (void *) k
->host_end
, (void *) k
->tgt_offset
, (int) k
->refcount
,
125 k
->copy_from
? "true" : "false");
126 fprintf (f
, "tgt->refcount %d, tgt->tgt_start %p, tgt->tgt_end %p, "
127 "tgt->to_free %p, tgt->prev %p, tgt->list_count %d, "
128 "tgt->device_descr %p\n", (int) k
->tgt
->refcount
,
129 (void *) k
->tgt
->tgt_start
, (void *) k
->tgt
->tgt_end
,
130 k
->tgt
->to_free
, k
->tgt
->prev
, (int) k
->tgt
->list_count
,
131 k
->tgt
->device_descr
);
133 for (i
= 0; i
< k
->tgt
->list_count
; i
++)
134 fprintf (f
, "item %d: %p\n", i
, k
->tgt
->list
[i
]);
136 dump_mappings (f
, node
->left
);
137 dump_mappings (f
, node
->right
);
140 /* Handle the case where splay_tree_lookup found oldn for newn.
141 Helper function of gomp_map_vars. */
144 gomp_map_vars_existing (splay_tree_key oldn
, splay_tree_key newn
,
147 if (oldn
->host_start
> newn
->host_start
148 || oldn
->host_end
< newn
->host_end
)
149 gomp_fatal ("Trying to map into device [%p..%p) object when"
150 "[%p..%p) is already mapped",
151 (void *) newn
->host_start
, (void *) newn
->host_end
,
152 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
157 get_kind (bool is_openacc
, void *kinds
, int idx
)
159 return is_openacc
? ((unsigned short *) kinds
)[idx
]
160 : ((unsigned char *) kinds
)[idx
];
163 attribute_hidden
struct target_mem_desc
*
164 gomp_map_vars (struct gomp_device_descr
*devicep
,
165 struct gomp_memory_mapping
*mm
, size_t mapnum
,
166 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
167 void *kinds
, bool is_openacc
, bool is_target
)
169 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
170 const int rshift
= is_openacc
? 8 : 3;
171 const int typemask
= is_openacc
? 0xff : 0x7;
172 struct splay_tree_key_s cur_node
;
173 struct target_mem_desc
*tgt
174 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
175 tgt
->list_count
= mapnum
;
177 tgt
->device_descr
= devicep
;
180 /* From gcc/fortran/trans-types.c */
181 struct descriptor_dimension
188 struct gfc_array_descriptor
193 struct descriptor_dimension dimension
[];
196 #define GFC_DTYPE_RANK_MASK 0x07
197 #define GFC_DTYPE_TYPE_MASK 0x38
198 #define GFC_DTYPE_TYPE_SHIFT 3
199 #define GFC_DTYPE_SIZE_SHIFT 6
204 tgt_align
= sizeof (void *);
208 size_t align
= 4 * sizeof (void *);
210 tgt_size
= mapnum
* sizeof (void *);
212 gomp_mutex_lock (&mm
->lock
);
213 for (i
= 0; i
< mapnum
; i
++)
215 int kind
= get_kind (is_openacc
, kinds
, i
);
216 if (hostaddrs
[i
] == NULL
)
221 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
222 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
223 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
225 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
226 splay_tree_key n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
230 gomp_map_vars_existing (n
, &cur_node
, kind
);
236 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
238 struct gfc_array_descriptor
*gad
;
241 bool alloc_arrays
= true;
243 for (j
= i
- 1; j
>= 0; j
--)
245 if (hostaddrs
[j
] == *(void**)hostaddrs
[i
])
247 alloc_arrays
= false;
252 gad
= (struct gfc_array_descriptor
*) cur_node
.host_start
;
253 rank
= gad
->dtype
& GFC_DTYPE_RANK_MASK
;
255 cur_node
.host_start
= (uintptr_t)gad
->data
;
256 cur_node
.host_end
= cur_node
.host_start
+
257 sizeof (struct gfc_array_descriptor
) +
258 (sizeof (struct descriptor_dimension
) * rank
);
264 tsize
= gad
->dtype
>> GFC_DTYPE_SIZE_SHIFT
;
266 for (j
= 0; j
< rank
; j
++)
268 cur_node
.host_end
+= tsize
*
269 (gad
->dimension
[j
].ubound
-
270 gad
->dimension
[j
].lbound
+ 1);
275 size_t align
= (size_t) 1 << (kind
>> rshift
);
277 if (tgt_align
< align
)
279 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
280 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
281 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
284 for (j
= i
+ 1; j
< mapnum
; j
++)
285 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc
, kinds
, j
)
288 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
289 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
290 > cur_node
.host_end
))
304 gomp_fatal ("unexpected aggregation");
305 tgt
->to_free
= devaddrs
[0];
306 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
307 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
309 else if (not_found_cnt
|| is_target
)
311 /* Allocate tgt_align aligned tgt_size block of memory. */
312 /* FIXME: Perhaps change interface to allocate properly aligned
314 tgt
->to_free
= devicep
->device_alloc_func (tgt_size
+ tgt_align
- 1);
315 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
316 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
317 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
328 tgt_size
= mapnum
* sizeof (void *);
333 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
334 splay_tree_node array
= tgt
->array
;
337 for (i
= 0; i
< mapnum
; i
++)
338 if (tgt
->list
[i
] == NULL
)
340 int kind
= get_kind (is_openacc
, kinds
, i
);
341 if (hostaddrs
[i
] == NULL
)
343 splay_tree_key k
= &array
->key
;
344 k
->host_start
= (uintptr_t) hostaddrs
[i
];
345 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
346 k
->host_end
= k
->host_start
+ sizes
[i
];
348 k
->host_end
= k
->host_start
+ sizeof (void *);
349 splay_tree_key n
= splay_tree_lookup (&mm
->splay_tree
, k
);
353 gomp_map_vars_existing (n
, k
, kind
);
357 size_t align
= (size_t) 1 << (kind
>> rshift
);
359 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
361 k
->tgt_offset
= tgt_size
;
362 tgt_size
+= k
->host_end
- k
->host_start
;
363 k
->copy_from
= GOMP_MAP_COPYFROM_P (kind
& typemask
)
364 || GOMP_MAP_TOFROM_P (kind
& typemask
);
366 k
->async_refcount
= 0;
371 splay_tree_insert (&mm
->splay_tree
, array
);
373 switch (kind
& typemask
)
375 case GOMP_MAP_FORCE_ALLOC
:
376 case GOMP_MAP_FORCE_FROM
:
377 /* FIXME: No special handling (see comment in
380 case GOMP_MAP_ALLOC_FROM
:
382 case GOMP_MAP_FORCE_TO
:
383 case GOMP_MAP_FORCE_TOFROM
:
384 /* FIXME: No special handling, as above. */
385 case GOMP_MAP_ALLOC_TO
:
386 case GOMP_MAP_ALLOC_TOFROM
:
387 /* Copy from host to device memory. */
388 /* FIXME: Perhaps add some smarts, like if copying
389 several adjacent fields from host to target, use some
390 host buffer to avoid sending each var individually. */
391 devicep
->device_host2dev_func
392 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
),
393 (void *) k
->host_start
,
394 k
->host_end
- k
->host_start
);
396 case GOMP_MAP_POINTER
:
398 = (uintptr_t) *(void **) k
->host_start
;
399 if (cur_node
.host_start
== (uintptr_t) NULL
)
401 cur_node
.tgt_offset
= (uintptr_t) NULL
;
402 /* Copy from host to device memory. */
403 /* FIXME: see above FIXME comment. */
404 devicep
->device_host2dev_func
405 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
),
406 (void *) &cur_node
.tgt_offset
,
410 /* Add bias to the pointer value. */
411 cur_node
.host_start
+= sizes
[i
];
412 cur_node
.host_end
= cur_node
.host_start
+ 1;
413 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
416 /* Could be possibly zero size array section. */
418 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
421 cur_node
.host_start
--;
422 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
423 cur_node
.host_start
++;
427 gomp_fatal ("Pointer target of array section "
430 cur_node
.host_start
-= n
->host_start
;
431 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
432 + cur_node
.host_start
;
433 /* At this point tgt_offset is target address of the
434 array section. Now subtract bias to get what we want
435 to initialize the pointer with. */
436 cur_node
.tgt_offset
-= sizes
[i
];
437 /* Copy from host to device memory. */
438 /* FIXME: see above FIXME comment. */
439 devicep
->device_host2dev_func
440 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
),
441 (void *) &cur_node
.tgt_offset
,
444 case GOMP_MAP_TO_PSET
:
446 /* Copy from host to device memory. */
447 /* FIXME: see above FIXME comment. */
448 devicep
->device_host2dev_func
449 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
),
450 (void *) k
->host_start
,
451 (k
->host_end
- k
->host_start
));
452 devicep
->device_host2dev_func
453 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
),
454 (void *) &tgt
->tgt_start
,
457 for (j
= i
+ 1; j
< mapnum
; j
++)
458 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc
, kinds
, j
)
461 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
462 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
470 = (uintptr_t) *(void **) hostaddrs
[j
];
471 if (cur_node
.host_start
== (uintptr_t) NULL
)
473 cur_node
.tgt_offset
= (uintptr_t) NULL
;
474 /* Copy from host to device memory. */
475 /* FIXME: see above FIXME comment. */
476 devicep
->device_host2dev_func
477 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
478 + ((uintptr_t) hostaddrs
[j
]
480 (void *) &cur_node
.tgt_offset
,
485 /* Add bias to the pointer value. */
486 cur_node
.host_start
+= sizes
[j
];
487 cur_node
.host_end
= cur_node
.host_start
+ 1;
488 n
= splay_tree_lookup (&mm
->splay_tree
, &cur_node
);
491 /* Could be possibly zero size array
494 n
= splay_tree_lookup (&mm
->splay_tree
,
498 cur_node
.host_start
--;
499 n
= splay_tree_lookup (&mm
->splay_tree
,
501 cur_node
.host_start
++;
505 gomp_fatal ("Pointer target of array section "
507 cur_node
.host_start
-= n
->host_start
;
508 cur_node
.tgt_offset
= n
->tgt
->tgt_start
510 + cur_node
.host_start
;
511 /* At this point tgt_offset is target address of the
512 array section. Now subtract bias to get what we
513 want to initialize the pointer with. */
514 cur_node
.tgt_offset
-= sizes
[j
];
515 /* Copy from host to device memory. */
516 /* FIXME: see above FIXME comment. */
518 devicep
->device_host2dev_func
519 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
520 + ((uintptr_t) hostaddrs
[j
]
522 (void *) &cur_node
.tgt_offset
,
528 case GOMP_MAP_FORCE_PRESENT
:
530 /* We already looked up the memory region above and it
532 size_t size
= k
->host_end
- k
->host_start
;
533 gomp_fatal ("present clause: !acc_is_present (%p, "
534 "%zd (0x%zx))", (void *) k
->host_start
,
538 case GOMP_MAP_FORCE_DEVICEPTR
:
539 assert (k
->host_end
- k
->host_start
== sizeof (void *));
541 devicep
->device_host2dev_func
542 ((void *) (tgt
->tgt_start
+ k
->tgt_offset
),
543 (void *) k
->host_start
,
546 case GOMP_MAP_FORCE_PRIVATE
:
548 case GOMP_MAP_FORCE_FIRSTPRIVATE
:
551 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
559 #undef GFC_DTYPE_RANK_MASK
560 #undef GFC_DTYPE_TYPE_MASK
561 #undef GFC_DTYPE_TYPE_SHIFT
562 #undef GFC_DTYPE_SIZE_SHIFT
566 for (i
= 0; i
< mapnum
; i
++)
568 if (tgt
->list
[i
] == NULL
)
569 cur_node
.tgt_offset
= (uintptr_t) NULL
;
571 cur_node
.tgt_offset
= tgt
->list
[i
]->tgt
->tgt_start
572 + tgt
->list
[i
]->tgt_offset
;
573 /* Copy from host to device memory. */
574 /* FIXME: see above FIXME comment. */
575 devicep
->device_host2dev_func
576 ((void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
577 (void *) &cur_node
.tgt_offset
,
582 gomp_mutex_unlock (&mm
->lock
);
587 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
589 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
591 tgt
->device_descr
->device_free_func(tgt
->to_free
);
597 /* Decrease the refcount for a set of mapped variables, and queue asychronous
598 copies from the device back to the host after any work that has been issued.
599 Because the regions are still "live", increment an asynchronous reference
600 count to indicate that they should not be unmapped from host-side data
601 structures until the asynchronous copy has completed. */
603 attribute_hidden
void
604 gomp_copy_from_async (struct target_mem_desc
*tgt
)
606 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
607 struct gomp_memory_mapping
*mm
= tgt
->mem_map
;
610 gomp_mutex_lock (&mm
->lock
);
612 for (i
= 0; i
< tgt
->list_count
; i
++)
613 if (tgt
->list
[i
] == NULL
)
615 else if (tgt
->list
[i
]->refcount
> 1)
617 tgt
->list
[i
]->refcount
--;
618 tgt
->list
[i
]->async_refcount
++;
622 splay_tree_key k
= tgt
->list
[i
];
624 /* Copy from device to host memory. */
625 devicep
->device_dev2host_func
626 ((void *) k
->host_start
,
627 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
628 k
->host_end
- k
->host_start
);
631 gomp_mutex_unlock (&mm
->lock
);
634 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
635 variables back from device to host: if it is false, it is assumed that this
636 has been done already, i.e. by gomp_copy_from_async above. */
638 attribute_hidden
void
639 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
641 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
642 struct gomp_memory_mapping
*mm
= tgt
->mem_map
;
644 if (tgt
->list_count
== 0)
651 gomp_mutex_lock (&mm
->lock
);
652 for (i
= 0; i
< tgt
->list_count
; i
++)
653 if (tgt
->list
[i
] == NULL
)
655 else if (tgt
->list
[i
]->refcount
> 1)
656 tgt
->list
[i
]->refcount
--;
657 else if (tgt
->list
[i
]->async_refcount
> 0)
658 tgt
->list
[i
]->async_refcount
--;
661 splay_tree_key k
= tgt
->list
[i
];
662 if (k
->copy_from
&& do_copyfrom
)
663 /* Copy from device to host memory. */
664 devicep
->device_dev2host_func
665 ((void *) k
->host_start
,
666 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
667 k
->host_end
- k
->host_start
);
668 splay_tree_remove (&mm
->splay_tree
, k
);
669 if (k
->tgt
->refcount
> 1)
672 gomp_unmap_tgt (k
->tgt
);
675 if (tgt
->refcount
> 1)
678 gomp_unmap_tgt (tgt
);
679 gomp_mutex_unlock (&mm
->lock
);
683 gomp_update (struct gomp_device_descr
*devicep
, struct gomp_memory_mapping
*mm
,
684 size_t mapnum
, void **hostaddrs
, size_t *sizes
, void *kinds
,
688 struct splay_tree_key_s cur_node
;
689 const int typemask
= is_openacc
? 0xff : 0x7;
697 gomp_mutex_lock (&mm
->lock
);
698 for (i
= 0; i
< mapnum
; i
++)
701 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
702 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
703 splay_tree_key n
= splay_tree_lookup (&mm
->splay_tree
,
707 int kind
= get_kind (is_openacc
, kinds
, i
);
708 if (n
->host_start
> cur_node
.host_start
709 || n
->host_end
< cur_node
.host_end
)
710 gomp_fatal ("Trying to update [%p..%p) object when"
711 "only [%p..%p) is mapped",
712 (void *) cur_node
.host_start
,
713 (void *) cur_node
.host_end
,
714 (void *) n
->host_start
,
715 (void *) n
->host_end
);
716 if (GOMP_MAP_COPYTO_P (kind
& typemask
))
717 /* Copy from host to device memory. */
718 devicep
->device_host2dev_func
719 ((void *) (n
->tgt
->tgt_start
721 + cur_node
.host_start
723 (void *) cur_node
.host_start
,
724 cur_node
.host_end
- cur_node
.host_start
);
725 else if (GOMP_MAP_COPYFROM_P (kind
& typemask
))
726 /* Copy from device to host memory. */
727 devicep
->device_dev2host_func
728 ((void *) cur_node
.host_start
,
729 (void *) (n
->tgt
->tgt_start
731 + cur_node
.host_start
733 cur_node
.host_end
- cur_node
.host_start
);
736 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
737 (void *) cur_node
.host_start
,
738 (void *) cur_node
.host_end
);
740 gomp_mutex_unlock (&mm
->lock
);
743 static void gomp_register_image_for_device (struct gomp_device_descr
*device
,
744 struct offload_image_descr
*image
);
746 /* This function should be called from every offload image. It gets the
747 descriptor of the host func and var tables HOST_TABLE, TYPE of the target,
748 and TARGET_DATA needed by target plugin (target tables, etc.) */
750 GOMP_offload_register (void *host_table
, int type
, void **target_data
)
752 offload_images
= gomp_realloc (offload_images
,
753 (num_offload_images
+ 1)
754 * sizeof (struct offload_image_descr
));
756 if (offload_images
== NULL
)
759 offload_images
[num_offload_images
].type
= type
;
760 offload_images
[num_offload_images
].host_table
= host_table
;
761 offload_images
[num_offload_images
].target_data
= target_data
;
763 num_offload_images
++;
766 attribute_hidden
void
767 gomp_init_device (struct gomp_device_descr
*devicep
)
769 /* Initialize the target device. */
770 devicep
->device_init_func ();
772 devicep
->is_initialized
= true;
775 attribute_hidden
void
776 gomp_init_tables (const struct gomp_device_descr
*devicep
,
777 struct gomp_memory_mapping
*mm
)
779 /* Get address mapping table for device. */
780 struct mapping_table
*table
= NULL
;
781 int i
, num_entries
= devicep
->device_get_table_func (&table
);
783 /* Insert host-target address mapping into dev_splay_tree. */
784 for (i
= 0; i
< num_entries
; i
++)
786 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
788 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
789 tgt
->tgt_start
= table
[i
].tgt_start
;
790 tgt
->tgt_end
= table
[i
].tgt_end
;
793 tgt
->device_descr
= (struct gomp_device_descr
*) devicep
;
794 splay_tree_node node
= tgt
->array
;
795 splay_tree_key k
= &node
->key
;
796 k
->host_start
= table
[i
].host_start
;
797 k
->host_end
= table
[i
].host_end
;
802 splay_tree_insert (&mm
->splay_tree
, node
);
807 mm
->is_initialized
= true;
811 gomp_init_dev_tables (struct gomp_device_descr
*devicep
)
813 gomp_init_device (devicep
);
814 gomp_init_tables (devicep
, &devicep
->mem_map
);
817 attribute_hidden
void
818 gomp_fini_device (struct gomp_device_descr
*devicep
)
820 struct gomp_memory_mapping
*mm
= &devicep
->mem_map
;
822 if (devicep
->is_initialized
)
823 devicep
->device_fini_func ();
825 while (mm
->splay_tree
.root
)
827 struct target_mem_desc
*tgt
= mm
->splay_tree
.root
->key
.tgt
;
830 splay_tree_remove (&mm
->splay_tree
, &mm
->splay_tree
.root
->key
);
833 devicep
->is_initialized
= false;
836 /* Called when encountering a target directive. If DEVICE
837 is -1, it means use device-var ICV. If it is -2 (or any other value
838 larger than last available hw device, use host fallback.
839 FN is address of host code, OPENMP_TARGET contains value of the
840 __OPENMP_TARGET__ symbol in the shared library or binary that invokes
841 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
842 with MAPNUM entries, with addresses of the host objects,
843 sizes of the host objects (resp. for pointer kind pointer bias
844 and assumed sizeof (void *) size) and kinds. */
847 GOMP_target (int device
, void (*fn
) (void *), const void *openmp_target
,
848 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
849 unsigned char *kinds
)
851 struct gomp_device_descr
*devicep
= resolve_device (device
);
852 struct gomp_memory_mapping
*mm
= &devicep
->mem_map
;
854 if (devicep
!= NULL
&& !devicep
->is_initialized
)
855 gomp_init_dev_tables (devicep
);
857 if (devicep
== NULL
|| !(devicep
->capabilities
& TARGET_CAP_OPENMP_400
))
860 struct gomp_thread old_thr
, *thr
= gomp_thread ();
862 memset (thr
, '\0', sizeof (*thr
));
863 if (gomp_places_list
)
865 thr
->place
= old_thr
.place
;
866 thr
->ts
.place_partition_len
= gomp_places_list_len
;
869 gomp_free_thread (thr
);
874 struct splay_tree_key_s k
;
875 k
.host_start
= (uintptr_t) fn
;
876 k
.host_end
= k
.host_start
+ 1;
877 gomp_mutex_lock (&mm
->lock
);
878 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
.splay_tree
, &k
);
879 if (tgt_fn
== NULL
&& !(devicep
->capabilities
& TARGET_CAP_NATIVE_EXEC
))
880 gomp_fatal ("Target function wasn't mapped");
881 gomp_mutex_unlock (&mm
->lock
);
883 struct target_mem_desc
*tgt_vars
884 = gomp_map_vars (devicep
, &devicep
->mem_map
, mapnum
, hostaddrs
, NULL
,
885 sizes
, kinds
, false, true);
886 struct gomp_thread old_thr
, *thr
= gomp_thread ();
888 memset (thr
, '\0', sizeof (*thr
));
889 if (gomp_places_list
)
891 thr
->place
= old_thr
.place
;
892 thr
->ts
.place_partition_len
= gomp_places_list_len
;
894 if (devicep
->capabilities
& TARGET_CAP_NATIVE_EXEC
)
895 devicep
->device_run_func (fn
, (void *) tgt_vars
->tgt_start
);
897 devicep
->device_run_func ((void *) tgt_fn
->tgt
->tgt_start
,
898 (void *) tgt_vars
->tgt_start
);
899 gomp_free_thread (thr
);
901 gomp_unmap_vars (tgt_vars
, true);
905 GOMP_target_data (int device
, const void *openmp_target
, size_t mapnum
,
906 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
908 struct gomp_device_descr
*devicep
= resolve_device (device
);
910 if (devicep
!= NULL
&& !devicep
->is_initialized
)
911 gomp_init_dev_tables (devicep
);
913 if (devicep
== NULL
|| !(devicep
->capabilities
& TARGET_CAP_OPENMP_400
))
916 struct gomp_task_icv
*icv
= gomp_icv (false);
917 if (icv
->target_data
)
919 /* Even when doing a host fallback, if there are any active
920 #pragma omp target data constructs, need to remember the
921 new #pragma omp target data, otherwise GOMP_target_end_data
922 would get out of sync. */
923 struct target_mem_desc
*tgt
924 = gomp_map_vars (NULL
, NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
926 tgt
->prev
= icv
->target_data
;
927 icv
->target_data
= tgt
;
932 struct target_mem_desc
*tgt
933 = gomp_map_vars (devicep
, &devicep
->mem_map
, mapnum
, hostaddrs
, NULL
, sizes
,
934 kinds
, false, false);
935 struct gomp_task_icv
*icv
= gomp_icv (true);
936 tgt
->prev
= icv
->target_data
;
937 icv
->target_data
= tgt
;
941 GOMP_target_end_data (void)
943 struct gomp_task_icv
*icv
= gomp_icv (false);
944 if (icv
->target_data
)
946 struct target_mem_desc
*tgt
= icv
->target_data
;
947 icv
->target_data
= tgt
->prev
;
948 gomp_unmap_vars (tgt
, true);
953 GOMP_target_update (int device
, const void *openmp_target
, size_t mapnum
,
954 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
956 struct gomp_device_descr
*devicep
= resolve_device (device
);
958 if (devicep
!= NULL
&& !devicep
->is_initialized
)
959 gomp_init_dev_tables (devicep
);
961 if (devicep
== NULL
|| !(devicep
->capabilities
& TARGET_CAP_OPENMP_400
))
964 gomp_update (devicep
, &devicep
->mem_map
, mapnum
, hostaddrs
, sizes
, kinds
,
969 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
973 struct gomp_task_icv
*icv
= gomp_icv (true);
974 icv
->thread_limit_var
975 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
980 #ifdef PLUGIN_SUPPORT
982 /* This function checks if the given string FNAME matches
983 "libgomp-plugin-*.so.1". */
985 gomp_check_plugin_file_name (const char *fname
)
987 const char *prefix
= "libgomp-plugin-";
988 const char *suffix
= ".so.1";
991 if (strncmp (fname
, prefix
, strlen (prefix
)) != 0)
993 if (strncmp (fname
+ strlen (fname
) - strlen (suffix
), suffix
,
994 strlen (suffix
)) != 0)
999 /* This function tries to load plugin for DEVICE. Name of plugin is passed
1001 Plugin handle and handles of the found functions are stored in the
1002 corresponding fields of DEVICE.
1003 The function returns TRUE on success and FALSE otherwise. */
1005 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
1006 const char *plugin_name
)
1008 char *err
= NULL
, *last_missing
= NULL
;
1009 int optional_present
, optional_total
;
1011 /* Clear any existing error. */
1014 device
->plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
1015 if (!device
->plugin_handle
)
1021 /* Check if all required functions are available in the plugin and store
1026 device->f##_func = dlsym (device->plugin_handle, #f); \
1032 /* Similar, but missing functions are not an error. */
1033 #define DLSYM_OPT(f,n) \
1037 device->f##_func = dlsym (device->plugin_handle, #n); \
1038 tmp_err = dlerror (); \
1039 if (tmp_err == NULL) \
1040 optional_present++; \
1042 last_missing = #n; \
1050 DLSYM (get_num_devices
);
1051 DLSYM (offload_register
);
1052 DLSYM (device_init
);
1053 DLSYM (device_fini
);
1054 DLSYM (device_get_table
);
1055 DLSYM (device_alloc
);
1056 DLSYM (device_free
);
1057 DLSYM (device_dev2host
);
1058 DLSYM (device_host2dev
);
1059 if (device
->get_caps_func () & TARGET_CAP_OPENMP_400
)
1061 if (device
->get_caps_func () & TARGET_CAP_OPENACC_200
)
1063 optional_present
= optional_total
= 0;
1064 DLSYM_OPT (openacc
.exec
, openacc_parallel
);
1065 DLSYM_OPT (openacc
.open_device
, openacc_open_device
);
1066 DLSYM_OPT (openacc
.close_device
, openacc_close_device
);
1067 DLSYM_OPT (openacc
.get_device_num
, openacc_get_device_num
);
1068 DLSYM_OPT (openacc
.set_device_num
, openacc_set_device_num
);
1069 DLSYM_OPT (openacc
.avail
, openacc_avail
);
1070 DLSYM_OPT (openacc
.register_async_cleanup
,
1071 openacc_register_async_cleanup
);
1072 DLSYM_OPT (openacc
.async_test
, openacc_async_test
);
1073 DLSYM_OPT (openacc
.async_test_all
, openacc_async_test_all
);
1074 DLSYM_OPT (openacc
.async_wait
, openacc_async_wait
);
1075 DLSYM_OPT (openacc
.async_wait_async
, openacc_async_wait_async
);
1076 DLSYM_OPT (openacc
.async_wait_all
, openacc_async_wait_all
);
1077 DLSYM_OPT (openacc
.async_wait_all_async
, openacc_async_wait_all_async
);
1078 DLSYM_OPT (openacc
.async_set_async
, openacc_async_set_async
);
1079 /* Require all the OpenACC handlers if we have TARGET_CAP_OPENACC_200. */
1080 if (optional_present
!= optional_total
)
1082 err
= "plugin missing OpenACC handler function";
1085 optional_present
= optional_total
= 0;
1086 DLSYM_OPT (openacc
.cuda
.get_current_device
,
1087 openacc_get_current_cuda_device
);
1088 DLSYM_OPT (openacc
.cuda
.get_current_context
,
1089 openacc_get_current_cuda_context
);
1090 DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_get_cuda_stream
);
1091 DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_set_cuda_stream
);
1092 /* Make sure all the CUDA functions are there if any of them are. */
1093 if (optional_present
&& optional_present
!= optional_total
)
1095 err
= "plugin missing OpenACC CUDA handler function";
1105 gomp_error ("while loading %s: %s", plugin_name
, err
);
1107 gomp_error ("missing function was %s", last_missing
);
1108 if (device
->plugin_handle
)
1109 dlclose (device
->plugin_handle
);
1114 /* This function adds a compatible offload image IMAGE to an accelerator device
1118 gomp_register_image_for_device (struct gomp_device_descr
*device
,
1119 struct offload_image_descr
*image
)
1121 if (!device
->offload_regions_registered
1122 && (device
->type
== image
->type
|| device
->type
== TARGET_TYPE_HOST
))
1124 device
->offload_register_func (image
->host_table
, image
->target_data
);
1125 device
->offload_regions_registered
= true;
1129 /* This functions scans folder, specified in environment variable
1130 LIBGOMP_PLUGIN_PATH, and loads all suitable libgomp plugins from this folder.
1131 For a plugin to be suitable, its name should be "libgomp-plugin-*.so.1" and
1132 it should implement a certain set of functions.
1133 Result of this function is properly initialized variable NUM_DEVICES and
1134 array DEVICES, containing all plugins and their callback handles. */
1136 gomp_find_available_plugins (void)
1138 char *plugin_path
= NULL
;
1141 char plugin_name
[PATH_MAX
];
1147 plugin_path
= getenv ("LIBGOMP_PLUGIN_PATH");
1151 dir
= opendir (plugin_path
);
1155 while ((ent
= readdir (dir
)) != NULL
)
1157 struct gomp_device_descr current_device
, *devicep
;
1158 if (!gomp_check_plugin_file_name (ent
->d_name
))
1160 if (strlen (plugin_path
) + 1 + strlen (ent
->d_name
) >= PATH_MAX
)
1162 strcpy (plugin_name
, plugin_path
);
1163 strcat (plugin_name
, "/");
1164 strcat (plugin_name
, ent
->d_name
);
1165 if (!gomp_load_plugin_for_device (¤t_device
, plugin_name
))
1167 devices
= gomp_realloc (devices
, (num_devices
+ 1)
1168 * sizeof (struct gomp_device_descr
));
1169 if (devices
== NULL
)
1175 devices
[num_devices
] = current_device
;
1176 devicep
= &devices
[num_devices
];
1178 devicep
->is_initialized
= false;
1179 devicep
->offload_regions_registered
= false;
1180 devicep
->mem_map
.splay_tree
.root
= NULL
;
1181 devicep
->mem_map
.is_initialized
= false;
1182 devicep
->type
= devicep
->get_type_func ();
1183 devicep
->name
= devicep
->get_name_func ();
1184 devicep
->capabilities
= devicep
->get_caps_func ();
1185 gomp_mutex_init (&devicep
->mem_map
.lock
);
1186 devicep
->id
= ++num_devices
;
1188 /* Prefer a device with TARGET_CAP_OPENMP_400 for ICV default-device-var. */
1189 if (num_devices
> 1)
1191 int d
= gomp_icv (false)->default_device_var
;
1193 if (!(devices
[d
].capabilities
& TARGET_CAP_OPENMP_400
))
1195 for (i
= 0; i
< num_devices
; i
++)
1197 if (devices
[i
].capabilities
& TARGET_CAP_OPENMP_400
)
1199 struct gomp_device_descr device_tmp
= devices
[d
];
1200 devices
[d
] = devices
[i
];
1201 devices
[d
].id
= d
+ 1;
1202 devices
[i
] = device_tmp
;
1203 devices
[i
].id
= i
+ 1;
1211 for (i
= 0; i
< num_devices
; i
++)
1215 for (j
= 0; j
< num_offload_images
; j
++)
1216 gomp_register_image_for_device (&devices
[i
], &offload_images
[j
]);
1218 /* The 'devices' array can be moved (by the realloc call) until we have
1219 found all the plugins, so registering with the OpenACC runtime (which
1220 takes a copy of the pointer argument) must be delayed until now. */
1221 if (devices
[i
].capabilities
& TARGET_CAP_OPENACC_200
)
1222 ACC_plugin_register (&devices
[i
]);
1228 free (offload_images
);
1229 offload_images
= NULL
;
1230 num_offload_images
= 0;
1233 /* This function initializes runtime needed for offloading.
1234 It loads plugins, sets up a connection with devices, etc. */
1236 gomp_target_init (void)
1238 gomp_find_available_plugins ();
1241 #else /* PLUGIN_SUPPORT */
1242 /* If dlfcn.h is unavailable we always fallback to host execution.
1243 GOMP_target* routines are just stubs for this case. */
1245 gomp_target_init (void)
1248 #endif /* PLUGIN_SUPPORT */