libgomp: For OpenMP offloading, only publicize GOMP_OFFLOAD_CAP_OPENMP_400 devices.
[official-gcc.git] / libgomp / target.c
blobbf6edd2eadab86a85f473381b254291053df11ea
1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "libgomp_target.h"
31 #include "oacc-plugin.h"
32 #include "oacc-int.h"
33 #include "gomp-constants.h"
34 #include <limits.h>
35 #include <stdbool.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39 #include <assert.h>
41 #ifdef PLUGIN_SUPPORT
42 #include <dlfcn.h>
43 #endif
45 static void gomp_target_init (void);
47 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
49 /* This structure describes an offload image.
50 It contains type of the target device, pointer to host table descriptor, and
51 pointer to target data. */
52 struct offload_image_descr {
53 enum offload_target_type type;
54 void *host_table;
55 void *target_data;
58 /* Array of descriptors of offload images. */
59 static struct offload_image_descr *offload_images;
61 /* Total number of offload images. */
62 static int num_offload_images;
64 /* Array of descriptors for all available devices. */
65 static struct gomp_device_descr *devices;
67 /* Total number of available devices. */
68 static int num_devices;
70 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
71 static int num_devices_openmp;
73 /* The comparison function. */
75 attribute_hidden int
76 splay_compare (splay_tree_key x, splay_tree_key y)
78 if (x->host_start == x->host_end
79 && y->host_start == y->host_end)
80 return 0;
81 if (x->host_end <= y->host_start)
82 return -1;
83 if (x->host_start >= y->host_end)
84 return 1;
85 return 0;
88 #include "splay-tree.h"
90 attribute_hidden void
91 gomp_init_targets_once (void)
93 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
96 attribute_hidden int
97 gomp_get_num_devices (void)
99 gomp_init_targets_once ();
100 return num_devices_openmp;
103 static struct gomp_device_descr *
104 resolve_device (int device_id)
106 if (device_id == GOMP_DEVICE_ICV)
108 struct gomp_task_icv *icv = gomp_icv (false);
109 device_id = icv->default_device_var;
112 if (device_id < 0 || device_id >= gomp_get_num_devices ())
113 return NULL;
115 return &devices[device_id];
119 /* Handle the case where splay_tree_lookup found oldn for newn.
120 Helper function of gomp_map_vars. */
122 static inline void
123 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
124 unsigned char kind)
126 if ((kind & GOMP_MAP_FLAG_FORCE)
127 || oldn->host_start > newn->host_start
128 || oldn->host_end < newn->host_end)
129 gomp_fatal ("Trying to map into device [%p..%p) object when "
130 "[%p..%p) is already mapped",
131 (void *) newn->host_start, (void *) newn->host_end,
132 (void *) oldn->host_start, (void *) oldn->host_end);
133 oldn->refcount++;
136 static int
137 get_kind (bool is_openacc, void *kinds, int idx)
139 return is_openacc ? ((unsigned short *) kinds)[idx]
140 : ((unsigned char *) kinds)[idx];
143 attribute_hidden struct target_mem_desc *
144 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
145 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
146 bool is_openacc, bool is_target)
148 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
149 const int rshift = is_openacc ? 8 : 3;
150 const int typemask = is_openacc ? 0xff : 0x7;
151 struct gomp_memory_mapping *mm = &devicep->mem_map;
152 struct splay_tree_key_s cur_node;
153 struct target_mem_desc *tgt
154 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
155 tgt->list_count = mapnum;
156 tgt->refcount = 1;
157 tgt->device_descr = devicep;
158 tgt->mem_map = mm;
160 if (mapnum == 0)
161 return tgt;
163 tgt_align = sizeof (void *);
164 tgt_size = 0;
165 if (is_target)
167 size_t align = 4 * sizeof (void *);
168 tgt_align = align;
169 tgt_size = mapnum * sizeof (void *);
171 gomp_mutex_lock (&mm->lock);
172 for (i = 0; i < mapnum; i++)
174 int kind = get_kind (is_openacc, kinds, i);
175 if (hostaddrs[i] == NULL)
177 tgt->list[i] = NULL;
178 continue;
180 cur_node.host_start = (uintptr_t) hostaddrs[i];
181 if (!GOMP_MAP_POINTER_P (kind & typemask))
182 cur_node.host_end = cur_node.host_start + sizes[i];
183 else
184 cur_node.host_end = cur_node.host_start + sizeof (void *);
185 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
186 if (n)
188 tgt->list[i] = n;
189 gomp_map_vars_existing (n, &cur_node, kind & typemask);
191 else
193 tgt->list[i] = NULL;
195 size_t align = (size_t) 1 << (kind >> rshift);
196 not_found_cnt++;
197 if (tgt_align < align)
198 tgt_align = align;
199 tgt_size = (tgt_size + align - 1) & ~(align - 1);
200 tgt_size += cur_node.host_end - cur_node.host_start;
201 if ((kind & typemask) == GOMP_MAP_TO_PSET)
203 size_t j;
204 for (j = i + 1; j < mapnum; j++)
205 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
206 & typemask))
207 break;
208 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
209 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
210 > cur_node.host_end))
211 break;
212 else
214 tgt->list[j] = NULL;
215 i++;
221 if (devaddrs)
223 if (mapnum != 1)
224 gomp_fatal ("unexpected aggregation");
225 tgt->to_free = devaddrs[0];
226 tgt->tgt_start = (uintptr_t) tgt->to_free;
227 tgt->tgt_end = tgt->tgt_start + sizes[0];
229 else if (not_found_cnt || is_target)
231 /* Allocate tgt_align aligned tgt_size block of memory. */
232 /* FIXME: Perhaps change interface to allocate properly aligned
233 memory. */
234 tgt->to_free = devicep->alloc_func (devicep->target_id,
235 tgt_size + tgt_align - 1);
236 tgt->tgt_start = (uintptr_t) tgt->to_free;
237 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
238 tgt->tgt_end = tgt->tgt_start + tgt_size;
240 else
242 tgt->to_free = NULL;
243 tgt->tgt_start = 0;
244 tgt->tgt_end = 0;
247 tgt_size = 0;
248 if (is_target)
249 tgt_size = mapnum * sizeof (void *);
251 tgt->array = NULL;
252 if (not_found_cnt)
254 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
255 splay_tree_node array = tgt->array;
256 size_t j;
258 for (i = 0; i < mapnum; i++)
259 if (tgt->list[i] == NULL)
261 int kind = get_kind (is_openacc, kinds, i);
262 if (hostaddrs[i] == NULL)
263 continue;
264 splay_tree_key k = &array->key;
265 k->host_start = (uintptr_t) hostaddrs[i];
266 if (!GOMP_MAP_POINTER_P (kind & typemask))
267 k->host_end = k->host_start + sizes[i];
268 else
269 k->host_end = k->host_start + sizeof (void *);
270 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
271 if (n)
273 tgt->list[i] = n;
274 gomp_map_vars_existing (n, k, kind & typemask);
276 else
278 size_t align = (size_t) 1 << (kind >> rshift);
279 tgt->list[i] = k;
280 tgt_size = (tgt_size + align - 1) & ~(align - 1);
281 k->tgt = tgt;
282 k->tgt_offset = tgt_size;
283 tgt_size += k->host_end - k->host_start;
284 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
285 k->refcount = 1;
286 k->async_refcount = 0;
287 tgt->refcount++;
288 array->left = NULL;
289 array->right = NULL;
291 splay_tree_insert (&mm->splay_tree, array);
293 switch (kind & typemask)
295 case GOMP_MAP_ALLOC:
296 case GOMP_MAP_FROM:
297 case GOMP_MAP_FORCE_ALLOC:
298 case GOMP_MAP_FORCE_FROM:
299 break;
300 case GOMP_MAP_TO:
301 case GOMP_MAP_TOFROM:
302 case GOMP_MAP_FORCE_TO:
303 case GOMP_MAP_FORCE_TOFROM:
304 /* Copy from host to device memory. */
305 /* FIXME: Perhaps add some smarts, like if copying
306 several adjacent fields from host to target, use some
307 host buffer to avoid sending each var individually. */
308 devicep->host2dev_func (devicep->target_id,
309 (void *) (tgt->tgt_start
310 + k->tgt_offset),
311 (void *) k->host_start,
312 k->host_end - k->host_start);
313 break;
314 case GOMP_MAP_POINTER:
315 cur_node.host_start
316 = (uintptr_t) *(void **) k->host_start;
317 if (cur_node.host_start == (uintptr_t) NULL)
319 cur_node.tgt_offset = (uintptr_t) NULL;
320 /* Copy from host to device memory. */
321 /* FIXME: see above FIXME comment. */
322 devicep->host2dev_func (devicep->target_id,
323 (void *) (tgt->tgt_start
324 + k->tgt_offset),
325 (void *) &cur_node.tgt_offset,
326 sizeof (void *));
327 break;
329 /* Add bias to the pointer value. */
330 cur_node.host_start += sizes[i];
331 cur_node.host_end = cur_node.host_start + 1;
332 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
333 if (n == NULL)
335 /* Could be possibly zero size array section. */
336 cur_node.host_end--;
337 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
338 if (n == NULL)
340 cur_node.host_start--;
341 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
342 cur_node.host_start++;
345 if (n == NULL)
346 gomp_fatal ("Pointer target of array section "
347 "wasn't mapped");
349 cur_node.host_start -= n->host_start;
350 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
351 + cur_node.host_start;
352 /* At this point tgt_offset is target address of the
353 array section. Now subtract bias to get what we want
354 to initialize the pointer with. */
355 cur_node.tgt_offset -= sizes[i];
356 /* Copy from host to device memory. */
357 /* FIXME: see above FIXME comment. */
358 devicep->host2dev_func (devicep->target_id,
359 (void *) (tgt->tgt_start
360 + k->tgt_offset),
361 (void *) &cur_node.tgt_offset,
362 sizeof (void *));
363 break;
364 case GOMP_MAP_TO_PSET:
365 /* Copy from host to device memory. */
366 /* FIXME: see above FIXME comment. */
367 devicep->host2dev_func (devicep->target_id,
368 (void *) (tgt->tgt_start
369 + k->tgt_offset),
370 (void *) k->host_start,
371 k->host_end - k->host_start);
373 for (j = i + 1; j < mapnum; j++)
374 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
375 & typemask))
376 break;
377 else if ((uintptr_t) hostaddrs[j] < k->host_start
378 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
379 > k->host_end))
380 break;
381 else
383 tgt->list[j] = k;
384 k->refcount++;
385 cur_node.host_start
386 = (uintptr_t) *(void **) hostaddrs[j];
387 if (cur_node.host_start == (uintptr_t) NULL)
389 cur_node.tgt_offset = (uintptr_t) NULL;
390 /* Copy from host to device memory. */
391 /* FIXME: see above FIXME comment. */
392 devicep->host2dev_func (devicep->target_id,
393 (void *) (tgt->tgt_start + k->tgt_offset
394 + ((uintptr_t) hostaddrs[j]
395 - k->host_start)),
396 (void *) &cur_node.tgt_offset,
397 sizeof (void *));
398 i++;
399 continue;
401 /* Add bias to the pointer value. */
402 cur_node.host_start += sizes[j];
403 cur_node.host_end = cur_node.host_start + 1;
404 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
405 if (n == NULL)
407 /* Could be possibly zero size array section. */
408 cur_node.host_end--;
409 n = splay_tree_lookup (&mm->splay_tree,
410 &cur_node);
411 if (n == NULL)
413 cur_node.host_start--;
414 n = splay_tree_lookup (&mm->splay_tree,
415 &cur_node);
416 cur_node.host_start++;
419 if (n == NULL)
420 gomp_fatal ("Pointer target of array section "
421 "wasn't mapped");
422 cur_node.host_start -= n->host_start;
423 cur_node.tgt_offset = n->tgt->tgt_start
424 + n->tgt_offset
425 + cur_node.host_start;
426 /* At this point tgt_offset is target address of the
427 array section. Now subtract bias to get what we
428 want to initialize the pointer with. */
429 cur_node.tgt_offset -= sizes[j];
430 /* Copy from host to device memory. */
431 /* FIXME: see above FIXME comment. */
432 devicep->host2dev_func (devicep->target_id,
433 (void *) (tgt->tgt_start + k->tgt_offset
434 + ((uintptr_t) hostaddrs[j]
435 - k->host_start)),
436 (void *) &cur_node.tgt_offset,
437 sizeof (void *));
438 i++;
440 break;
441 case GOMP_MAP_FORCE_PRESENT:
443 /* We already looked up the memory region above and it
444 was missing. */
445 size_t size = k->host_end - k->host_start;
446 gomp_fatal ("present clause: !acc_is_present (%p, "
447 "%zd (0x%zx))", (void *) k->host_start,
448 size, size);
450 break;
451 case GOMP_MAP_FORCE_DEVICEPTR:
452 assert (k->host_end - k->host_start == sizeof (void *));
454 devicep->host2dev_func (devicep->target_id,
455 (void *) (tgt->tgt_start
456 + k->tgt_offset),
457 (void *) k->host_start,
458 sizeof (void *));
459 break;
460 default:
461 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
462 kind);
464 array++;
469 if (is_target)
471 for (i = 0; i < mapnum; i++)
473 if (tgt->list[i] == NULL)
474 cur_node.tgt_offset = (uintptr_t) NULL;
475 else
476 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
477 + tgt->list[i]->tgt_offset;
478 /* Copy from host to device memory. */
479 /* FIXME: see above FIXME comment. */
480 devicep->host2dev_func (devicep->target_id,
481 (void *) (tgt->tgt_start
482 + i * sizeof (void *)),
483 (void *) &cur_node.tgt_offset,
484 sizeof (void *));
488 gomp_mutex_unlock (&mm->lock);
489 return tgt;
492 static void
493 gomp_unmap_tgt (struct target_mem_desc *tgt)
495 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
496 if (tgt->tgt_end)
497 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
499 free (tgt->array);
500 free (tgt);
503 /* Decrease the refcount for a set of mapped variables, and queue asychronous
504 copies from the device back to the host after any work that has been issued.
505 Because the regions are still "live", increment an asynchronous reference
506 count to indicate that they should not be unmapped from host-side data
507 structures until the asynchronous copy has completed. */
509 attribute_hidden void
510 gomp_copy_from_async (struct target_mem_desc *tgt)
512 struct gomp_device_descr *devicep = tgt->device_descr;
513 struct gomp_memory_mapping *mm = tgt->mem_map;
514 size_t i;
516 gomp_mutex_lock (&mm->lock);
518 for (i = 0; i < tgt->list_count; i++)
519 if (tgt->list[i] == NULL)
521 else if (tgt->list[i]->refcount > 1)
523 tgt->list[i]->refcount--;
524 tgt->list[i]->async_refcount++;
526 else
528 splay_tree_key k = tgt->list[i];
529 if (k->copy_from)
530 /* Copy from device to host memory. */
531 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
532 (void *) (k->tgt->tgt_start + k->tgt_offset),
533 k->host_end - k->host_start);
536 gomp_mutex_unlock (&mm->lock);
539 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
540 variables back from device to host: if it is false, it is assumed that this
541 has been done already, i.e. by gomp_copy_from_async above. */
543 attribute_hidden void
544 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
546 struct gomp_device_descr *devicep = tgt->device_descr;
547 struct gomp_memory_mapping *mm = tgt->mem_map;
549 if (tgt->list_count == 0)
551 free (tgt);
552 return;
555 size_t i;
556 gomp_mutex_lock (&mm->lock);
557 for (i = 0; i < tgt->list_count; i++)
558 if (tgt->list[i] == NULL)
560 else if (tgt->list[i]->refcount > 1)
561 tgt->list[i]->refcount--;
562 else if (tgt->list[i]->async_refcount > 0)
563 tgt->list[i]->async_refcount--;
564 else
566 splay_tree_key k = tgt->list[i];
567 if (k->copy_from && do_copyfrom)
568 /* Copy from device to host memory. */
569 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
570 (void *) (k->tgt->tgt_start + k->tgt_offset),
571 k->host_end - k->host_start);
572 splay_tree_remove (&mm->splay_tree, k);
573 if (k->tgt->refcount > 1)
574 k->tgt->refcount--;
575 else
576 gomp_unmap_tgt (k->tgt);
579 if (tgt->refcount > 1)
580 tgt->refcount--;
581 else
582 gomp_unmap_tgt (tgt);
583 gomp_mutex_unlock (&mm->lock);
586 static void
587 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
588 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
589 bool is_openacc)
591 size_t i;
592 struct splay_tree_key_s cur_node;
593 const int typemask = is_openacc ? 0xff : 0x7;
595 if (!devicep)
596 return;
598 if (mapnum == 0)
599 return;
601 gomp_mutex_lock (&mm->lock);
602 for (i = 0; i < mapnum; i++)
603 if (sizes[i])
605 cur_node.host_start = (uintptr_t) hostaddrs[i];
606 cur_node.host_end = cur_node.host_start + sizes[i];
607 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
608 &cur_node);
609 if (n)
611 int kind = get_kind (is_openacc, kinds, i);
612 if (n->host_start > cur_node.host_start
613 || n->host_end < cur_node.host_end)
614 gomp_fatal ("Trying to update [%p..%p) object when"
615 "only [%p..%p) is mapped",
616 (void *) cur_node.host_start,
617 (void *) cur_node.host_end,
618 (void *) n->host_start,
619 (void *) n->host_end);
620 if (GOMP_MAP_COPY_TO_P (kind & typemask))
621 /* Copy from host to device memory. */
622 devicep->host2dev_func (devicep->target_id,
623 (void *) (n->tgt->tgt_start
624 + n->tgt_offset
625 + cur_node.host_start
626 - n->host_start),
627 (void *) cur_node.host_start,
628 cur_node.host_end - cur_node.host_start);
629 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
630 /* Copy from device to host memory. */
631 devicep->dev2host_func (devicep->target_id,
632 (void *) cur_node.host_start,
633 (void *) (n->tgt->tgt_start
634 + n->tgt_offset
635 + cur_node.host_start
636 - n->host_start),
637 cur_node.host_end - cur_node.host_start);
639 else
640 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
641 (void *) cur_node.host_start,
642 (void *) cur_node.host_end);
644 gomp_mutex_unlock (&mm->lock);
647 static void gomp_register_image_for_device (struct gomp_device_descr *device,
648 struct offload_image_descr *image);
650 /* This function should be called from every offload image.
651 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
652 the target, and TARGET_DATA needed by target plugin. */
654 void
655 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
656 void *target_data)
658 offload_images = gomp_realloc (offload_images,
659 (num_offload_images + 1)
660 * sizeof (struct offload_image_descr));
662 if (offload_images == NULL)
663 return;
665 offload_images[num_offload_images].type = target_type;
666 offload_images[num_offload_images].host_table = host_table;
667 offload_images[num_offload_images].target_data = target_data;
669 num_offload_images++;
672 /* This function initializes the target device, specified by DEVICEP. */
674 attribute_hidden void
675 gomp_init_device (struct gomp_device_descr *devicep)
677 /* Initialize the target device. */
678 devicep->init_device_func (devicep->target_id);
680 devicep->is_initialized = true;
683 attribute_hidden void
684 gomp_init_tables (struct gomp_device_descr *devicep,
685 struct gomp_memory_mapping *mm)
687 /* Get address mapping table for device. */
688 struct mapping_table *table = NULL;
689 int i, num_entries = devicep->get_table_func (devicep->target_id, &table);
691 /* Insert host-target address mapping into dev_splay_tree. */
692 for (i = 0; i < num_entries; i++)
694 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
695 tgt->refcount = 1;
696 tgt->array = gomp_malloc (sizeof (*tgt->array));
697 tgt->tgt_start = table[i].tgt_start;
698 tgt->tgt_end = table[i].tgt_end;
699 tgt->to_free = NULL;
700 tgt->list_count = 0;
701 tgt->device_descr = devicep;
702 splay_tree_node node = tgt->array;
703 splay_tree_key k = &node->key;
704 k->host_start = table[i].host_start;
705 k->host_end = table[i].host_end;
706 k->tgt_offset = 0;
707 k->refcount = 1;
708 k->copy_from = false;
709 k->tgt = tgt;
710 node->left = NULL;
711 node->right = NULL;
712 splay_tree_insert (&mm->splay_tree, node);
715 free (table);
716 mm->is_initialized = true;
719 static void
720 gomp_init_dev_tables (struct gomp_device_descr *devicep)
722 gomp_init_device (devicep);
723 gomp_init_tables (devicep, &devicep->mem_map);
727 attribute_hidden void
728 gomp_free_memmap (struct gomp_memory_mapping *mm)
730 while (mm->splay_tree.root)
732 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
734 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
735 free (tgt->array);
736 free (tgt);
739 mm->is_initialized = false;
742 attribute_hidden void
743 gomp_fini_device (struct gomp_device_descr *devicep)
745 if (devicep->is_initialized)
746 devicep->fini_device_func (devicep->target_id);
748 devicep->is_initialized = false;
751 /* Called when encountering a target directive. If DEVICE
752 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
753 GOMP_DEVICE_HOST_FALLBACK (or any value
754 larger than last available hw device), use host fallback.
755 FN is address of host code, OFFLOAD_TABLE contains value of the
756 __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes
757 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
758 with MAPNUM entries, with addresses of the host objects,
759 sizes of the host objects (resp. for pointer kind pointer bias
760 and assumed sizeof (void *) size) and kinds. */
762 void
763 GOMP_target (int device, void (*fn) (void *), const void *offload_table,
764 size_t mapnum, void **hostaddrs, size_t *sizes,
765 unsigned char *kinds)
767 struct gomp_device_descr *devicep = resolve_device (device);
769 if (devicep != NULL && !devicep->is_initialized)
770 gomp_init_dev_tables (devicep);
772 if (devicep == NULL
773 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
775 /* Host fallback. */
776 struct gomp_thread old_thr, *thr = gomp_thread ();
777 old_thr = *thr;
778 memset (thr, '\0', sizeof (*thr));
779 if (gomp_places_list)
781 thr->place = old_thr.place;
782 thr->ts.place_partition_len = gomp_places_list_len;
784 fn (hostaddrs);
785 gomp_free_thread (thr);
786 *thr = old_thr;
787 return;
790 void *fn_addr;
792 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
793 fn_addr = (void *) fn;
794 else
796 struct gomp_memory_mapping *mm = &devicep->mem_map;
797 gomp_mutex_lock (&mm->lock);
798 if (!devicep->is_initialized)
799 gomp_init_dev_tables (devicep);
800 struct splay_tree_key_s k;
801 k.host_start = (uintptr_t) fn;
802 k.host_end = k.host_start + 1;
803 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map.splay_tree,
804 &k);
805 if (tgt_fn == NULL)
806 gomp_fatal ("Target function wasn't mapped");
807 gomp_mutex_unlock (&mm->lock);
809 fn_addr = (void *) tgt_fn->tgt->tgt_start;
812 struct target_mem_desc *tgt_vars
813 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
814 true);
815 struct gomp_thread old_thr, *thr = gomp_thread ();
816 old_thr = *thr;
817 memset (thr, '\0', sizeof (*thr));
818 if (gomp_places_list)
820 thr->place = old_thr.place;
821 thr->ts.place_partition_len = gomp_places_list_len;
823 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
824 gomp_free_thread (thr);
825 *thr = old_thr;
826 gomp_unmap_vars (tgt_vars, true);
829 void
830 GOMP_target_data (int device, const void *offload_table, size_t mapnum,
831 void **hostaddrs, size_t *sizes, unsigned char *kinds)
833 struct gomp_device_descr *devicep = resolve_device (device);
835 if (devicep != NULL && !devicep->is_initialized)
836 gomp_init_dev_tables (devicep);
838 if (devicep == NULL
839 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
841 /* Host fallback. */
842 struct gomp_task_icv *icv = gomp_icv (false);
843 if (icv->target_data)
845 /* Even when doing a host fallback, if there are any active
846 #pragma omp target data constructs, need to remember the
847 new #pragma omp target data, otherwise GOMP_target_end_data
848 would get out of sync. */
849 struct target_mem_desc *tgt
850 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
851 tgt->prev = icv->target_data;
852 icv->target_data = tgt;
854 return;
857 struct gomp_memory_mapping *mm = &devicep->mem_map;
858 gomp_mutex_lock (&mm->lock);
859 if (!devicep->is_initialized)
860 gomp_init_dev_tables (devicep);
861 gomp_mutex_unlock (&mm->lock);
863 struct target_mem_desc *tgt
864 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
865 false);
866 struct gomp_task_icv *icv = gomp_icv (true);
867 tgt->prev = icv->target_data;
868 icv->target_data = tgt;
871 void
872 GOMP_target_end_data (void)
874 struct gomp_task_icv *icv = gomp_icv (false);
875 if (icv->target_data)
877 struct target_mem_desc *tgt = icv->target_data;
878 icv->target_data = tgt->prev;
879 gomp_unmap_vars (tgt, true);
883 void
884 GOMP_target_update (int device, const void *offload_table, size_t mapnum,
885 void **hostaddrs, size_t *sizes, unsigned char *kinds)
887 struct gomp_device_descr *devicep = resolve_device (device);
889 if (devicep == NULL)
890 return;
892 struct gomp_memory_mapping *mm = &devicep->mem_map;
893 gomp_mutex_lock (&mm->lock);
894 if (!devicep->is_initialized)
895 gomp_init_dev_tables (devicep);
896 gomp_mutex_unlock (&mm->lock);
898 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
899 return;
901 gomp_update (devicep, &devicep->mem_map, mapnum, hostaddrs, sizes, kinds,
902 false);
905 void
906 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
908 if (thread_limit)
910 struct gomp_task_icv *icv = gomp_icv (true);
911 icv->thread_limit_var
912 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
914 (void) num_teams;
917 #ifdef PLUGIN_SUPPORT
919 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
920 in PLUGIN_NAME.
921 The handles of the found functions are stored in the corresponding fields
922 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
924 static bool
925 gomp_load_plugin_for_device (struct gomp_device_descr *device,
926 const char *plugin_name)
928 char *err = NULL, *last_missing = NULL;
929 int optional_present, optional_total;
931 /* Clear any existing error. */
932 dlerror ();
934 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
935 if (!plugin_handle)
937 err = dlerror ();
938 goto out;
941 /* Check if all required functions are available in the plugin and store
942 their handlers. */
943 #define DLSYM(f) \
944 do \
946 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
947 err = dlerror (); \
948 if (err != NULL) \
949 goto out; \
951 while (0)
952 /* Similar, but missing functions are not an error. */
953 #define DLSYM_OPT(f, n) \
954 do \
956 char *tmp_err; \
957 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
958 tmp_err = dlerror (); \
959 if (tmp_err == NULL) \
960 optional_present++; \
961 else \
962 last_missing = #n; \
963 optional_total++; \
965 while (0)
967 DLSYM (get_name);
968 DLSYM (get_caps);
969 DLSYM (get_type);
970 DLSYM (get_num_devices);
971 DLSYM (register_image);
972 DLSYM (init_device);
973 DLSYM (fini_device);
974 DLSYM (get_table);
975 DLSYM (alloc);
976 DLSYM (free);
977 DLSYM (dev2host);
978 DLSYM (host2dev);
979 device->capabilities = device->get_caps_func ();
980 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
981 DLSYM (run);
982 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
984 optional_present = optional_total = 0;
985 DLSYM_OPT (openacc.exec, openacc_parallel);
986 DLSYM_OPT (openacc.open_device, openacc_open_device);
987 DLSYM_OPT (openacc.close_device, openacc_close_device);
988 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
989 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
990 DLSYM_OPT (openacc.register_async_cleanup,
991 openacc_register_async_cleanup);
992 DLSYM_OPT (openacc.async_test, openacc_async_test);
993 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
994 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
995 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
996 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
997 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
998 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
999 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1000 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1001 /* Require all the OpenACC handlers if we have
1002 GOMP_OFFLOAD_CAP_OPENACC_200. */
1003 if (optional_present != optional_total)
1005 err = "plugin missing OpenACC handler function";
1006 goto out;
1008 optional_present = optional_total = 0;
1009 DLSYM_OPT (openacc.cuda.get_current_device,
1010 openacc_get_current_cuda_device);
1011 DLSYM_OPT (openacc.cuda.get_current_context,
1012 openacc_get_current_cuda_context);
1013 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1014 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1015 /* Make sure all the CUDA functions are there if any of them are. */
1016 if (optional_present && optional_present != optional_total)
1018 err = "plugin missing OpenACC CUDA handler function";
1019 goto out;
1022 #undef DLSYM
1023 #undef DLSYM_OPT
1025 out:
1026 if (err != NULL)
1028 gomp_error ("while loading %s: %s", plugin_name, err);
1029 if (last_missing)
1030 gomp_error ("missing function was %s", last_missing);
1031 if (plugin_handle)
1032 dlclose (plugin_handle);
1034 return err == NULL;
1037 /* This function adds a compatible offload image IMAGE to an accelerator device
1038 DEVICE. */
1040 static void
1041 gomp_register_image_for_device (struct gomp_device_descr *device,
1042 struct offload_image_descr *image)
1044 if (!device->offload_regions_registered
1045 && (device->type == image->type
1046 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1048 device->register_image_func (image->host_table, image->target_data);
1049 device->offload_regions_registered = true;
1053 /* This function initializes the runtime needed for offloading.
1054 It parses the list of offload targets and tries to load the plugins for
1055 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1056 will be set, and the array DEVICES initialized, containing descriptors for
1057 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1058 by the others. */
1060 static void
1061 gomp_target_init (void)
1063 const char *prefix ="libgomp-plugin-";
1064 const char *suffix = ".so.1";
1065 const char *cur, *next;
1066 char *plugin_name;
1067 int i, new_num_devices;
1069 num_devices = 0;
1070 devices = NULL;
1072 cur = OFFLOAD_TARGETS;
1073 if (*cur)
1076 struct gomp_device_descr current_device;
1078 next = strchr (cur, ',');
1080 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1081 + strlen (prefix) + strlen (suffix));
1082 if (!plugin_name)
1084 num_devices = 0;
1085 break;
1088 strcpy (plugin_name, prefix);
1089 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1090 strcat (plugin_name, suffix);
1092 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1094 new_num_devices = current_device.get_num_devices_func ();
1095 if (new_num_devices >= 1)
1097 /* Augment DEVICES and NUM_DEVICES. */
1099 devices = realloc (devices, (num_devices + new_num_devices)
1100 * sizeof (struct gomp_device_descr));
1101 if (!devices)
1103 num_devices = 0;
1104 free (plugin_name);
1105 break;
1108 current_device.type = current_device.get_type_func ();
1109 current_device.name = current_device.get_name_func ();
1110 current_device.is_initialized = false;
1111 current_device.offload_regions_registered = false;
1112 current_device.mem_map.splay_tree.root = NULL;
1113 current_device.mem_map.is_initialized = false;
1114 current_device.openacc.data_environ = NULL;
1115 current_device.openacc.target_data = NULL;
1116 for (i = 0; i < new_num_devices; i++)
1118 current_device.target_id = i;
1119 devices[num_devices] = current_device;
1120 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1121 num_devices++;
1126 free (plugin_name);
1127 cur = next + 1;
1129 while (next);
1131 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1132 NUM_DEVICES_OPENMP. */
1133 struct gomp_device_descr *devices_s
1134 = malloc (num_devices * sizeof (struct gomp_device_descr));
1135 if (!devices_s)
1137 num_devices = 0;
1138 free (devices);
1139 devices = NULL;
1141 num_devices_openmp = 0;
1142 for (i = 0; i < num_devices; i++)
1143 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1144 devices_s[num_devices_openmp++] = devices[i];
1145 int num_devices_after_openmp = num_devices_openmp;
1146 for (i = 0; i < num_devices; i++)
1147 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1148 devices_s[num_devices_after_openmp++] = devices[i];
1149 free (devices);
1150 devices = devices_s;
1152 for (i = 0; i < num_devices; i++)
1154 int j;
1156 for (j = 0; j < num_offload_images; j++)
1157 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1159 /* The 'devices' array can be moved (by the realloc call) until we have
1160 found all the plugins, so registering with the OpenACC runtime (which
1161 takes a copy of the pointer argument) must be delayed until now. */
1162 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1163 goacc_register (&devices[i]);
1166 free (offload_images);
1167 offload_images = NULL;
1168 num_offload_images = 0;
1171 #else /* PLUGIN_SUPPORT */
1172 /* If dlfcn.h is unavailable we always fallback to host execution.
1173 GOMP_target* routines are just stubs for this case. */
1174 static void
1175 gomp_target_init (void)
1178 #endif /* PLUGIN_SUPPORT */