re PR c++/59329 (Using `assert(...)` is not allowed in constexpr functions)
[official-gcc.git] / libgomp / target.c
blobc5dda3f0c935f1dd6667b2e3895aff758b4a4764
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
38 #endif
39 #include <string.h>
40 #include <assert.h>
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 /* This structure describes an offload image.
53 It contains type of the target device, pointer to host table descriptor, and
54 pointer to target data. */
55 struct offload_image_descr {
56 enum offload_target_type type;
57 void *host_table;
58 void *target_data;
61 /* Array of descriptors of offload images. */
62 static struct offload_image_descr *offload_images;
64 /* Total number of offload images. */
65 static int num_offload_images;
67 /* Array of descriptors for all available devices. */
68 static struct gomp_device_descr *devices;
70 #ifdef PLUGIN_SUPPORT
71 /* Total number of available devices. */
72 static int num_devices;
73 #endif
75 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
76 static int num_devices_openmp;
78 /* The comparison function. */
80 attribute_hidden int
81 splay_compare (splay_tree_key x, splay_tree_key y)
83 if (x->host_start == x->host_end
84 && y->host_start == y->host_end)
85 return 0;
86 if (x->host_end <= y->host_start)
87 return -1;
88 if (x->host_start >= y->host_end)
89 return 1;
90 return 0;
93 #include "splay-tree.h"
95 attribute_hidden void
96 gomp_init_targets_once (void)
98 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
101 attribute_hidden int
102 gomp_get_num_devices (void)
104 gomp_init_targets_once ();
105 return num_devices_openmp;
108 static struct gomp_device_descr *
109 resolve_device (int device_id)
111 if (device_id == GOMP_DEVICE_ICV)
113 struct gomp_task_icv *icv = gomp_icv (false);
114 device_id = icv->default_device_var;
117 if (device_id < 0 || device_id >= gomp_get_num_devices ())
118 return NULL;
120 return &devices[device_id];
124 /* Handle the case where splay_tree_lookup found oldn for newn.
125 Helper function of gomp_map_vars. */
127 static inline void
128 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
129 unsigned char kind)
131 if ((kind & GOMP_MAP_FLAG_FORCE)
132 || oldn->host_start > newn->host_start
133 || oldn->host_end < newn->host_end)
134 gomp_fatal ("Trying to map into device [%p..%p) object when "
135 "[%p..%p) is already mapped",
136 (void *) newn->host_start, (void *) newn->host_end,
137 (void *) oldn->host_start, (void *) oldn->host_end);
138 oldn->refcount++;
141 static int
142 get_kind (bool is_openacc, void *kinds, int idx)
144 return is_openacc ? ((unsigned short *) kinds)[idx]
145 : ((unsigned char *) kinds)[idx];
148 attribute_hidden struct target_mem_desc *
149 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
150 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
151 bool is_openacc, bool is_target)
153 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
154 const int rshift = is_openacc ? 8 : 3;
155 const int typemask = is_openacc ? 0xff : 0x7;
156 struct gomp_memory_mapping *mm = &devicep->mem_map;
157 struct splay_tree_key_s cur_node;
158 struct target_mem_desc *tgt
159 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
160 tgt->list_count = mapnum;
161 tgt->refcount = 1;
162 tgt->device_descr = devicep;
163 tgt->mem_map = mm;
165 if (mapnum == 0)
166 return tgt;
168 tgt_align = sizeof (void *);
169 tgt_size = 0;
170 if (is_target)
172 size_t align = 4 * sizeof (void *);
173 tgt_align = align;
174 tgt_size = mapnum * sizeof (void *);
177 gomp_mutex_lock (&mm->lock);
179 for (i = 0; i < mapnum; i++)
181 int kind = get_kind (is_openacc, kinds, i);
182 if (hostaddrs[i] == NULL)
184 tgt->list[i] = NULL;
185 continue;
187 cur_node.host_start = (uintptr_t) hostaddrs[i];
188 if (!GOMP_MAP_POINTER_P (kind & typemask))
189 cur_node.host_end = cur_node.host_start + sizes[i];
190 else
191 cur_node.host_end = cur_node.host_start + sizeof (void *);
192 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
193 if (n)
195 tgt->list[i] = n;
196 gomp_map_vars_existing (n, &cur_node, kind & typemask);
198 else
200 tgt->list[i] = NULL;
202 size_t align = (size_t) 1 << (kind >> rshift);
203 not_found_cnt++;
204 if (tgt_align < align)
205 tgt_align = align;
206 tgt_size = (tgt_size + align - 1) & ~(align - 1);
207 tgt_size += cur_node.host_end - cur_node.host_start;
208 if ((kind & typemask) == GOMP_MAP_TO_PSET)
210 size_t j;
211 for (j = i + 1; j < mapnum; j++)
212 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
213 & typemask))
214 break;
215 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
216 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
217 > cur_node.host_end))
218 break;
219 else
221 tgt->list[j] = NULL;
222 i++;
228 if (devaddrs)
230 if (mapnum != 1)
231 gomp_fatal ("unexpected aggregation");
232 tgt->to_free = devaddrs[0];
233 tgt->tgt_start = (uintptr_t) tgt->to_free;
234 tgt->tgt_end = tgt->tgt_start + sizes[0];
236 else if (not_found_cnt || is_target)
238 /* Allocate tgt_align aligned tgt_size block of memory. */
239 /* FIXME: Perhaps change interface to allocate properly aligned
240 memory. */
241 tgt->to_free = devicep->alloc_func (devicep->target_id,
242 tgt_size + tgt_align - 1);
243 tgt->tgt_start = (uintptr_t) tgt->to_free;
244 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
245 tgt->tgt_end = tgt->tgt_start + tgt_size;
247 else
249 tgt->to_free = NULL;
250 tgt->tgt_start = 0;
251 tgt->tgt_end = 0;
254 tgt_size = 0;
255 if (is_target)
256 tgt_size = mapnum * sizeof (void *);
258 tgt->array = NULL;
259 if (not_found_cnt)
261 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
262 splay_tree_node array = tgt->array;
263 size_t j;
265 for (i = 0; i < mapnum; i++)
266 if (tgt->list[i] == NULL)
268 int kind = get_kind (is_openacc, kinds, i);
269 if (hostaddrs[i] == NULL)
270 continue;
271 splay_tree_key k = &array->key;
272 k->host_start = (uintptr_t) hostaddrs[i];
273 if (!GOMP_MAP_POINTER_P (kind & typemask))
274 k->host_end = k->host_start + sizes[i];
275 else
276 k->host_end = k->host_start + sizeof (void *);
277 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
278 if (n)
280 tgt->list[i] = n;
281 gomp_map_vars_existing (n, k, kind & typemask);
283 else
285 size_t align = (size_t) 1 << (kind >> rshift);
286 tgt->list[i] = k;
287 tgt_size = (tgt_size + align - 1) & ~(align - 1);
288 k->tgt = tgt;
289 k->tgt_offset = tgt_size;
290 tgt_size += k->host_end - k->host_start;
291 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
292 k->refcount = 1;
293 k->async_refcount = 0;
294 tgt->refcount++;
295 array->left = NULL;
296 array->right = NULL;
297 splay_tree_insert (&mm->splay_tree, array);
298 switch (kind & typemask)
300 case GOMP_MAP_ALLOC:
301 case GOMP_MAP_FROM:
302 case GOMP_MAP_FORCE_ALLOC:
303 case GOMP_MAP_FORCE_FROM:
304 break;
305 case GOMP_MAP_TO:
306 case GOMP_MAP_TOFROM:
307 case GOMP_MAP_FORCE_TO:
308 case GOMP_MAP_FORCE_TOFROM:
309 /* FIXME: Perhaps add some smarts, like if copying
310 several adjacent fields from host to target, use some
311 host buffer to avoid sending each var individually. */
312 devicep->host2dev_func (devicep->target_id,
313 (void *) (tgt->tgt_start
314 + k->tgt_offset),
315 (void *) k->host_start,
316 k->host_end - k->host_start);
317 break;
318 case GOMP_MAP_POINTER:
319 cur_node.host_start
320 = (uintptr_t) *(void **) k->host_start;
321 if (cur_node.host_start == (uintptr_t) NULL)
323 cur_node.tgt_offset = (uintptr_t) NULL;
324 /* FIXME: see above FIXME comment. */
325 devicep->host2dev_func (devicep->target_id,
326 (void *) (tgt->tgt_start
327 + k->tgt_offset),
328 (void *) &cur_node.tgt_offset,
329 sizeof (void *));
330 break;
332 /* Add bias to the pointer value. */
333 cur_node.host_start += sizes[i];
334 cur_node.host_end = cur_node.host_start + 1;
335 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
336 if (n == NULL)
338 /* Could be possibly zero size array section. */
339 cur_node.host_end--;
340 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
341 if (n == NULL)
343 cur_node.host_start--;
344 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
345 cur_node.host_start++;
348 if (n == NULL)
349 gomp_fatal ("Pointer target of array section "
350 "wasn't mapped");
351 cur_node.host_start -= n->host_start;
352 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
353 + cur_node.host_start;
354 /* At this point tgt_offset is target address of the
355 array section. Now subtract bias to get what we want
356 to initialize the pointer with. */
357 cur_node.tgt_offset -= sizes[i];
358 /* FIXME: see above FIXME comment. */
359 devicep->host2dev_func (devicep->target_id,
360 (void *) (tgt->tgt_start
361 + k->tgt_offset),
362 (void *) &cur_node.tgt_offset,
363 sizeof (void *));
364 break;
365 case GOMP_MAP_TO_PSET:
366 /* FIXME: see above FIXME comment. */
367 devicep->host2dev_func (devicep->target_id,
368 (void *) (tgt->tgt_start
369 + k->tgt_offset),
370 (void *) k->host_start,
371 k->host_end - k->host_start);
373 for (j = i + 1; j < mapnum; j++)
374 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
375 & typemask))
376 break;
377 else if ((uintptr_t) hostaddrs[j] < k->host_start
378 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
379 > k->host_end))
380 break;
381 else
383 tgt->list[j] = k;
384 k->refcount++;
385 cur_node.host_start
386 = (uintptr_t) *(void **) hostaddrs[j];
387 if (cur_node.host_start == (uintptr_t) NULL)
389 cur_node.tgt_offset = (uintptr_t) NULL;
390 /* FIXME: see above FIXME comment. */
391 devicep->host2dev_func (devicep->target_id,
392 (void *) (tgt->tgt_start + k->tgt_offset
393 + ((uintptr_t) hostaddrs[j]
394 - k->host_start)),
395 (void *) &cur_node.tgt_offset,
396 sizeof (void *));
397 i++;
398 continue;
400 /* Add bias to the pointer value. */
401 cur_node.host_start += sizes[j];
402 cur_node.host_end = cur_node.host_start + 1;
403 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
404 if (n == NULL)
406 /* Could be possibly zero size array section. */
407 cur_node.host_end--;
408 n = splay_tree_lookup (&mm->splay_tree,
409 &cur_node);
410 if (n == NULL)
412 cur_node.host_start--;
413 n = splay_tree_lookup (&mm->splay_tree,
414 &cur_node);
415 cur_node.host_start++;
418 if (n == NULL)
419 gomp_fatal ("Pointer target of array section "
420 "wasn't mapped");
421 cur_node.host_start -= n->host_start;
422 cur_node.tgt_offset = n->tgt->tgt_start
423 + n->tgt_offset
424 + cur_node.host_start;
425 /* At this point tgt_offset is target address of the
426 array section. Now subtract bias to get what we
427 want to initialize the pointer with. */
428 cur_node.tgt_offset -= sizes[j];
429 /* FIXME: see above FIXME comment. */
430 devicep->host2dev_func (devicep->target_id,
431 (void *) (tgt->tgt_start + k->tgt_offset
432 + ((uintptr_t) hostaddrs[j]
433 - k->host_start)),
434 (void *) &cur_node.tgt_offset,
435 sizeof (void *));
436 i++;
438 break;
439 case GOMP_MAP_FORCE_PRESENT:
441 /* We already looked up the memory region above and it
442 was missing. */
443 size_t size = k->host_end - k->host_start;
444 #ifdef HAVE_INTTYPES_H
445 gomp_fatal ("present clause: !acc_is_present (%p, "
446 "%"PRIu64" (0x%"PRIx64"))",
447 (void *) k->host_start,
448 (uint64_t) size, (uint64_t) size);
449 #else
450 gomp_fatal ("present clause: !acc_is_present (%p, "
451 "%lu (0x%lx))", (void *) k->host_start,
452 (unsigned long) size, (unsigned long) size);
453 #endif
455 break;
456 case GOMP_MAP_FORCE_DEVICEPTR:
457 assert (k->host_end - k->host_start == sizeof (void *));
459 devicep->host2dev_func (devicep->target_id,
460 (void *) (tgt->tgt_start
461 + k->tgt_offset),
462 (void *) k->host_start,
463 sizeof (void *));
464 break;
465 default:
466 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
467 kind);
469 array++;
474 if (is_target)
476 for (i = 0; i < mapnum; i++)
478 if (tgt->list[i] == NULL)
479 cur_node.tgt_offset = (uintptr_t) NULL;
480 else
481 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
482 + tgt->list[i]->tgt_offset;
483 /* FIXME: see above FIXME comment. */
484 devicep->host2dev_func (devicep->target_id,
485 (void *) (tgt->tgt_start
486 + i * sizeof (void *)),
487 (void *) &cur_node.tgt_offset,
488 sizeof (void *));
492 gomp_mutex_unlock (&mm->lock);
493 return tgt;
496 static void
497 gomp_unmap_tgt (struct target_mem_desc *tgt)
499 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
500 if (tgt->tgt_end)
501 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
503 free (tgt->array);
504 free (tgt);
507 /* Decrease the refcount for a set of mapped variables, and queue asychronous
508 copies from the device back to the host after any work that has been issued.
509 Because the regions are still "live", increment an asynchronous reference
510 count to indicate that they should not be unmapped from host-side data
511 structures until the asynchronous copy has completed. */
513 attribute_hidden void
514 gomp_copy_from_async (struct target_mem_desc *tgt)
516 struct gomp_device_descr *devicep = tgt->device_descr;
517 struct gomp_memory_mapping *mm = tgt->mem_map;
518 size_t i;
520 gomp_mutex_lock (&mm->lock);
522 for (i = 0; i < tgt->list_count; i++)
523 if (tgt->list[i] == NULL)
525 else if (tgt->list[i]->refcount > 1)
527 tgt->list[i]->refcount--;
528 tgt->list[i]->async_refcount++;
530 else
532 splay_tree_key k = tgt->list[i];
533 if (k->copy_from)
534 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
535 (void *) (k->tgt->tgt_start + k->tgt_offset),
536 k->host_end - k->host_start);
539 gomp_mutex_unlock (&mm->lock);
542 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
543 variables back from device to host: if it is false, it is assumed that this
544 has been done already, i.e. by gomp_copy_from_async above. */
546 attribute_hidden void
547 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
549 struct gomp_device_descr *devicep = tgt->device_descr;
550 struct gomp_memory_mapping *mm = tgt->mem_map;
552 if (tgt->list_count == 0)
554 free (tgt);
555 return;
558 gomp_mutex_lock (&mm->lock);
560 size_t i;
561 for (i = 0; i < tgt->list_count; i++)
562 if (tgt->list[i] == NULL)
564 else if (tgt->list[i]->refcount > 1)
565 tgt->list[i]->refcount--;
566 else if (tgt->list[i]->async_refcount > 0)
567 tgt->list[i]->async_refcount--;
568 else
570 splay_tree_key k = tgt->list[i];
571 if (k->copy_from && do_copyfrom)
572 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
573 (void *) (k->tgt->tgt_start + k->tgt_offset),
574 k->host_end - k->host_start);
575 splay_tree_remove (&mm->splay_tree, k);
576 if (k->tgt->refcount > 1)
577 k->tgt->refcount--;
578 else
579 gomp_unmap_tgt (k->tgt);
582 if (tgt->refcount > 1)
583 tgt->refcount--;
584 else
585 gomp_unmap_tgt (tgt);
587 gomp_mutex_unlock (&mm->lock);
590 static void
591 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
592 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
593 bool is_openacc)
595 size_t i;
596 struct splay_tree_key_s cur_node;
597 const int typemask = is_openacc ? 0xff : 0x7;
599 if (!devicep)
600 return;
602 if (mapnum == 0)
603 return;
605 gomp_mutex_lock (&mm->lock);
606 for (i = 0; i < mapnum; i++)
607 if (sizes[i])
609 cur_node.host_start = (uintptr_t) hostaddrs[i];
610 cur_node.host_end = cur_node.host_start + sizes[i];
611 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
612 &cur_node);
613 if (n)
615 int kind = get_kind (is_openacc, kinds, i);
616 if (n->host_start > cur_node.host_start
617 || n->host_end < cur_node.host_end)
618 gomp_fatal ("Trying to update [%p..%p) object when"
619 "only [%p..%p) is mapped",
620 (void *) cur_node.host_start,
621 (void *) cur_node.host_end,
622 (void *) n->host_start,
623 (void *) n->host_end);
624 if (GOMP_MAP_COPY_TO_P (kind & typemask))
625 devicep->host2dev_func (devicep->target_id,
626 (void *) (n->tgt->tgt_start
627 + n->tgt_offset
628 + cur_node.host_start
629 - n->host_start),
630 (void *) cur_node.host_start,
631 cur_node.host_end - cur_node.host_start);
632 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
633 devicep->dev2host_func (devicep->target_id,
634 (void *) cur_node.host_start,
635 (void *) (n->tgt->tgt_start
636 + n->tgt_offset
637 + cur_node.host_start
638 - n->host_start),
639 cur_node.host_end - cur_node.host_start);
641 else
642 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
643 (void *) cur_node.host_start,
644 (void *) cur_node.host_end);
646 gomp_mutex_unlock (&mm->lock);
649 /* This function should be called from every offload image.
650 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
651 the target, and TARGET_DATA needed by target plugin. */
653 void
654 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
655 void *target_data)
657 offload_images = gomp_realloc (offload_images,
658 (num_offload_images + 1)
659 * sizeof (struct offload_image_descr));
661 offload_images[num_offload_images].type = target_type;
662 offload_images[num_offload_images].host_table = host_table;
663 offload_images[num_offload_images].target_data = target_data;
665 num_offload_images++;
668 /* This function initializes the target device, specified by DEVICEP. DEVICEP
669 must be locked on entry, and remains locked on return. */
671 attribute_hidden void
672 gomp_init_device (struct gomp_device_descr *devicep)
674 devicep->init_device_func (devicep->target_id);
675 devicep->is_initialized = true;
678 /* Initialize address mapping tables. MM must be locked on entry, and remains
679 locked on return. */
681 attribute_hidden void
682 gomp_init_tables (struct gomp_device_descr *devicep,
683 struct gomp_memory_mapping *mm)
685 /* Get address mapping table for device. */
686 struct mapping_table *table = NULL;
687 int num_entries = devicep->get_table_func (devicep->target_id, &table);
689 /* Insert host-target address mapping into dev_splay_tree. */
690 int i;
691 for (i = 0; i < num_entries; i++)
693 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
694 tgt->refcount = 1;
695 tgt->array = gomp_malloc (sizeof (*tgt->array));
696 tgt->tgt_start = table[i].tgt_start;
697 tgt->tgt_end = table[i].tgt_end;
698 tgt->to_free = NULL;
699 tgt->list_count = 0;
700 tgt->device_descr = devicep;
701 splay_tree_node node = tgt->array;
702 splay_tree_key k = &node->key;
703 k->host_start = table[i].host_start;
704 k->host_end = table[i].host_end;
705 k->tgt_offset = 0;
706 k->refcount = 1;
707 k->copy_from = false;
708 k->tgt = tgt;
709 node->left = NULL;
710 node->right = NULL;
711 splay_tree_insert (&mm->splay_tree, node);
714 free (table);
715 mm->is_initialized = true;
718 /* Free address mapping tables. MM must be locked on entry, and remains locked
719 on return. */
721 attribute_hidden void
722 gomp_free_memmap (struct gomp_memory_mapping *mm)
724 while (mm->splay_tree.root)
726 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
728 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
729 free (tgt->array);
730 free (tgt);
733 mm->is_initialized = false;
736 /* This function de-initializes the target device, specified by DEVICEP.
737 DEVICEP must be locked on entry, and remains locked on return. */
739 attribute_hidden void
740 gomp_fini_device (struct gomp_device_descr *devicep)
742 if (devicep->is_initialized)
743 devicep->fini_device_func (devicep->target_id);
745 devicep->is_initialized = false;
748 /* Called when encountering a target directive. If DEVICE
749 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
750 GOMP_DEVICE_HOST_FALLBACK (or any value
751 larger than last available hw device), use host fallback.
752 FN is address of host code, UNUSED is part of the current ABI, but
753 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
754 with MAPNUM entries, with addresses of the host objects,
755 sizes of the host objects (resp. for pointer kind pointer bias
756 and assumed sizeof (void *) size) and kinds. */
758 void
759 GOMP_target (int device, void (*fn) (void *), const void *unused,
760 size_t mapnum, void **hostaddrs, size_t *sizes,
761 unsigned char *kinds)
763 struct gomp_device_descr *devicep = resolve_device (device);
765 if (devicep == NULL
766 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
768 /* Host fallback. */
769 struct gomp_thread old_thr, *thr = gomp_thread ();
770 old_thr = *thr;
771 memset (thr, '\0', sizeof (*thr));
772 if (gomp_places_list)
774 thr->place = old_thr.place;
775 thr->ts.place_partition_len = gomp_places_list_len;
777 fn (hostaddrs);
778 gomp_free_thread (thr);
779 *thr = old_thr;
780 return;
783 gomp_mutex_lock (&devicep->lock);
784 if (!devicep->is_initialized)
785 gomp_init_device (devicep);
786 gomp_mutex_unlock (&devicep->lock);
788 void *fn_addr;
790 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
791 fn_addr = (void *) fn;
792 else
794 struct gomp_memory_mapping *mm = &devicep->mem_map;
795 gomp_mutex_lock (&mm->lock);
797 if (!mm->is_initialized)
798 gomp_init_tables (devicep, mm);
800 struct splay_tree_key_s k;
801 k.host_start = (uintptr_t) fn;
802 k.host_end = k.host_start + 1;
803 splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
804 if (tgt_fn == NULL)
805 gomp_fatal ("Target function wasn't mapped");
807 gomp_mutex_unlock (&mm->lock);
809 fn_addr = (void *) tgt_fn->tgt->tgt_start;
812 struct target_mem_desc *tgt_vars
813 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
814 true);
815 struct gomp_thread old_thr, *thr = gomp_thread ();
816 old_thr = *thr;
817 memset (thr, '\0', sizeof (*thr));
818 if (gomp_places_list)
820 thr->place = old_thr.place;
821 thr->ts.place_partition_len = gomp_places_list_len;
823 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
824 gomp_free_thread (thr);
825 *thr = old_thr;
826 gomp_unmap_vars (tgt_vars, true);
829 void
830 GOMP_target_data (int device, const void *unused, size_t mapnum,
831 void **hostaddrs, size_t *sizes, unsigned char *kinds)
833 struct gomp_device_descr *devicep = resolve_device (device);
835 if (devicep == NULL
836 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
838 /* Host fallback. */
839 struct gomp_task_icv *icv = gomp_icv (false);
840 if (icv->target_data)
842 /* Even when doing a host fallback, if there are any active
843 #pragma omp target data constructs, need to remember the
844 new #pragma omp target data, otherwise GOMP_target_end_data
845 would get out of sync. */
846 struct target_mem_desc *tgt
847 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
848 tgt->prev = icv->target_data;
849 icv->target_data = tgt;
851 return;
854 gomp_mutex_lock (&devicep->lock);
855 if (!devicep->is_initialized)
856 gomp_init_device (devicep);
857 gomp_mutex_unlock (&devicep->lock);
859 struct gomp_memory_mapping *mm = &devicep->mem_map;
860 gomp_mutex_lock (&mm->lock);
861 if (!mm->is_initialized)
862 gomp_init_tables (devicep, mm);
863 gomp_mutex_unlock (&mm->lock);
865 struct target_mem_desc *tgt
866 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
867 false);
868 struct gomp_task_icv *icv = gomp_icv (true);
869 tgt->prev = icv->target_data;
870 icv->target_data = tgt;
873 void
874 GOMP_target_end_data (void)
876 struct gomp_task_icv *icv = gomp_icv (false);
877 if (icv->target_data)
879 struct target_mem_desc *tgt = icv->target_data;
880 icv->target_data = tgt->prev;
881 gomp_unmap_vars (tgt, true);
885 void
886 GOMP_target_update (int device, const void *unused, size_t mapnum,
887 void **hostaddrs, size_t *sizes, unsigned char *kinds)
889 struct gomp_device_descr *devicep = resolve_device (device);
891 if (devicep == NULL
892 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
893 return;
895 gomp_mutex_lock (&devicep->lock);
896 if (!devicep->is_initialized)
897 gomp_init_device (devicep);
898 gomp_mutex_unlock (&devicep->lock);
900 struct gomp_memory_mapping *mm = &devicep->mem_map;
901 gomp_mutex_lock (&mm->lock);
902 if (!mm->is_initialized)
903 gomp_init_tables (devicep, mm);
904 gomp_mutex_unlock (&mm->lock);
906 gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
909 void
910 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
912 if (thread_limit)
914 struct gomp_task_icv *icv = gomp_icv (true);
915 icv->thread_limit_var
916 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
918 (void) num_teams;
921 #ifdef PLUGIN_SUPPORT
923 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
924 in PLUGIN_NAME.
925 The handles of the found functions are stored in the corresponding fields
926 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
928 static bool
929 gomp_load_plugin_for_device (struct gomp_device_descr *device,
930 const char *plugin_name)
932 const char *err = NULL, *last_missing = NULL;
933 int optional_present, optional_total;
935 /* Clear any existing error. */
936 dlerror ();
938 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
939 if (!plugin_handle)
941 err = dlerror ();
942 goto out;
945 /* Check if all required functions are available in the plugin and store
946 their handlers. */
947 #define DLSYM(f) \
948 do \
950 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
951 err = dlerror (); \
952 if (err != NULL) \
953 goto out; \
955 while (0)
956 /* Similar, but missing functions are not an error. */
957 #define DLSYM_OPT(f, n) \
958 do \
960 const char *tmp_err; \
961 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
962 tmp_err = dlerror (); \
963 if (tmp_err == NULL) \
964 optional_present++; \
965 else \
966 last_missing = #n; \
967 optional_total++; \
969 while (0)
971 DLSYM (get_name);
972 DLSYM (get_caps);
973 DLSYM (get_type);
974 DLSYM (get_num_devices);
975 DLSYM (register_image);
976 DLSYM (init_device);
977 DLSYM (fini_device);
978 DLSYM (get_table);
979 DLSYM (alloc);
980 DLSYM (free);
981 DLSYM (dev2host);
982 DLSYM (host2dev);
983 device->capabilities = device->get_caps_func ();
984 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
985 DLSYM (run);
986 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
988 optional_present = optional_total = 0;
989 DLSYM_OPT (openacc.exec, openacc_parallel);
990 DLSYM_OPT (openacc.open_device, openacc_open_device);
991 DLSYM_OPT (openacc.close_device, openacc_close_device);
992 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
993 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
994 DLSYM_OPT (openacc.register_async_cleanup,
995 openacc_register_async_cleanup);
996 DLSYM_OPT (openacc.async_test, openacc_async_test);
997 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
998 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
999 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1000 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1001 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1002 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1003 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1004 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1005 /* Require all the OpenACC handlers if we have
1006 GOMP_OFFLOAD_CAP_OPENACC_200. */
1007 if (optional_present != optional_total)
1009 err = "plugin missing OpenACC handler function";
1010 goto out;
1012 optional_present = optional_total = 0;
1013 DLSYM_OPT (openacc.cuda.get_current_device,
1014 openacc_get_current_cuda_device);
1015 DLSYM_OPT (openacc.cuda.get_current_context,
1016 openacc_get_current_cuda_context);
1017 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1018 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1019 /* Make sure all the CUDA functions are there if any of them are. */
1020 if (optional_present && optional_present != optional_total)
1022 err = "plugin missing OpenACC CUDA handler function";
1023 goto out;
1026 #undef DLSYM
1027 #undef DLSYM_OPT
1029 out:
1030 if (err != NULL)
1032 gomp_error ("while loading %s: %s", plugin_name, err);
1033 if (last_missing)
1034 gomp_error ("missing function was %s", last_missing);
1035 if (plugin_handle)
1036 dlclose (plugin_handle);
1038 return err == NULL;
1041 /* This function adds a compatible offload image IMAGE to an accelerator device
1042 DEVICE. DEVICE must be locked on entry, and remains locked on return. */
1044 static void
1045 gomp_register_image_for_device (struct gomp_device_descr *device,
1046 struct offload_image_descr *image)
1048 if (!device->offload_regions_registered
1049 && (device->type == image->type
1050 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1052 device->register_image_func (image->host_table, image->target_data);
1053 device->offload_regions_registered = true;
1057 /* This function initializes the runtime needed for offloading.
1058 It parses the list of offload targets and tries to load the plugins for
1059 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1060 will be set, and the array DEVICES initialized, containing descriptors for
1061 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1062 by the others. */
1064 static void
1065 gomp_target_init (void)
1067 const char *prefix ="libgomp-plugin-";
1068 const char *suffix = SONAME_SUFFIX (1);
1069 const char *cur, *next;
1070 char *plugin_name;
1071 int i, new_num_devices;
1073 num_devices = 0;
1074 devices = NULL;
1076 cur = OFFLOAD_TARGETS;
1077 if (*cur)
1080 struct gomp_device_descr current_device;
1082 next = strchr (cur, ',');
1084 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1085 + strlen (prefix) + strlen (suffix));
1086 if (!plugin_name)
1088 num_devices = 0;
1089 break;
1092 strcpy (plugin_name, prefix);
1093 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1094 strcat (plugin_name, suffix);
1096 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1098 new_num_devices = current_device.get_num_devices_func ();
1099 if (new_num_devices >= 1)
1101 /* Augment DEVICES and NUM_DEVICES. */
1103 devices = realloc (devices, (num_devices + new_num_devices)
1104 * sizeof (struct gomp_device_descr));
1105 if (!devices)
1107 num_devices = 0;
1108 free (plugin_name);
1109 break;
1112 current_device.name = current_device.get_name_func ();
1113 /* current_device.capabilities has already been set. */
1114 current_device.type = current_device.get_type_func ();
1115 current_device.mem_map.is_initialized = false;
1116 current_device.mem_map.splay_tree.root = NULL;
1117 current_device.is_initialized = false;
1118 current_device.offload_regions_registered = false;
1119 current_device.openacc.data_environ = NULL;
1120 current_device.openacc.target_data = NULL;
1121 for (i = 0; i < new_num_devices; i++)
1123 current_device.target_id = i;
1124 devices[num_devices] = current_device;
1125 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1126 gomp_mutex_init (&devices[num_devices].lock);
1127 num_devices++;
1132 free (plugin_name);
1133 cur = next + 1;
1135 while (next);
1137 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1138 NUM_DEVICES_OPENMP. */
1139 struct gomp_device_descr *devices_s
1140 = malloc (num_devices * sizeof (struct gomp_device_descr));
1141 if (!devices_s)
1143 num_devices = 0;
1144 free (devices);
1145 devices = NULL;
1147 num_devices_openmp = 0;
1148 for (i = 0; i < num_devices; i++)
1149 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1150 devices_s[num_devices_openmp++] = devices[i];
1151 int num_devices_after_openmp = num_devices_openmp;
1152 for (i = 0; i < num_devices; i++)
1153 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1154 devices_s[num_devices_after_openmp++] = devices[i];
1155 free (devices);
1156 devices = devices_s;
1158 for (i = 0; i < num_devices; i++)
1160 int j;
1162 for (j = 0; j < num_offload_images; j++)
1163 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1165 /* The 'devices' array can be moved (by the realloc call) until we have
1166 found all the plugins, so registering with the OpenACC runtime (which
1167 takes a copy of the pointer argument) must be delayed until now. */
1168 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1169 goacc_register (&devices[i]);
1172 free (offload_images);
1173 offload_images = NULL;
1174 num_offload_images = 0;
1177 #else /* PLUGIN_SUPPORT */
1178 /* If dlfcn.h is unavailable we always fallback to host execution.
1179 GOMP_target* routines are just stubs for this case. */
1180 static void
1181 gomp_target_init (void)
1184 #endif /* PLUGIN_SUPPORT */