2015-02-22 Arnaud Charlet <charlet@adacore.com>
[official-gcc.git] / libgomp / target.c
blob50baa4d1319e5139be660e9a99e2e7f70f9c9bbe
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
39 #ifdef PLUGIN_SUPPORT
40 #include <dlfcn.h>
41 #include "plugin-suffix.h"
42 #endif
44 static void gomp_target_init (void);
46 /* The whole initialization code for offloading plugins is only run one. */
47 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
49 /* This structure describes an offload image.
50 It contains type of the target device, pointer to host table descriptor, and
51 pointer to target data. */
52 struct offload_image_descr {
53 enum offload_target_type type;
54 void *host_table;
55 void *target_data;
58 /* Array of descriptors of offload images. */
59 static struct offload_image_descr *offload_images;
61 /* Total number of offload images. */
62 static int num_offload_images;
64 /* Array of descriptors for all available devices. */
65 static struct gomp_device_descr *devices;
67 #ifdef PLUGIN_SUPPORT
68 /* Total number of available devices. */
69 static int num_devices;
70 #endif
72 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
73 static int num_devices_openmp;
75 /* The comparison function. */
77 attribute_hidden int
78 splay_compare (splay_tree_key x, splay_tree_key y)
80 if (x->host_start == x->host_end
81 && y->host_start == y->host_end)
82 return 0;
83 if (x->host_end <= y->host_start)
84 return -1;
85 if (x->host_start >= y->host_end)
86 return 1;
87 return 0;
90 #include "splay-tree.h"
92 attribute_hidden void
93 gomp_init_targets_once (void)
95 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
98 attribute_hidden int
99 gomp_get_num_devices (void)
101 gomp_init_targets_once ();
102 return num_devices_openmp;
105 static struct gomp_device_descr *
106 resolve_device (int device_id)
108 if (device_id == GOMP_DEVICE_ICV)
110 struct gomp_task_icv *icv = gomp_icv (false);
111 device_id = icv->default_device_var;
114 if (device_id < 0 || device_id >= gomp_get_num_devices ())
115 return NULL;
117 return &devices[device_id];
121 /* Handle the case where splay_tree_lookup found oldn for newn.
122 Helper function of gomp_map_vars. */
124 static inline void
125 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
126 unsigned char kind)
128 if ((kind & GOMP_MAP_FLAG_FORCE)
129 || oldn->host_start > newn->host_start
130 || oldn->host_end < newn->host_end)
131 gomp_fatal ("Trying to map into device [%p..%p) object when "
132 "[%p..%p) is already mapped",
133 (void *) newn->host_start, (void *) newn->host_end,
134 (void *) oldn->host_start, (void *) oldn->host_end);
135 oldn->refcount++;
138 static int
139 get_kind (bool is_openacc, void *kinds, int idx)
141 return is_openacc ? ((unsigned short *) kinds)[idx]
142 : ((unsigned char *) kinds)[idx];
145 attribute_hidden struct target_mem_desc *
146 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
147 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
148 bool is_openacc, bool is_target)
150 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
151 const int rshift = is_openacc ? 8 : 3;
152 const int typemask = is_openacc ? 0xff : 0x7;
153 struct gomp_memory_mapping *mm = &devicep->mem_map;
154 struct splay_tree_key_s cur_node;
155 struct target_mem_desc *tgt
156 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
157 tgt->list_count = mapnum;
158 tgt->refcount = 1;
159 tgt->device_descr = devicep;
160 tgt->mem_map = mm;
162 if (mapnum == 0)
163 return tgt;
165 tgt_align = sizeof (void *);
166 tgt_size = 0;
167 if (is_target)
169 size_t align = 4 * sizeof (void *);
170 tgt_align = align;
171 tgt_size = mapnum * sizeof (void *);
174 gomp_mutex_lock (&mm->lock);
176 for (i = 0; i < mapnum; i++)
178 int kind = get_kind (is_openacc, kinds, i);
179 if (hostaddrs[i] == NULL)
181 tgt->list[i] = NULL;
182 continue;
184 cur_node.host_start = (uintptr_t) hostaddrs[i];
185 if (!GOMP_MAP_POINTER_P (kind & typemask))
186 cur_node.host_end = cur_node.host_start + sizes[i];
187 else
188 cur_node.host_end = cur_node.host_start + sizeof (void *);
189 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
190 if (n)
192 tgt->list[i] = n;
193 gomp_map_vars_existing (n, &cur_node, kind & typemask);
195 else
197 tgt->list[i] = NULL;
199 size_t align = (size_t) 1 << (kind >> rshift);
200 not_found_cnt++;
201 if (tgt_align < align)
202 tgt_align = align;
203 tgt_size = (tgt_size + align - 1) & ~(align - 1);
204 tgt_size += cur_node.host_end - cur_node.host_start;
205 if ((kind & typemask) == GOMP_MAP_TO_PSET)
207 size_t j;
208 for (j = i + 1; j < mapnum; j++)
209 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
210 & typemask))
211 break;
212 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
213 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
214 > cur_node.host_end))
215 break;
216 else
218 tgt->list[j] = NULL;
219 i++;
225 if (devaddrs)
227 if (mapnum != 1)
228 gomp_fatal ("unexpected aggregation");
229 tgt->to_free = devaddrs[0];
230 tgt->tgt_start = (uintptr_t) tgt->to_free;
231 tgt->tgt_end = tgt->tgt_start + sizes[0];
233 else if (not_found_cnt || is_target)
235 /* Allocate tgt_align aligned tgt_size block of memory. */
236 /* FIXME: Perhaps change interface to allocate properly aligned
237 memory. */
238 tgt->to_free = devicep->alloc_func (devicep->target_id,
239 tgt_size + tgt_align - 1);
240 tgt->tgt_start = (uintptr_t) tgt->to_free;
241 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
242 tgt->tgt_end = tgt->tgt_start + tgt_size;
244 else
246 tgt->to_free = NULL;
247 tgt->tgt_start = 0;
248 tgt->tgt_end = 0;
251 tgt_size = 0;
252 if (is_target)
253 tgt_size = mapnum * sizeof (void *);
255 tgt->array = NULL;
256 if (not_found_cnt)
258 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
259 splay_tree_node array = tgt->array;
260 size_t j;
262 for (i = 0; i < mapnum; i++)
263 if (tgt->list[i] == NULL)
265 int kind = get_kind (is_openacc, kinds, i);
266 if (hostaddrs[i] == NULL)
267 continue;
268 splay_tree_key k = &array->key;
269 k->host_start = (uintptr_t) hostaddrs[i];
270 if (!GOMP_MAP_POINTER_P (kind & typemask))
271 k->host_end = k->host_start + sizes[i];
272 else
273 k->host_end = k->host_start + sizeof (void *);
274 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
275 if (n)
277 tgt->list[i] = n;
278 gomp_map_vars_existing (n, k, kind & typemask);
280 else
282 size_t align = (size_t) 1 << (kind >> rshift);
283 tgt->list[i] = k;
284 tgt_size = (tgt_size + align - 1) & ~(align - 1);
285 k->tgt = tgt;
286 k->tgt_offset = tgt_size;
287 tgt_size += k->host_end - k->host_start;
288 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
289 k->refcount = 1;
290 k->async_refcount = 0;
291 tgt->refcount++;
292 array->left = NULL;
293 array->right = NULL;
294 splay_tree_insert (&mm->splay_tree, array);
295 switch (kind & typemask)
297 case GOMP_MAP_ALLOC:
298 case GOMP_MAP_FROM:
299 case GOMP_MAP_FORCE_ALLOC:
300 case GOMP_MAP_FORCE_FROM:
301 break;
302 case GOMP_MAP_TO:
303 case GOMP_MAP_TOFROM:
304 case GOMP_MAP_FORCE_TO:
305 case GOMP_MAP_FORCE_TOFROM:
306 /* FIXME: Perhaps add some smarts, like if copying
307 several adjacent fields from host to target, use some
308 host buffer to avoid sending each var individually. */
309 devicep->host2dev_func (devicep->target_id,
310 (void *) (tgt->tgt_start
311 + k->tgt_offset),
312 (void *) k->host_start,
313 k->host_end - k->host_start);
314 break;
315 case GOMP_MAP_POINTER:
316 cur_node.host_start
317 = (uintptr_t) *(void **) k->host_start;
318 if (cur_node.host_start == (uintptr_t) NULL)
320 cur_node.tgt_offset = (uintptr_t) NULL;
321 /* FIXME: see above FIXME comment. */
322 devicep->host2dev_func (devicep->target_id,
323 (void *) (tgt->tgt_start
324 + k->tgt_offset),
325 (void *) &cur_node.tgt_offset,
326 sizeof (void *));
327 break;
329 /* Add bias to the pointer value. */
330 cur_node.host_start += sizes[i];
331 cur_node.host_end = cur_node.host_start + 1;
332 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
333 if (n == NULL)
335 /* Could be possibly zero size array section. */
336 cur_node.host_end--;
337 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
338 if (n == NULL)
340 cur_node.host_start--;
341 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
342 cur_node.host_start++;
345 if (n == NULL)
346 gomp_fatal ("Pointer target of array section "
347 "wasn't mapped");
348 cur_node.host_start -= n->host_start;
349 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
350 + cur_node.host_start;
351 /* At this point tgt_offset is target address of the
352 array section. Now subtract bias to get what we want
353 to initialize the pointer with. */
354 cur_node.tgt_offset -= sizes[i];
355 /* FIXME: see above FIXME comment. */
356 devicep->host2dev_func (devicep->target_id,
357 (void *) (tgt->tgt_start
358 + k->tgt_offset),
359 (void *) &cur_node.tgt_offset,
360 sizeof (void *));
361 break;
362 case GOMP_MAP_TO_PSET:
363 /* FIXME: see above FIXME comment. */
364 devicep->host2dev_func (devicep->target_id,
365 (void *) (tgt->tgt_start
366 + k->tgt_offset),
367 (void *) k->host_start,
368 k->host_end - k->host_start);
370 for (j = i + 1; j < mapnum; j++)
371 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
372 & typemask))
373 break;
374 else if ((uintptr_t) hostaddrs[j] < k->host_start
375 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
376 > k->host_end))
377 break;
378 else
380 tgt->list[j] = k;
381 k->refcount++;
382 cur_node.host_start
383 = (uintptr_t) *(void **) hostaddrs[j];
384 if (cur_node.host_start == (uintptr_t) NULL)
386 cur_node.tgt_offset = (uintptr_t) NULL;
387 /* FIXME: see above FIXME comment. */
388 devicep->host2dev_func (devicep->target_id,
389 (void *) (tgt->tgt_start + k->tgt_offset
390 + ((uintptr_t) hostaddrs[j]
391 - k->host_start)),
392 (void *) &cur_node.tgt_offset,
393 sizeof (void *));
394 i++;
395 continue;
397 /* Add bias to the pointer value. */
398 cur_node.host_start += sizes[j];
399 cur_node.host_end = cur_node.host_start + 1;
400 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
401 if (n == NULL)
403 /* Could be possibly zero size array section. */
404 cur_node.host_end--;
405 n = splay_tree_lookup (&mm->splay_tree,
406 &cur_node);
407 if (n == NULL)
409 cur_node.host_start--;
410 n = splay_tree_lookup (&mm->splay_tree,
411 &cur_node);
412 cur_node.host_start++;
415 if (n == NULL)
416 gomp_fatal ("Pointer target of array section "
417 "wasn't mapped");
418 cur_node.host_start -= n->host_start;
419 cur_node.tgt_offset = n->tgt->tgt_start
420 + n->tgt_offset
421 + cur_node.host_start;
422 /* At this point tgt_offset is target address of the
423 array section. Now subtract bias to get what we
424 want to initialize the pointer with. */
425 cur_node.tgt_offset -= sizes[j];
426 /* FIXME: see above FIXME comment. */
427 devicep->host2dev_func (devicep->target_id,
428 (void *) (tgt->tgt_start + k->tgt_offset
429 + ((uintptr_t) hostaddrs[j]
430 - k->host_start)),
431 (void *) &cur_node.tgt_offset,
432 sizeof (void *));
433 i++;
435 break;
436 case GOMP_MAP_FORCE_PRESENT:
438 /* We already looked up the memory region above and it
439 was missing. */
440 size_t size = k->host_end - k->host_start;
441 gomp_fatal ("present clause: !acc_is_present (%p, "
442 "%zd (0x%zx))", (void *) k->host_start,
443 size, size);
445 break;
446 case GOMP_MAP_FORCE_DEVICEPTR:
447 assert (k->host_end - k->host_start == sizeof (void *));
449 devicep->host2dev_func (devicep->target_id,
450 (void *) (tgt->tgt_start
451 + k->tgt_offset),
452 (void *) k->host_start,
453 sizeof (void *));
454 break;
455 default:
456 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
457 kind);
459 array++;
464 if (is_target)
466 for (i = 0; i < mapnum; i++)
468 if (tgt->list[i] == NULL)
469 cur_node.tgt_offset = (uintptr_t) NULL;
470 else
471 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
472 + tgt->list[i]->tgt_offset;
473 /* FIXME: see above FIXME comment. */
474 devicep->host2dev_func (devicep->target_id,
475 (void *) (tgt->tgt_start
476 + i * sizeof (void *)),
477 (void *) &cur_node.tgt_offset,
478 sizeof (void *));
482 gomp_mutex_unlock (&mm->lock);
483 return tgt;
486 static void
487 gomp_unmap_tgt (struct target_mem_desc *tgt)
489 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
490 if (tgt->tgt_end)
491 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
493 free (tgt->array);
494 free (tgt);
497 /* Decrease the refcount for a set of mapped variables, and queue asychronous
498 copies from the device back to the host after any work that has been issued.
499 Because the regions are still "live", increment an asynchronous reference
500 count to indicate that they should not be unmapped from host-side data
501 structures until the asynchronous copy has completed. */
503 attribute_hidden void
504 gomp_copy_from_async (struct target_mem_desc *tgt)
506 struct gomp_device_descr *devicep = tgt->device_descr;
507 struct gomp_memory_mapping *mm = tgt->mem_map;
508 size_t i;
510 gomp_mutex_lock (&mm->lock);
512 for (i = 0; i < tgt->list_count; i++)
513 if (tgt->list[i] == NULL)
515 else if (tgt->list[i]->refcount > 1)
517 tgt->list[i]->refcount--;
518 tgt->list[i]->async_refcount++;
520 else
522 splay_tree_key k = tgt->list[i];
523 if (k->copy_from)
524 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
525 (void *) (k->tgt->tgt_start + k->tgt_offset),
526 k->host_end - k->host_start);
529 gomp_mutex_unlock (&mm->lock);
532 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
533 variables back from device to host: if it is false, it is assumed that this
534 has been done already, i.e. by gomp_copy_from_async above. */
536 attribute_hidden void
537 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
539 struct gomp_device_descr *devicep = tgt->device_descr;
540 struct gomp_memory_mapping *mm = tgt->mem_map;
542 if (tgt->list_count == 0)
544 free (tgt);
545 return;
548 gomp_mutex_lock (&mm->lock);
550 size_t i;
551 for (i = 0; i < tgt->list_count; i++)
552 if (tgt->list[i] == NULL)
554 else if (tgt->list[i]->refcount > 1)
555 tgt->list[i]->refcount--;
556 else if (tgt->list[i]->async_refcount > 0)
557 tgt->list[i]->async_refcount--;
558 else
560 splay_tree_key k = tgt->list[i];
561 if (k->copy_from && do_copyfrom)
562 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
563 (void *) (k->tgt->tgt_start + k->tgt_offset),
564 k->host_end - k->host_start);
565 splay_tree_remove (&mm->splay_tree, k);
566 if (k->tgt->refcount > 1)
567 k->tgt->refcount--;
568 else
569 gomp_unmap_tgt (k->tgt);
572 if (tgt->refcount > 1)
573 tgt->refcount--;
574 else
575 gomp_unmap_tgt (tgt);
577 gomp_mutex_unlock (&mm->lock);
580 static void
581 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
582 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
583 bool is_openacc)
585 size_t i;
586 struct splay_tree_key_s cur_node;
587 const int typemask = is_openacc ? 0xff : 0x7;
589 if (!devicep)
590 return;
592 if (mapnum == 0)
593 return;
595 gomp_mutex_lock (&mm->lock);
596 for (i = 0; i < mapnum; i++)
597 if (sizes[i])
599 cur_node.host_start = (uintptr_t) hostaddrs[i];
600 cur_node.host_end = cur_node.host_start + sizes[i];
601 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
602 &cur_node);
603 if (n)
605 int kind = get_kind (is_openacc, kinds, i);
606 if (n->host_start > cur_node.host_start
607 || n->host_end < cur_node.host_end)
608 gomp_fatal ("Trying to update [%p..%p) object when"
609 "only [%p..%p) is mapped",
610 (void *) cur_node.host_start,
611 (void *) cur_node.host_end,
612 (void *) n->host_start,
613 (void *) n->host_end);
614 if (GOMP_MAP_COPY_TO_P (kind & typemask))
615 devicep->host2dev_func (devicep->target_id,
616 (void *) (n->tgt->tgt_start
617 + n->tgt_offset
618 + cur_node.host_start
619 - n->host_start),
620 (void *) cur_node.host_start,
621 cur_node.host_end - cur_node.host_start);
622 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
623 devicep->dev2host_func (devicep->target_id,
624 (void *) cur_node.host_start,
625 (void *) (n->tgt->tgt_start
626 + n->tgt_offset
627 + cur_node.host_start
628 - n->host_start),
629 cur_node.host_end - cur_node.host_start);
631 else
632 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
633 (void *) cur_node.host_start,
634 (void *) cur_node.host_end);
636 gomp_mutex_unlock (&mm->lock);
639 /* This function should be called from every offload image.
640 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
641 the target, and TARGET_DATA needed by target plugin. */
643 void
644 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
645 void *target_data)
647 offload_images = gomp_realloc (offload_images,
648 (num_offload_images + 1)
649 * sizeof (struct offload_image_descr));
651 offload_images[num_offload_images].type = target_type;
652 offload_images[num_offload_images].host_table = host_table;
653 offload_images[num_offload_images].target_data = target_data;
655 num_offload_images++;
658 /* This function initializes the target device, specified by DEVICEP. DEVICEP
659 must be locked on entry, and remains locked on return. */
661 attribute_hidden void
662 gomp_init_device (struct gomp_device_descr *devicep)
664 devicep->init_device_func (devicep->target_id);
665 devicep->is_initialized = true;
668 /* Initialize address mapping tables. MM must be locked on entry, and remains
669 locked on return. */
671 attribute_hidden void
672 gomp_init_tables (struct gomp_device_descr *devicep,
673 struct gomp_memory_mapping *mm)
675 /* Get address mapping table for device. */
676 struct mapping_table *table = NULL;
677 int num_entries = devicep->get_table_func (devicep->target_id, &table);
679 /* Insert host-target address mapping into dev_splay_tree. */
680 int i;
681 for (i = 0; i < num_entries; i++)
683 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
684 tgt->refcount = 1;
685 tgt->array = gomp_malloc (sizeof (*tgt->array));
686 tgt->tgt_start = table[i].tgt_start;
687 tgt->tgt_end = table[i].tgt_end;
688 tgt->to_free = NULL;
689 tgt->list_count = 0;
690 tgt->device_descr = devicep;
691 splay_tree_node node = tgt->array;
692 splay_tree_key k = &node->key;
693 k->host_start = table[i].host_start;
694 k->host_end = table[i].host_end;
695 k->tgt_offset = 0;
696 k->refcount = 1;
697 k->copy_from = false;
698 k->tgt = tgt;
699 node->left = NULL;
700 node->right = NULL;
701 splay_tree_insert (&mm->splay_tree, node);
704 free (table);
705 mm->is_initialized = true;
708 /* Free address mapping tables. MM must be locked on entry, and remains locked
709 on return. */
711 attribute_hidden void
712 gomp_free_memmap (struct gomp_memory_mapping *mm)
714 while (mm->splay_tree.root)
716 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
718 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
719 free (tgt->array);
720 free (tgt);
723 mm->is_initialized = false;
726 /* This function de-initializes the target device, specified by DEVICEP.
727 DEVICEP must be locked on entry, and remains locked on return. */
729 attribute_hidden void
730 gomp_fini_device (struct gomp_device_descr *devicep)
732 if (devicep->is_initialized)
733 devicep->fini_device_func (devicep->target_id);
735 devicep->is_initialized = false;
738 /* Called when encountering a target directive. If DEVICE
739 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
740 GOMP_DEVICE_HOST_FALLBACK (or any value
741 larger than last available hw device), use host fallback.
742 FN is address of host code, UNUSED is part of the current ABI, but
743 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
744 with MAPNUM entries, with addresses of the host objects,
745 sizes of the host objects (resp. for pointer kind pointer bias
746 and assumed sizeof (void *) size) and kinds. */
748 void
749 GOMP_target (int device, void (*fn) (void *), const void *unused,
750 size_t mapnum, void **hostaddrs, size_t *sizes,
751 unsigned char *kinds)
753 struct gomp_device_descr *devicep = resolve_device (device);
755 if (devicep == NULL
756 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
758 /* Host fallback. */
759 struct gomp_thread old_thr, *thr = gomp_thread ();
760 old_thr = *thr;
761 memset (thr, '\0', sizeof (*thr));
762 if (gomp_places_list)
764 thr->place = old_thr.place;
765 thr->ts.place_partition_len = gomp_places_list_len;
767 fn (hostaddrs);
768 gomp_free_thread (thr);
769 *thr = old_thr;
770 return;
773 gomp_mutex_lock (&devicep->lock);
774 if (!devicep->is_initialized)
775 gomp_init_device (devicep);
776 gomp_mutex_unlock (&devicep->lock);
778 void *fn_addr;
780 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
781 fn_addr = (void *) fn;
782 else
784 struct gomp_memory_mapping *mm = &devicep->mem_map;
785 gomp_mutex_lock (&mm->lock);
787 if (!mm->is_initialized)
788 gomp_init_tables (devicep, mm);
790 struct splay_tree_key_s k;
791 k.host_start = (uintptr_t) fn;
792 k.host_end = k.host_start + 1;
793 splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
794 if (tgt_fn == NULL)
795 gomp_fatal ("Target function wasn't mapped");
797 gomp_mutex_unlock (&mm->lock);
799 fn_addr = (void *) tgt_fn->tgt->tgt_start;
802 struct target_mem_desc *tgt_vars
803 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
804 true);
805 struct gomp_thread old_thr, *thr = gomp_thread ();
806 old_thr = *thr;
807 memset (thr, '\0', sizeof (*thr));
808 if (gomp_places_list)
810 thr->place = old_thr.place;
811 thr->ts.place_partition_len = gomp_places_list_len;
813 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
814 gomp_free_thread (thr);
815 *thr = old_thr;
816 gomp_unmap_vars (tgt_vars, true);
819 void
820 GOMP_target_data (int device, const void *unused, size_t mapnum,
821 void **hostaddrs, size_t *sizes, unsigned char *kinds)
823 struct gomp_device_descr *devicep = resolve_device (device);
825 if (devicep == NULL
826 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
828 /* Host fallback. */
829 struct gomp_task_icv *icv = gomp_icv (false);
830 if (icv->target_data)
832 /* Even when doing a host fallback, if there are any active
833 #pragma omp target data constructs, need to remember the
834 new #pragma omp target data, otherwise GOMP_target_end_data
835 would get out of sync. */
836 struct target_mem_desc *tgt
837 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
838 tgt->prev = icv->target_data;
839 icv->target_data = tgt;
841 return;
844 gomp_mutex_lock (&devicep->lock);
845 if (!devicep->is_initialized)
846 gomp_init_device (devicep);
847 gomp_mutex_unlock (&devicep->lock);
849 struct gomp_memory_mapping *mm = &devicep->mem_map;
850 gomp_mutex_lock (&mm->lock);
851 if (!mm->is_initialized)
852 gomp_init_tables (devicep, mm);
853 gomp_mutex_unlock (&mm->lock);
855 struct target_mem_desc *tgt
856 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
857 false);
858 struct gomp_task_icv *icv = gomp_icv (true);
859 tgt->prev = icv->target_data;
860 icv->target_data = tgt;
863 void
864 GOMP_target_end_data (void)
866 struct gomp_task_icv *icv = gomp_icv (false);
867 if (icv->target_data)
869 struct target_mem_desc *tgt = icv->target_data;
870 icv->target_data = tgt->prev;
871 gomp_unmap_vars (tgt, true);
875 void
876 GOMP_target_update (int device, const void *unused, size_t mapnum,
877 void **hostaddrs, size_t *sizes, unsigned char *kinds)
879 struct gomp_device_descr *devicep = resolve_device (device);
881 if (devicep == NULL
882 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
883 return;
885 gomp_mutex_lock (&devicep->lock);
886 if (!devicep->is_initialized)
887 gomp_init_device (devicep);
888 gomp_mutex_unlock (&devicep->lock);
890 struct gomp_memory_mapping *mm = &devicep->mem_map;
891 gomp_mutex_lock (&mm->lock);
892 if (!mm->is_initialized)
893 gomp_init_tables (devicep, mm);
894 gomp_mutex_unlock (&mm->lock);
896 gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
899 void
900 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
902 if (thread_limit)
904 struct gomp_task_icv *icv = gomp_icv (true);
905 icv->thread_limit_var
906 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
908 (void) num_teams;
911 #ifdef PLUGIN_SUPPORT
913 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
914 in PLUGIN_NAME.
915 The handles of the found functions are stored in the corresponding fields
916 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
918 static bool
919 gomp_load_plugin_for_device (struct gomp_device_descr *device,
920 const char *plugin_name)
922 const char *err = NULL, *last_missing = NULL;
923 int optional_present, optional_total;
925 /* Clear any existing error. */
926 dlerror ();
928 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
929 if (!plugin_handle)
931 err = dlerror ();
932 goto out;
935 /* Check if all required functions are available in the plugin and store
936 their handlers. */
937 #define DLSYM(f) \
938 do \
940 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
941 err = dlerror (); \
942 if (err != NULL) \
943 goto out; \
945 while (0)
946 /* Similar, but missing functions are not an error. */
947 #define DLSYM_OPT(f, n) \
948 do \
950 const char *tmp_err; \
951 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
952 tmp_err = dlerror (); \
953 if (tmp_err == NULL) \
954 optional_present++; \
955 else \
956 last_missing = #n; \
957 optional_total++; \
959 while (0)
961 DLSYM (get_name);
962 DLSYM (get_caps);
963 DLSYM (get_type);
964 DLSYM (get_num_devices);
965 DLSYM (register_image);
966 DLSYM (init_device);
967 DLSYM (fini_device);
968 DLSYM (get_table);
969 DLSYM (alloc);
970 DLSYM (free);
971 DLSYM (dev2host);
972 DLSYM (host2dev);
973 device->capabilities = device->get_caps_func ();
974 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
975 DLSYM (run);
976 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
978 optional_present = optional_total = 0;
979 DLSYM_OPT (openacc.exec, openacc_parallel);
980 DLSYM_OPT (openacc.open_device, openacc_open_device);
981 DLSYM_OPT (openacc.close_device, openacc_close_device);
982 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
983 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
984 DLSYM_OPT (openacc.register_async_cleanup,
985 openacc_register_async_cleanup);
986 DLSYM_OPT (openacc.async_test, openacc_async_test);
987 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
988 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
989 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
990 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
991 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
992 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
993 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
994 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
995 /* Require all the OpenACC handlers if we have
996 GOMP_OFFLOAD_CAP_OPENACC_200. */
997 if (optional_present != optional_total)
999 err = "plugin missing OpenACC handler function";
1000 goto out;
1002 optional_present = optional_total = 0;
1003 DLSYM_OPT (openacc.cuda.get_current_device,
1004 openacc_get_current_cuda_device);
1005 DLSYM_OPT (openacc.cuda.get_current_context,
1006 openacc_get_current_cuda_context);
1007 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1008 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1009 /* Make sure all the CUDA functions are there if any of them are. */
1010 if (optional_present && optional_present != optional_total)
1012 err = "plugin missing OpenACC CUDA handler function";
1013 goto out;
1016 #undef DLSYM
1017 #undef DLSYM_OPT
1019 out:
1020 if (err != NULL)
1022 gomp_error ("while loading %s: %s", plugin_name, err);
1023 if (last_missing)
1024 gomp_error ("missing function was %s", last_missing);
1025 if (plugin_handle)
1026 dlclose (plugin_handle);
1028 return err == NULL;
1031 /* This function adds a compatible offload image IMAGE to an accelerator device
1032 DEVICE. DEVICE must be locked on entry, and remains locked on return. */
1034 static void
1035 gomp_register_image_for_device (struct gomp_device_descr *device,
1036 struct offload_image_descr *image)
1038 if (!device->offload_regions_registered
1039 && (device->type == image->type
1040 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1042 device->register_image_func (image->host_table, image->target_data);
1043 device->offload_regions_registered = true;
1047 /* This function initializes the runtime needed for offloading.
1048 It parses the list of offload targets and tries to load the plugins for
1049 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1050 will be set, and the array DEVICES initialized, containing descriptors for
1051 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1052 by the others. */
1054 static void
1055 gomp_target_init (void)
1057 const char *prefix ="libgomp-plugin-";
1058 const char *suffix = SONAME_SUFFIX (1);
1059 const char *cur, *next;
1060 char *plugin_name;
1061 int i, new_num_devices;
1063 num_devices = 0;
1064 devices = NULL;
1066 cur = OFFLOAD_TARGETS;
1067 if (*cur)
1070 struct gomp_device_descr current_device;
1072 next = strchr (cur, ',');
1074 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1075 + strlen (prefix) + strlen (suffix));
1076 if (!plugin_name)
1078 num_devices = 0;
1079 break;
1082 strcpy (plugin_name, prefix);
1083 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1084 strcat (plugin_name, suffix);
1086 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1088 new_num_devices = current_device.get_num_devices_func ();
1089 if (new_num_devices >= 1)
1091 /* Augment DEVICES and NUM_DEVICES. */
1093 devices = realloc (devices, (num_devices + new_num_devices)
1094 * sizeof (struct gomp_device_descr));
1095 if (!devices)
1097 num_devices = 0;
1098 free (plugin_name);
1099 break;
1102 current_device.name = current_device.get_name_func ();
1103 /* current_device.capabilities has already been set. */
1104 current_device.type = current_device.get_type_func ();
1105 current_device.mem_map.is_initialized = false;
1106 current_device.mem_map.splay_tree.root = NULL;
1107 current_device.is_initialized = false;
1108 current_device.offload_regions_registered = false;
1109 current_device.openacc.data_environ = NULL;
1110 current_device.openacc.target_data = NULL;
1111 for (i = 0; i < new_num_devices; i++)
1113 current_device.target_id = i;
1114 devices[num_devices] = current_device;
1115 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1116 gomp_mutex_init (&devices[num_devices].lock);
1117 num_devices++;
1122 free (plugin_name);
1123 cur = next + 1;
1125 while (next);
1127 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1128 NUM_DEVICES_OPENMP. */
1129 struct gomp_device_descr *devices_s
1130 = malloc (num_devices * sizeof (struct gomp_device_descr));
1131 if (!devices_s)
1133 num_devices = 0;
1134 free (devices);
1135 devices = NULL;
1137 num_devices_openmp = 0;
1138 for (i = 0; i < num_devices; i++)
1139 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1140 devices_s[num_devices_openmp++] = devices[i];
1141 int num_devices_after_openmp = num_devices_openmp;
1142 for (i = 0; i < num_devices; i++)
1143 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1144 devices_s[num_devices_after_openmp++] = devices[i];
1145 free (devices);
1146 devices = devices_s;
1148 for (i = 0; i < num_devices; i++)
1150 int j;
1152 for (j = 0; j < num_offload_images; j++)
1153 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1155 /* The 'devices' array can be moved (by the realloc call) until we have
1156 found all the plugins, so registering with the OpenACC runtime (which
1157 takes a copy of the pointer argument) must be delayed until now. */
1158 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1159 goacc_register (&devices[i]);
1162 free (offload_images);
1163 offload_images = NULL;
1164 num_offload_images = 0;
1167 #else /* PLUGIN_SUPPORT */
1168 /* If dlfcn.h is unavailable we always fallback to host execution.
1169 GOMP_target* routines are just stubs for this case. */
1170 static void
1171 gomp_target_init (void)
1174 #endif /* PLUGIN_SUPPORT */