PR target/64688
[official-gcc.git] / libgomp / target.c
blobebff55e47279c9f3691166bf318dd4fb0beead0f
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
39 #ifdef PLUGIN_SUPPORT
40 #include <dlfcn.h>
41 #endif
43 static void gomp_target_init (void);
45 /* The whole initialization code for offloading plugins is only run one. */
46 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
48 /* This structure describes an offload image.
49 It contains type of the target device, pointer to host table descriptor, and
50 pointer to target data. */
51 struct offload_image_descr {
52 enum offload_target_type type;
53 void *host_table;
54 void *target_data;
57 /* Array of descriptors of offload images. */
58 static struct offload_image_descr *offload_images;
60 /* Total number of offload images. */
61 static int num_offload_images;
63 /* Array of descriptors for all available devices. */
64 static struct gomp_device_descr *devices;
66 #ifdef PLUGIN_SUPPORT
67 /* Total number of available devices. */
68 static int num_devices;
69 #endif
71 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
72 static int num_devices_openmp;
74 /* The comparison function. */
76 attribute_hidden int
77 splay_compare (splay_tree_key x, splay_tree_key y)
79 if (x->host_start == x->host_end
80 && y->host_start == y->host_end)
81 return 0;
82 if (x->host_end <= y->host_start)
83 return -1;
84 if (x->host_start >= y->host_end)
85 return 1;
86 return 0;
89 #include "splay-tree.h"
91 attribute_hidden void
92 gomp_init_targets_once (void)
94 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
97 attribute_hidden int
98 gomp_get_num_devices (void)
100 gomp_init_targets_once ();
101 return num_devices_openmp;
104 static struct gomp_device_descr *
105 resolve_device (int device_id)
107 if (device_id == GOMP_DEVICE_ICV)
109 struct gomp_task_icv *icv = gomp_icv (false);
110 device_id = icv->default_device_var;
113 if (device_id < 0 || device_id >= gomp_get_num_devices ())
114 return NULL;
116 return &devices[device_id];
120 /* Handle the case where splay_tree_lookup found oldn for newn.
121 Helper function of gomp_map_vars. */
123 static inline void
124 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
125 unsigned char kind)
127 if ((kind & GOMP_MAP_FLAG_FORCE)
128 || oldn->host_start > newn->host_start
129 || oldn->host_end < newn->host_end)
130 gomp_fatal ("Trying to map into device [%p..%p) object when "
131 "[%p..%p) is already mapped",
132 (void *) newn->host_start, (void *) newn->host_end,
133 (void *) oldn->host_start, (void *) oldn->host_end);
134 oldn->refcount++;
137 static int
138 get_kind (bool is_openacc, void *kinds, int idx)
140 return is_openacc ? ((unsigned short *) kinds)[idx]
141 : ((unsigned char *) kinds)[idx];
144 attribute_hidden struct target_mem_desc *
145 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
146 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
147 bool is_openacc, bool is_target)
149 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
150 const int rshift = is_openacc ? 8 : 3;
151 const int typemask = is_openacc ? 0xff : 0x7;
152 struct gomp_memory_mapping *mm = &devicep->mem_map;
153 struct splay_tree_key_s cur_node;
154 struct target_mem_desc *tgt
155 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
156 tgt->list_count = mapnum;
157 tgt->refcount = 1;
158 tgt->device_descr = devicep;
159 tgt->mem_map = mm;
161 if (mapnum == 0)
162 return tgt;
164 tgt_align = sizeof (void *);
165 tgt_size = 0;
166 if (is_target)
168 size_t align = 4 * sizeof (void *);
169 tgt_align = align;
170 tgt_size = mapnum * sizeof (void *);
173 gomp_mutex_lock (&mm->lock);
175 for (i = 0; i < mapnum; i++)
177 int kind = get_kind (is_openacc, kinds, i);
178 if (hostaddrs[i] == NULL)
180 tgt->list[i] = NULL;
181 continue;
183 cur_node.host_start = (uintptr_t) hostaddrs[i];
184 if (!GOMP_MAP_POINTER_P (kind & typemask))
185 cur_node.host_end = cur_node.host_start + sizes[i];
186 else
187 cur_node.host_end = cur_node.host_start + sizeof (void *);
188 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
189 if (n)
191 tgt->list[i] = n;
192 gomp_map_vars_existing (n, &cur_node, kind & typemask);
194 else
196 tgt->list[i] = NULL;
198 size_t align = (size_t) 1 << (kind >> rshift);
199 not_found_cnt++;
200 if (tgt_align < align)
201 tgt_align = align;
202 tgt_size = (tgt_size + align - 1) & ~(align - 1);
203 tgt_size += cur_node.host_end - cur_node.host_start;
204 if ((kind & typemask) == GOMP_MAP_TO_PSET)
206 size_t j;
207 for (j = i + 1; j < mapnum; j++)
208 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
209 & typemask))
210 break;
211 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
212 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
213 > cur_node.host_end))
214 break;
215 else
217 tgt->list[j] = NULL;
218 i++;
224 if (devaddrs)
226 if (mapnum != 1)
227 gomp_fatal ("unexpected aggregation");
228 tgt->to_free = devaddrs[0];
229 tgt->tgt_start = (uintptr_t) tgt->to_free;
230 tgt->tgt_end = tgt->tgt_start + sizes[0];
232 else if (not_found_cnt || is_target)
234 /* Allocate tgt_align aligned tgt_size block of memory. */
235 /* FIXME: Perhaps change interface to allocate properly aligned
236 memory. */
237 tgt->to_free = devicep->alloc_func (devicep->target_id,
238 tgt_size + tgt_align - 1);
239 tgt->tgt_start = (uintptr_t) tgt->to_free;
240 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
241 tgt->tgt_end = tgt->tgt_start + tgt_size;
243 else
245 tgt->to_free = NULL;
246 tgt->tgt_start = 0;
247 tgt->tgt_end = 0;
250 tgt_size = 0;
251 if (is_target)
252 tgt_size = mapnum * sizeof (void *);
254 tgt->array = NULL;
255 if (not_found_cnt)
257 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
258 splay_tree_node array = tgt->array;
259 size_t j;
261 for (i = 0; i < mapnum; i++)
262 if (tgt->list[i] == NULL)
264 int kind = get_kind (is_openacc, kinds, i);
265 if (hostaddrs[i] == NULL)
266 continue;
267 splay_tree_key k = &array->key;
268 k->host_start = (uintptr_t) hostaddrs[i];
269 if (!GOMP_MAP_POINTER_P (kind & typemask))
270 k->host_end = k->host_start + sizes[i];
271 else
272 k->host_end = k->host_start + sizeof (void *);
273 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
274 if (n)
276 tgt->list[i] = n;
277 gomp_map_vars_existing (n, k, kind & typemask);
279 else
281 size_t align = (size_t) 1 << (kind >> rshift);
282 tgt->list[i] = k;
283 tgt_size = (tgt_size + align - 1) & ~(align - 1);
284 k->tgt = tgt;
285 k->tgt_offset = tgt_size;
286 tgt_size += k->host_end - k->host_start;
287 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
288 k->refcount = 1;
289 k->async_refcount = 0;
290 tgt->refcount++;
291 array->left = NULL;
292 array->right = NULL;
293 splay_tree_insert (&mm->splay_tree, array);
294 switch (kind & typemask)
296 case GOMP_MAP_ALLOC:
297 case GOMP_MAP_FROM:
298 case GOMP_MAP_FORCE_ALLOC:
299 case GOMP_MAP_FORCE_FROM:
300 break;
301 case GOMP_MAP_TO:
302 case GOMP_MAP_TOFROM:
303 case GOMP_MAP_FORCE_TO:
304 case GOMP_MAP_FORCE_TOFROM:
305 /* FIXME: Perhaps add some smarts, like if copying
306 several adjacent fields from host to target, use some
307 host buffer to avoid sending each var individually. */
308 devicep->host2dev_func (devicep->target_id,
309 (void *) (tgt->tgt_start
310 + k->tgt_offset),
311 (void *) k->host_start,
312 k->host_end - k->host_start);
313 break;
314 case GOMP_MAP_POINTER:
315 cur_node.host_start
316 = (uintptr_t) *(void **) k->host_start;
317 if (cur_node.host_start == (uintptr_t) NULL)
319 cur_node.tgt_offset = (uintptr_t) NULL;
320 /* FIXME: see above FIXME comment. */
321 devicep->host2dev_func (devicep->target_id,
322 (void *) (tgt->tgt_start
323 + k->tgt_offset),
324 (void *) &cur_node.tgt_offset,
325 sizeof (void *));
326 break;
328 /* Add bias to the pointer value. */
329 cur_node.host_start += sizes[i];
330 cur_node.host_end = cur_node.host_start + 1;
331 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
332 if (n == NULL)
334 /* Could be possibly zero size array section. */
335 cur_node.host_end--;
336 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
337 if (n == NULL)
339 cur_node.host_start--;
340 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
341 cur_node.host_start++;
344 if (n == NULL)
345 gomp_fatal ("Pointer target of array section "
346 "wasn't mapped");
347 cur_node.host_start -= n->host_start;
348 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
349 + cur_node.host_start;
350 /* At this point tgt_offset is target address of the
351 array section. Now subtract bias to get what we want
352 to initialize the pointer with. */
353 cur_node.tgt_offset -= sizes[i];
354 /* FIXME: see above FIXME comment. */
355 devicep->host2dev_func (devicep->target_id,
356 (void *) (tgt->tgt_start
357 + k->tgt_offset),
358 (void *) &cur_node.tgt_offset,
359 sizeof (void *));
360 break;
361 case GOMP_MAP_TO_PSET:
362 /* FIXME: see above FIXME comment. */
363 devicep->host2dev_func (devicep->target_id,
364 (void *) (tgt->tgt_start
365 + k->tgt_offset),
366 (void *) k->host_start,
367 k->host_end - k->host_start);
369 for (j = i + 1; j < mapnum; j++)
370 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
371 & typemask))
372 break;
373 else if ((uintptr_t) hostaddrs[j] < k->host_start
374 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
375 > k->host_end))
376 break;
377 else
379 tgt->list[j] = k;
380 k->refcount++;
381 cur_node.host_start
382 = (uintptr_t) *(void **) hostaddrs[j];
383 if (cur_node.host_start == (uintptr_t) NULL)
385 cur_node.tgt_offset = (uintptr_t) NULL;
386 /* FIXME: see above FIXME comment. */
387 devicep->host2dev_func (devicep->target_id,
388 (void *) (tgt->tgt_start + k->tgt_offset
389 + ((uintptr_t) hostaddrs[j]
390 - k->host_start)),
391 (void *) &cur_node.tgt_offset,
392 sizeof (void *));
393 i++;
394 continue;
396 /* Add bias to the pointer value. */
397 cur_node.host_start += sizes[j];
398 cur_node.host_end = cur_node.host_start + 1;
399 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
400 if (n == NULL)
402 /* Could be possibly zero size array section. */
403 cur_node.host_end--;
404 n = splay_tree_lookup (&mm->splay_tree,
405 &cur_node);
406 if (n == NULL)
408 cur_node.host_start--;
409 n = splay_tree_lookup (&mm->splay_tree,
410 &cur_node);
411 cur_node.host_start++;
414 if (n == NULL)
415 gomp_fatal ("Pointer target of array section "
416 "wasn't mapped");
417 cur_node.host_start -= n->host_start;
418 cur_node.tgt_offset = n->tgt->tgt_start
419 + n->tgt_offset
420 + cur_node.host_start;
421 /* At this point tgt_offset is target address of the
422 array section. Now subtract bias to get what we
423 want to initialize the pointer with. */
424 cur_node.tgt_offset -= sizes[j];
425 /* FIXME: see above FIXME comment. */
426 devicep->host2dev_func (devicep->target_id,
427 (void *) (tgt->tgt_start + k->tgt_offset
428 + ((uintptr_t) hostaddrs[j]
429 - k->host_start)),
430 (void *) &cur_node.tgt_offset,
431 sizeof (void *));
432 i++;
434 break;
435 case GOMP_MAP_FORCE_PRESENT:
437 /* We already looked up the memory region above and it
438 was missing. */
439 size_t size = k->host_end - k->host_start;
440 gomp_fatal ("present clause: !acc_is_present (%p, "
441 "%zd (0x%zx))", (void *) k->host_start,
442 size, size);
444 break;
445 case GOMP_MAP_FORCE_DEVICEPTR:
446 assert (k->host_end - k->host_start == sizeof (void *));
448 devicep->host2dev_func (devicep->target_id,
449 (void *) (tgt->tgt_start
450 + k->tgt_offset),
451 (void *) k->host_start,
452 sizeof (void *));
453 break;
454 default:
455 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
456 kind);
458 array++;
463 if (is_target)
465 for (i = 0; i < mapnum; i++)
467 if (tgt->list[i] == NULL)
468 cur_node.tgt_offset = (uintptr_t) NULL;
469 else
470 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
471 + tgt->list[i]->tgt_offset;
472 /* FIXME: see above FIXME comment. */
473 devicep->host2dev_func (devicep->target_id,
474 (void *) (tgt->tgt_start
475 + i * sizeof (void *)),
476 (void *) &cur_node.tgt_offset,
477 sizeof (void *));
481 gomp_mutex_unlock (&mm->lock);
482 return tgt;
485 static void
486 gomp_unmap_tgt (struct target_mem_desc *tgt)
488 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
489 if (tgt->tgt_end)
490 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
492 free (tgt->array);
493 free (tgt);
496 /* Decrease the refcount for a set of mapped variables, and queue asychronous
497 copies from the device back to the host after any work that has been issued.
498 Because the regions are still "live", increment an asynchronous reference
499 count to indicate that they should not be unmapped from host-side data
500 structures until the asynchronous copy has completed. */
502 attribute_hidden void
503 gomp_copy_from_async (struct target_mem_desc *tgt)
505 struct gomp_device_descr *devicep = tgt->device_descr;
506 struct gomp_memory_mapping *mm = tgt->mem_map;
507 size_t i;
509 gomp_mutex_lock (&mm->lock);
511 for (i = 0; i < tgt->list_count; i++)
512 if (tgt->list[i] == NULL)
514 else if (tgt->list[i]->refcount > 1)
516 tgt->list[i]->refcount--;
517 tgt->list[i]->async_refcount++;
519 else
521 splay_tree_key k = tgt->list[i];
522 if (k->copy_from)
523 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
524 (void *) (k->tgt->tgt_start + k->tgt_offset),
525 k->host_end - k->host_start);
528 gomp_mutex_unlock (&mm->lock);
531 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
532 variables back from device to host: if it is false, it is assumed that this
533 has been done already, i.e. by gomp_copy_from_async above. */
535 attribute_hidden void
536 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
538 struct gomp_device_descr *devicep = tgt->device_descr;
539 struct gomp_memory_mapping *mm = tgt->mem_map;
541 if (tgt->list_count == 0)
543 free (tgt);
544 return;
547 gomp_mutex_lock (&mm->lock);
549 size_t i;
550 for (i = 0; i < tgt->list_count; i++)
551 if (tgt->list[i] == NULL)
553 else if (tgt->list[i]->refcount > 1)
554 tgt->list[i]->refcount--;
555 else if (tgt->list[i]->async_refcount > 0)
556 tgt->list[i]->async_refcount--;
557 else
559 splay_tree_key k = tgt->list[i];
560 if (k->copy_from && do_copyfrom)
561 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
562 (void *) (k->tgt->tgt_start + k->tgt_offset),
563 k->host_end - k->host_start);
564 splay_tree_remove (&mm->splay_tree, k);
565 if (k->tgt->refcount > 1)
566 k->tgt->refcount--;
567 else
568 gomp_unmap_tgt (k->tgt);
571 if (tgt->refcount > 1)
572 tgt->refcount--;
573 else
574 gomp_unmap_tgt (tgt);
576 gomp_mutex_unlock (&mm->lock);
579 static void
580 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
581 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
582 bool is_openacc)
584 size_t i;
585 struct splay_tree_key_s cur_node;
586 const int typemask = is_openacc ? 0xff : 0x7;
588 if (!devicep)
589 return;
591 if (mapnum == 0)
592 return;
594 gomp_mutex_lock (&mm->lock);
595 for (i = 0; i < mapnum; i++)
596 if (sizes[i])
598 cur_node.host_start = (uintptr_t) hostaddrs[i];
599 cur_node.host_end = cur_node.host_start + sizes[i];
600 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
601 &cur_node);
602 if (n)
604 int kind = get_kind (is_openacc, kinds, i);
605 if (n->host_start > cur_node.host_start
606 || n->host_end < cur_node.host_end)
607 gomp_fatal ("Trying to update [%p..%p) object when"
608 "only [%p..%p) is mapped",
609 (void *) cur_node.host_start,
610 (void *) cur_node.host_end,
611 (void *) n->host_start,
612 (void *) n->host_end);
613 if (GOMP_MAP_COPY_TO_P (kind & typemask))
614 devicep->host2dev_func (devicep->target_id,
615 (void *) (n->tgt->tgt_start
616 + n->tgt_offset
617 + cur_node.host_start
618 - n->host_start),
619 (void *) cur_node.host_start,
620 cur_node.host_end - cur_node.host_start);
621 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
622 devicep->dev2host_func (devicep->target_id,
623 (void *) cur_node.host_start,
624 (void *) (n->tgt->tgt_start
625 + n->tgt_offset
626 + cur_node.host_start
627 - n->host_start),
628 cur_node.host_end - cur_node.host_start);
630 else
631 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
632 (void *) cur_node.host_start,
633 (void *) cur_node.host_end);
635 gomp_mutex_unlock (&mm->lock);
638 /* This function should be called from every offload image.
639 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
640 the target, and TARGET_DATA needed by target plugin. */
642 void
643 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
644 void *target_data)
646 offload_images = gomp_realloc (offload_images,
647 (num_offload_images + 1)
648 * sizeof (struct offload_image_descr));
650 offload_images[num_offload_images].type = target_type;
651 offload_images[num_offload_images].host_table = host_table;
652 offload_images[num_offload_images].target_data = target_data;
654 num_offload_images++;
657 /* This function initializes the target device, specified by DEVICEP. DEVICEP
658 must be locked on entry, and remains locked on return. */
660 attribute_hidden void
661 gomp_init_device (struct gomp_device_descr *devicep)
663 devicep->init_device_func (devicep->target_id);
664 devicep->is_initialized = true;
667 /* Initialize address mapping tables. MM must be locked on entry, and remains
668 locked on return. */
670 attribute_hidden void
671 gomp_init_tables (struct gomp_device_descr *devicep,
672 struct gomp_memory_mapping *mm)
674 /* Get address mapping table for device. */
675 struct mapping_table *table = NULL;
676 int num_entries = devicep->get_table_func (devicep->target_id, &table);
678 /* Insert host-target address mapping into dev_splay_tree. */
679 int i;
680 for (i = 0; i < num_entries; i++)
682 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
683 tgt->refcount = 1;
684 tgt->array = gomp_malloc (sizeof (*tgt->array));
685 tgt->tgt_start = table[i].tgt_start;
686 tgt->tgt_end = table[i].tgt_end;
687 tgt->to_free = NULL;
688 tgt->list_count = 0;
689 tgt->device_descr = devicep;
690 splay_tree_node node = tgt->array;
691 splay_tree_key k = &node->key;
692 k->host_start = table[i].host_start;
693 k->host_end = table[i].host_end;
694 k->tgt_offset = 0;
695 k->refcount = 1;
696 k->copy_from = false;
697 k->tgt = tgt;
698 node->left = NULL;
699 node->right = NULL;
700 splay_tree_insert (&mm->splay_tree, node);
703 free (table);
704 mm->is_initialized = true;
707 /* Free address mapping tables. MM must be locked on entry, and remains locked
708 on return. */
710 attribute_hidden void
711 gomp_free_memmap (struct gomp_memory_mapping *mm)
713 while (mm->splay_tree.root)
715 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
717 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
718 free (tgt->array);
719 free (tgt);
722 mm->is_initialized = false;
725 /* This function de-initializes the target device, specified by DEVICEP.
726 DEVICEP must be locked on entry, and remains locked on return. */
728 attribute_hidden void
729 gomp_fini_device (struct gomp_device_descr *devicep)
731 if (devicep->is_initialized)
732 devicep->fini_device_func (devicep->target_id);
734 devicep->is_initialized = false;
737 /* Called when encountering a target directive. If DEVICE
738 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
739 GOMP_DEVICE_HOST_FALLBACK (or any value
740 larger than last available hw device), use host fallback.
741 FN is address of host code, UNUSED is part of the current ABI, but
742 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
743 with MAPNUM entries, with addresses of the host objects,
744 sizes of the host objects (resp. for pointer kind pointer bias
745 and assumed sizeof (void *) size) and kinds. */
747 void
748 GOMP_target (int device, void (*fn) (void *), const void *unused,
749 size_t mapnum, void **hostaddrs, size_t *sizes,
750 unsigned char *kinds)
752 struct gomp_device_descr *devicep = resolve_device (device);
754 if (devicep == NULL
755 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
757 /* Host fallback. */
758 struct gomp_thread old_thr, *thr = gomp_thread ();
759 old_thr = *thr;
760 memset (thr, '\0', sizeof (*thr));
761 if (gomp_places_list)
763 thr->place = old_thr.place;
764 thr->ts.place_partition_len = gomp_places_list_len;
766 fn (hostaddrs);
767 gomp_free_thread (thr);
768 *thr = old_thr;
769 return;
772 gomp_mutex_lock (&devicep->lock);
773 if (!devicep->is_initialized)
774 gomp_init_device (devicep);
775 gomp_mutex_unlock (&devicep->lock);
777 void *fn_addr;
779 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
780 fn_addr = (void *) fn;
781 else
783 struct gomp_memory_mapping *mm = &devicep->mem_map;
784 gomp_mutex_lock (&mm->lock);
786 if (!mm->is_initialized)
787 gomp_init_tables (devicep, mm);
789 struct splay_tree_key_s k;
790 k.host_start = (uintptr_t) fn;
791 k.host_end = k.host_start + 1;
792 splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
793 if (tgt_fn == NULL)
794 gomp_fatal ("Target function wasn't mapped");
796 gomp_mutex_unlock (&mm->lock);
798 fn_addr = (void *) tgt_fn->tgt->tgt_start;
801 struct target_mem_desc *tgt_vars
802 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
803 true);
804 struct gomp_thread old_thr, *thr = gomp_thread ();
805 old_thr = *thr;
806 memset (thr, '\0', sizeof (*thr));
807 if (gomp_places_list)
809 thr->place = old_thr.place;
810 thr->ts.place_partition_len = gomp_places_list_len;
812 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
813 gomp_free_thread (thr);
814 *thr = old_thr;
815 gomp_unmap_vars (tgt_vars, true);
818 void
819 GOMP_target_data (int device, const void *unused, size_t mapnum,
820 void **hostaddrs, size_t *sizes, unsigned char *kinds)
822 struct gomp_device_descr *devicep = resolve_device (device);
824 if (devicep == NULL
825 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
827 /* Host fallback. */
828 struct gomp_task_icv *icv = gomp_icv (false);
829 if (icv->target_data)
831 /* Even when doing a host fallback, if there are any active
832 #pragma omp target data constructs, need to remember the
833 new #pragma omp target data, otherwise GOMP_target_end_data
834 would get out of sync. */
835 struct target_mem_desc *tgt
836 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
837 tgt->prev = icv->target_data;
838 icv->target_data = tgt;
840 return;
843 gomp_mutex_lock (&devicep->lock);
844 if (!devicep->is_initialized)
845 gomp_init_device (devicep);
846 gomp_mutex_unlock (&devicep->lock);
848 struct gomp_memory_mapping *mm = &devicep->mem_map;
849 gomp_mutex_lock (&mm->lock);
850 if (!mm->is_initialized)
851 gomp_init_tables (devicep, mm);
852 gomp_mutex_unlock (&mm->lock);
854 struct target_mem_desc *tgt
855 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
856 false);
857 struct gomp_task_icv *icv = gomp_icv (true);
858 tgt->prev = icv->target_data;
859 icv->target_data = tgt;
862 void
863 GOMP_target_end_data (void)
865 struct gomp_task_icv *icv = gomp_icv (false);
866 if (icv->target_data)
868 struct target_mem_desc *tgt = icv->target_data;
869 icv->target_data = tgt->prev;
870 gomp_unmap_vars (tgt, true);
874 void
875 GOMP_target_update (int device, const void *unused, size_t mapnum,
876 void **hostaddrs, size_t *sizes, unsigned char *kinds)
878 struct gomp_device_descr *devicep = resolve_device (device);
880 if (devicep == NULL
881 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
882 return;
884 gomp_mutex_lock (&devicep->lock);
885 if (!devicep->is_initialized)
886 gomp_init_device (devicep);
887 gomp_mutex_unlock (&devicep->lock);
889 struct gomp_memory_mapping *mm = &devicep->mem_map;
890 gomp_mutex_lock (&mm->lock);
891 if (!mm->is_initialized)
892 gomp_init_tables (devicep, mm);
893 gomp_mutex_unlock (&mm->lock);
895 gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
898 void
899 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
901 if (thread_limit)
903 struct gomp_task_icv *icv = gomp_icv (true);
904 icv->thread_limit_var
905 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
907 (void) num_teams;
910 #ifdef PLUGIN_SUPPORT
912 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
913 in PLUGIN_NAME.
914 The handles of the found functions are stored in the corresponding fields
915 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
917 static bool
918 gomp_load_plugin_for_device (struct gomp_device_descr *device,
919 const char *plugin_name)
921 char *err = NULL, *last_missing = NULL;
922 int optional_present, optional_total;
924 /* Clear any existing error. */
925 dlerror ();
927 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
928 if (!plugin_handle)
930 err = dlerror ();
931 goto out;
934 /* Check if all required functions are available in the plugin and store
935 their handlers. */
936 #define DLSYM(f) \
937 do \
939 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
940 err = dlerror (); \
941 if (err != NULL) \
942 goto out; \
944 while (0)
945 /* Similar, but missing functions are not an error. */
946 #define DLSYM_OPT(f, n) \
947 do \
949 char *tmp_err; \
950 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
951 tmp_err = dlerror (); \
952 if (tmp_err == NULL) \
953 optional_present++; \
954 else \
955 last_missing = #n; \
956 optional_total++; \
958 while (0)
960 DLSYM (get_name);
961 DLSYM (get_caps);
962 DLSYM (get_type);
963 DLSYM (get_num_devices);
964 DLSYM (register_image);
965 DLSYM (init_device);
966 DLSYM (fini_device);
967 DLSYM (get_table);
968 DLSYM (alloc);
969 DLSYM (free);
970 DLSYM (dev2host);
971 DLSYM (host2dev);
972 device->capabilities = device->get_caps_func ();
973 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
974 DLSYM (run);
975 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
977 optional_present = optional_total = 0;
978 DLSYM_OPT (openacc.exec, openacc_parallel);
979 DLSYM_OPT (openacc.open_device, openacc_open_device);
980 DLSYM_OPT (openacc.close_device, openacc_close_device);
981 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
982 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
983 DLSYM_OPT (openacc.register_async_cleanup,
984 openacc_register_async_cleanup);
985 DLSYM_OPT (openacc.async_test, openacc_async_test);
986 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
987 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
988 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
989 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
990 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
991 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
992 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
993 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
994 /* Require all the OpenACC handlers if we have
995 GOMP_OFFLOAD_CAP_OPENACC_200. */
996 if (optional_present != optional_total)
998 err = "plugin missing OpenACC handler function";
999 goto out;
1001 optional_present = optional_total = 0;
1002 DLSYM_OPT (openacc.cuda.get_current_device,
1003 openacc_get_current_cuda_device);
1004 DLSYM_OPT (openacc.cuda.get_current_context,
1005 openacc_get_current_cuda_context);
1006 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1007 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1008 /* Make sure all the CUDA functions are there if any of them are. */
1009 if (optional_present && optional_present != optional_total)
1011 err = "plugin missing OpenACC CUDA handler function";
1012 goto out;
1015 #undef DLSYM
1016 #undef DLSYM_OPT
1018 out:
1019 if (err != NULL)
1021 gomp_error ("while loading %s: %s", plugin_name, err);
1022 if (last_missing)
1023 gomp_error ("missing function was %s", last_missing);
1024 if (plugin_handle)
1025 dlclose (plugin_handle);
1027 return err == NULL;
1030 /* This function adds a compatible offload image IMAGE to an accelerator device
1031 DEVICE. DEVICE must be locked on entry, and remains locked on return. */
1033 static void
1034 gomp_register_image_for_device (struct gomp_device_descr *device,
1035 struct offload_image_descr *image)
1037 if (!device->offload_regions_registered
1038 && (device->type == image->type
1039 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1041 device->register_image_func (image->host_table, image->target_data);
1042 device->offload_regions_registered = true;
1046 /* This function initializes the runtime needed for offloading.
1047 It parses the list of offload targets and tries to load the plugins for
1048 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1049 will be set, and the array DEVICES initialized, containing descriptors for
1050 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1051 by the others. */
1053 static void
1054 gomp_target_init (void)
1056 const char *prefix ="libgomp-plugin-";
1057 const char *suffix = ".so.1";
1058 const char *cur, *next;
1059 char *plugin_name;
1060 int i, new_num_devices;
1062 num_devices = 0;
1063 devices = NULL;
1065 cur = OFFLOAD_TARGETS;
1066 if (*cur)
1069 struct gomp_device_descr current_device;
1071 next = strchr (cur, ',');
1073 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1074 + strlen (prefix) + strlen (suffix));
1075 if (!plugin_name)
1077 num_devices = 0;
1078 break;
1081 strcpy (plugin_name, prefix);
1082 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1083 strcat (plugin_name, suffix);
1085 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1087 new_num_devices = current_device.get_num_devices_func ();
1088 if (new_num_devices >= 1)
1090 /* Augment DEVICES and NUM_DEVICES. */
1092 devices = realloc (devices, (num_devices + new_num_devices)
1093 * sizeof (struct gomp_device_descr));
1094 if (!devices)
1096 num_devices = 0;
1097 free (plugin_name);
1098 break;
1101 current_device.name = current_device.get_name_func ();
1102 /* current_device.capabilities has already been set. */
1103 current_device.type = current_device.get_type_func ();
1104 current_device.mem_map.is_initialized = false;
1105 current_device.mem_map.splay_tree.root = NULL;
1106 current_device.is_initialized = false;
1107 current_device.offload_regions_registered = false;
1108 current_device.openacc.data_environ = NULL;
1109 current_device.openacc.target_data = NULL;
1110 for (i = 0; i < new_num_devices; i++)
1112 current_device.target_id = i;
1113 devices[num_devices] = current_device;
1114 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1115 gomp_mutex_init (&devices[num_devices].lock);
1116 num_devices++;
1121 free (plugin_name);
1122 cur = next + 1;
1124 while (next);
1126 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1127 NUM_DEVICES_OPENMP. */
1128 struct gomp_device_descr *devices_s
1129 = malloc (num_devices * sizeof (struct gomp_device_descr));
1130 if (!devices_s)
1132 num_devices = 0;
1133 free (devices);
1134 devices = NULL;
1136 num_devices_openmp = 0;
1137 for (i = 0; i < num_devices; i++)
1138 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1139 devices_s[num_devices_openmp++] = devices[i];
1140 int num_devices_after_openmp = num_devices_openmp;
1141 for (i = 0; i < num_devices; i++)
1142 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1143 devices_s[num_devices_after_openmp++] = devices[i];
1144 free (devices);
1145 devices = devices_s;
1147 for (i = 0; i < num_devices; i++)
1149 int j;
1151 for (j = 0; j < num_offload_images; j++)
1152 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1154 /* The 'devices' array can be moved (by the realloc call) until we have
1155 found all the plugins, so registering with the OpenACC runtime (which
1156 takes a copy of the pointer argument) must be delayed until now. */
1157 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1158 goacc_register (&devices[i]);
1161 free (offload_images);
1162 offload_images = NULL;
1163 num_offload_images = 0;
1166 #else /* PLUGIN_SUPPORT */
1167 /* If dlfcn.h is unavailable we always fallback to host execution.
1168 GOMP_target* routines are just stubs for this case. */
1169 static void
1170 gomp_target_init (void)
1173 #endif /* PLUGIN_SUPPORT */