Account for high/lo_sum simplification with displacements
[official-gcc.git] / libgomp / target.c
blob83ad51108d0158f7f58f82749957fc05c9428bd6
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
39 #ifdef PLUGIN_SUPPORT
40 #include <dlfcn.h>
41 #endif
43 static void gomp_target_init (void);
45 /* The whole initialization code for offloading plugins is only run one. */
46 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
48 /* This structure describes an offload image.
49 It contains type of the target device, pointer to host table descriptor, and
50 pointer to target data. */
51 struct offload_image_descr {
52 enum offload_target_type type;
53 void *host_table;
54 void *target_data;
57 /* Array of descriptors of offload images. */
58 static struct offload_image_descr *offload_images;
60 /* Total number of offload images. */
61 static int num_offload_images;
63 /* Array of descriptors for all available devices. */
64 static struct gomp_device_descr *devices;
66 /* Total number of available devices. */
67 static int num_devices;
69 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
70 static int num_devices_openmp;
72 /* The comparison function. */
74 attribute_hidden int
75 splay_compare (splay_tree_key x, splay_tree_key y)
77 if (x->host_start == x->host_end
78 && y->host_start == y->host_end)
79 return 0;
80 if (x->host_end <= y->host_start)
81 return -1;
82 if (x->host_start >= y->host_end)
83 return 1;
84 return 0;
87 #include "splay-tree.h"
89 attribute_hidden void
90 gomp_init_targets_once (void)
92 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
95 attribute_hidden int
96 gomp_get_num_devices (void)
98 gomp_init_targets_once ();
99 return num_devices_openmp;
102 static struct gomp_device_descr *
103 resolve_device (int device_id)
105 if (device_id == GOMP_DEVICE_ICV)
107 struct gomp_task_icv *icv = gomp_icv (false);
108 device_id = icv->default_device_var;
111 if (device_id < 0 || device_id >= gomp_get_num_devices ())
112 return NULL;
114 return &devices[device_id];
118 /* Handle the case where splay_tree_lookup found oldn for newn.
119 Helper function of gomp_map_vars. */
121 static inline void
122 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
123 unsigned char kind)
125 if ((kind & GOMP_MAP_FLAG_FORCE)
126 || oldn->host_start > newn->host_start
127 || oldn->host_end < newn->host_end)
128 gomp_fatal ("Trying to map into device [%p..%p) object when "
129 "[%p..%p) is already mapped",
130 (void *) newn->host_start, (void *) newn->host_end,
131 (void *) oldn->host_start, (void *) oldn->host_end);
132 oldn->refcount++;
135 static int
136 get_kind (bool is_openacc, void *kinds, int idx)
138 return is_openacc ? ((unsigned short *) kinds)[idx]
139 : ((unsigned char *) kinds)[idx];
142 attribute_hidden struct target_mem_desc *
143 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
144 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
145 bool is_openacc, bool is_target)
147 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
148 const int rshift = is_openacc ? 8 : 3;
149 const int typemask = is_openacc ? 0xff : 0x7;
150 struct gomp_memory_mapping *mm = &devicep->mem_map;
151 struct splay_tree_key_s cur_node;
152 struct target_mem_desc *tgt
153 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
154 tgt->list_count = mapnum;
155 tgt->refcount = 1;
156 tgt->device_descr = devicep;
157 tgt->mem_map = mm;
159 if (mapnum == 0)
160 return tgt;
162 tgt_align = sizeof (void *);
163 tgt_size = 0;
164 if (is_target)
166 size_t align = 4 * sizeof (void *);
167 tgt_align = align;
168 tgt_size = mapnum * sizeof (void *);
171 gomp_mutex_lock (&mm->lock);
173 for (i = 0; i < mapnum; i++)
175 int kind = get_kind (is_openacc, kinds, i);
176 if (hostaddrs[i] == NULL)
178 tgt->list[i] = NULL;
179 continue;
181 cur_node.host_start = (uintptr_t) hostaddrs[i];
182 if (!GOMP_MAP_POINTER_P (kind & typemask))
183 cur_node.host_end = cur_node.host_start + sizes[i];
184 else
185 cur_node.host_end = cur_node.host_start + sizeof (void *);
186 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
187 if (n)
189 tgt->list[i] = n;
190 gomp_map_vars_existing (n, &cur_node, kind & typemask);
192 else
194 tgt->list[i] = NULL;
196 size_t align = (size_t) 1 << (kind >> rshift);
197 not_found_cnt++;
198 if (tgt_align < align)
199 tgt_align = align;
200 tgt_size = (tgt_size + align - 1) & ~(align - 1);
201 tgt_size += cur_node.host_end - cur_node.host_start;
202 if ((kind & typemask) == GOMP_MAP_TO_PSET)
204 size_t j;
205 for (j = i + 1; j < mapnum; j++)
206 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
207 & typemask))
208 break;
209 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
210 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
211 > cur_node.host_end))
212 break;
213 else
215 tgt->list[j] = NULL;
216 i++;
222 if (devaddrs)
224 if (mapnum != 1)
225 gomp_fatal ("unexpected aggregation");
226 tgt->to_free = devaddrs[0];
227 tgt->tgt_start = (uintptr_t) tgt->to_free;
228 tgt->tgt_end = tgt->tgt_start + sizes[0];
230 else if (not_found_cnt || is_target)
232 /* Allocate tgt_align aligned tgt_size block of memory. */
233 /* FIXME: Perhaps change interface to allocate properly aligned
234 memory. */
235 tgt->to_free = devicep->alloc_func (devicep->target_id,
236 tgt_size + tgt_align - 1);
237 tgt->tgt_start = (uintptr_t) tgt->to_free;
238 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
239 tgt->tgt_end = tgt->tgt_start + tgt_size;
241 else
243 tgt->to_free = NULL;
244 tgt->tgt_start = 0;
245 tgt->tgt_end = 0;
248 tgt_size = 0;
249 if (is_target)
250 tgt_size = mapnum * sizeof (void *);
252 tgt->array = NULL;
253 if (not_found_cnt)
255 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
256 splay_tree_node array = tgt->array;
257 size_t j;
259 for (i = 0; i < mapnum; i++)
260 if (tgt->list[i] == NULL)
262 int kind = get_kind (is_openacc, kinds, i);
263 if (hostaddrs[i] == NULL)
264 continue;
265 splay_tree_key k = &array->key;
266 k->host_start = (uintptr_t) hostaddrs[i];
267 if (!GOMP_MAP_POINTER_P (kind & typemask))
268 k->host_end = k->host_start + sizes[i];
269 else
270 k->host_end = k->host_start + sizeof (void *);
271 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
272 if (n)
274 tgt->list[i] = n;
275 gomp_map_vars_existing (n, k, kind & typemask);
277 else
279 size_t align = (size_t) 1 << (kind >> rshift);
280 tgt->list[i] = k;
281 tgt_size = (tgt_size + align - 1) & ~(align - 1);
282 k->tgt = tgt;
283 k->tgt_offset = tgt_size;
284 tgt_size += k->host_end - k->host_start;
285 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
286 k->refcount = 1;
287 k->async_refcount = 0;
288 tgt->refcount++;
289 array->left = NULL;
290 array->right = NULL;
291 splay_tree_insert (&mm->splay_tree, array);
292 switch (kind & typemask)
294 case GOMP_MAP_ALLOC:
295 case GOMP_MAP_FROM:
296 case GOMP_MAP_FORCE_ALLOC:
297 case GOMP_MAP_FORCE_FROM:
298 break;
299 case GOMP_MAP_TO:
300 case GOMP_MAP_TOFROM:
301 case GOMP_MAP_FORCE_TO:
302 case GOMP_MAP_FORCE_TOFROM:
303 /* FIXME: Perhaps add some smarts, like if copying
304 several adjacent fields from host to target, use some
305 host buffer to avoid sending each var individually. */
306 devicep->host2dev_func (devicep->target_id,
307 (void *) (tgt->tgt_start
308 + k->tgt_offset),
309 (void *) k->host_start,
310 k->host_end - k->host_start);
311 break;
312 case GOMP_MAP_POINTER:
313 cur_node.host_start
314 = (uintptr_t) *(void **) k->host_start;
315 if (cur_node.host_start == (uintptr_t) NULL)
317 cur_node.tgt_offset = (uintptr_t) NULL;
318 /* FIXME: see above FIXME comment. */
319 devicep->host2dev_func (devicep->target_id,
320 (void *) (tgt->tgt_start
321 + k->tgt_offset),
322 (void *) &cur_node.tgt_offset,
323 sizeof (void *));
324 break;
326 /* Add bias to the pointer value. */
327 cur_node.host_start += sizes[i];
328 cur_node.host_end = cur_node.host_start + 1;
329 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
330 if (n == NULL)
332 /* Could be possibly zero size array section. */
333 cur_node.host_end--;
334 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
335 if (n == NULL)
337 cur_node.host_start--;
338 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
339 cur_node.host_start++;
342 if (n == NULL)
343 gomp_fatal ("Pointer target of array section "
344 "wasn't mapped");
345 cur_node.host_start -= n->host_start;
346 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
347 + cur_node.host_start;
348 /* At this point tgt_offset is target address of the
349 array section. Now subtract bias to get what we want
350 to initialize the pointer with. */
351 cur_node.tgt_offset -= sizes[i];
352 /* FIXME: see above FIXME comment. */
353 devicep->host2dev_func (devicep->target_id,
354 (void *) (tgt->tgt_start
355 + k->tgt_offset),
356 (void *) &cur_node.tgt_offset,
357 sizeof (void *));
358 break;
359 case GOMP_MAP_TO_PSET:
360 /* FIXME: see above FIXME comment. */
361 devicep->host2dev_func (devicep->target_id,
362 (void *) (tgt->tgt_start
363 + k->tgt_offset),
364 (void *) k->host_start,
365 k->host_end - k->host_start);
367 for (j = i + 1; j < mapnum; j++)
368 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
369 & typemask))
370 break;
371 else if ((uintptr_t) hostaddrs[j] < k->host_start
372 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
373 > k->host_end))
374 break;
375 else
377 tgt->list[j] = k;
378 k->refcount++;
379 cur_node.host_start
380 = (uintptr_t) *(void **) hostaddrs[j];
381 if (cur_node.host_start == (uintptr_t) NULL)
383 cur_node.tgt_offset = (uintptr_t) NULL;
384 /* FIXME: see above FIXME comment. */
385 devicep->host2dev_func (devicep->target_id,
386 (void *) (tgt->tgt_start + k->tgt_offset
387 + ((uintptr_t) hostaddrs[j]
388 - k->host_start)),
389 (void *) &cur_node.tgt_offset,
390 sizeof (void *));
391 i++;
392 continue;
394 /* Add bias to the pointer value. */
395 cur_node.host_start += sizes[j];
396 cur_node.host_end = cur_node.host_start + 1;
397 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
398 if (n == NULL)
400 /* Could be possibly zero size array section. */
401 cur_node.host_end--;
402 n = splay_tree_lookup (&mm->splay_tree,
403 &cur_node);
404 if (n == NULL)
406 cur_node.host_start--;
407 n = splay_tree_lookup (&mm->splay_tree,
408 &cur_node);
409 cur_node.host_start++;
412 if (n == NULL)
413 gomp_fatal ("Pointer target of array section "
414 "wasn't mapped");
415 cur_node.host_start -= n->host_start;
416 cur_node.tgt_offset = n->tgt->tgt_start
417 + n->tgt_offset
418 + cur_node.host_start;
419 /* At this point tgt_offset is target address of the
420 array section. Now subtract bias to get what we
421 want to initialize the pointer with. */
422 cur_node.tgt_offset -= sizes[j];
423 /* FIXME: see above FIXME comment. */
424 devicep->host2dev_func (devicep->target_id,
425 (void *) (tgt->tgt_start + k->tgt_offset
426 + ((uintptr_t) hostaddrs[j]
427 - k->host_start)),
428 (void *) &cur_node.tgt_offset,
429 sizeof (void *));
430 i++;
432 break;
433 case GOMP_MAP_FORCE_PRESENT:
435 /* We already looked up the memory region above and it
436 was missing. */
437 size_t size = k->host_end - k->host_start;
438 gomp_fatal ("present clause: !acc_is_present (%p, "
439 "%zd (0x%zx))", (void *) k->host_start,
440 size, size);
442 break;
443 case GOMP_MAP_FORCE_DEVICEPTR:
444 assert (k->host_end - k->host_start == sizeof (void *));
446 devicep->host2dev_func (devicep->target_id,
447 (void *) (tgt->tgt_start
448 + k->tgt_offset),
449 (void *) k->host_start,
450 sizeof (void *));
451 break;
452 default:
453 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
454 kind);
456 array++;
461 if (is_target)
463 for (i = 0; i < mapnum; i++)
465 if (tgt->list[i] == NULL)
466 cur_node.tgt_offset = (uintptr_t) NULL;
467 else
468 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
469 + tgt->list[i]->tgt_offset;
470 /* FIXME: see above FIXME comment. */
471 devicep->host2dev_func (devicep->target_id,
472 (void *) (tgt->tgt_start
473 + i * sizeof (void *)),
474 (void *) &cur_node.tgt_offset,
475 sizeof (void *));
479 gomp_mutex_unlock (&mm->lock);
480 return tgt;
483 static void
484 gomp_unmap_tgt (struct target_mem_desc *tgt)
486 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
487 if (tgt->tgt_end)
488 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
490 free (tgt->array);
491 free (tgt);
494 /* Decrease the refcount for a set of mapped variables, and queue asychronous
495 copies from the device back to the host after any work that has been issued.
496 Because the regions are still "live", increment an asynchronous reference
497 count to indicate that they should not be unmapped from host-side data
498 structures until the asynchronous copy has completed. */
500 attribute_hidden void
501 gomp_copy_from_async (struct target_mem_desc *tgt)
503 struct gomp_device_descr *devicep = tgt->device_descr;
504 struct gomp_memory_mapping *mm = tgt->mem_map;
505 size_t i;
507 gomp_mutex_lock (&mm->lock);
509 for (i = 0; i < tgt->list_count; i++)
510 if (tgt->list[i] == NULL)
512 else if (tgt->list[i]->refcount > 1)
514 tgt->list[i]->refcount--;
515 tgt->list[i]->async_refcount++;
517 else
519 splay_tree_key k = tgt->list[i];
520 if (k->copy_from)
521 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
522 (void *) (k->tgt->tgt_start + k->tgt_offset),
523 k->host_end - k->host_start);
526 gomp_mutex_unlock (&mm->lock);
529 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
530 variables back from device to host: if it is false, it is assumed that this
531 has been done already, i.e. by gomp_copy_from_async above. */
533 attribute_hidden void
534 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
536 struct gomp_device_descr *devicep = tgt->device_descr;
537 struct gomp_memory_mapping *mm = tgt->mem_map;
539 if (tgt->list_count == 0)
541 free (tgt);
542 return;
545 gomp_mutex_lock (&mm->lock);
547 size_t i;
548 for (i = 0; i < tgt->list_count; i++)
549 if (tgt->list[i] == NULL)
551 else if (tgt->list[i]->refcount > 1)
552 tgt->list[i]->refcount--;
553 else if (tgt->list[i]->async_refcount > 0)
554 tgt->list[i]->async_refcount--;
555 else
557 splay_tree_key k = tgt->list[i];
558 if (k->copy_from && do_copyfrom)
559 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
560 (void *) (k->tgt->tgt_start + k->tgt_offset),
561 k->host_end - k->host_start);
562 splay_tree_remove (&mm->splay_tree, k);
563 if (k->tgt->refcount > 1)
564 k->tgt->refcount--;
565 else
566 gomp_unmap_tgt (k->tgt);
569 if (tgt->refcount > 1)
570 tgt->refcount--;
571 else
572 gomp_unmap_tgt (tgt);
574 gomp_mutex_unlock (&mm->lock);
577 static void
578 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
579 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
580 bool is_openacc)
582 size_t i;
583 struct splay_tree_key_s cur_node;
584 const int typemask = is_openacc ? 0xff : 0x7;
586 if (!devicep)
587 return;
589 if (mapnum == 0)
590 return;
592 gomp_mutex_lock (&mm->lock);
593 for (i = 0; i < mapnum; i++)
594 if (sizes[i])
596 cur_node.host_start = (uintptr_t) hostaddrs[i];
597 cur_node.host_end = cur_node.host_start + sizes[i];
598 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
599 &cur_node);
600 if (n)
602 int kind = get_kind (is_openacc, kinds, i);
603 if (n->host_start > cur_node.host_start
604 || n->host_end < cur_node.host_end)
605 gomp_fatal ("Trying to update [%p..%p) object when"
606 "only [%p..%p) is mapped",
607 (void *) cur_node.host_start,
608 (void *) cur_node.host_end,
609 (void *) n->host_start,
610 (void *) n->host_end);
611 if (GOMP_MAP_COPY_TO_P (kind & typemask))
612 devicep->host2dev_func (devicep->target_id,
613 (void *) (n->tgt->tgt_start
614 + n->tgt_offset
615 + cur_node.host_start
616 - n->host_start),
617 (void *) cur_node.host_start,
618 cur_node.host_end - cur_node.host_start);
619 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
620 devicep->dev2host_func (devicep->target_id,
621 (void *) cur_node.host_start,
622 (void *) (n->tgt->tgt_start
623 + n->tgt_offset
624 + cur_node.host_start
625 - n->host_start),
626 cur_node.host_end - cur_node.host_start);
628 else
629 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
630 (void *) cur_node.host_start,
631 (void *) cur_node.host_end);
633 gomp_mutex_unlock (&mm->lock);
636 /* This function should be called from every offload image.
637 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
638 the target, and TARGET_DATA needed by target plugin. */
640 void
641 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
642 void *target_data)
644 offload_images = gomp_realloc (offload_images,
645 (num_offload_images + 1)
646 * sizeof (struct offload_image_descr));
648 offload_images[num_offload_images].type = target_type;
649 offload_images[num_offload_images].host_table = host_table;
650 offload_images[num_offload_images].target_data = target_data;
652 num_offload_images++;
655 /* This function initializes the target device, specified by DEVICEP. DEVICEP
656 must be locked on entry, and remains locked on return. */
658 attribute_hidden void
659 gomp_init_device (struct gomp_device_descr *devicep)
661 devicep->init_device_func (devicep->target_id);
662 devicep->is_initialized = true;
665 /* Initialize address mapping tables. MM must be locked on entry, and remains
666 locked on return. */
668 attribute_hidden void
669 gomp_init_tables (struct gomp_device_descr *devicep,
670 struct gomp_memory_mapping *mm)
672 /* Get address mapping table for device. */
673 struct mapping_table *table = NULL;
674 int num_entries = devicep->get_table_func (devicep->target_id, &table);
676 /* Insert host-target address mapping into dev_splay_tree. */
677 int i;
678 for (i = 0; i < num_entries; i++)
680 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
681 tgt->refcount = 1;
682 tgt->array = gomp_malloc (sizeof (*tgt->array));
683 tgt->tgt_start = table[i].tgt_start;
684 tgt->tgt_end = table[i].tgt_end;
685 tgt->to_free = NULL;
686 tgt->list_count = 0;
687 tgt->device_descr = devicep;
688 splay_tree_node node = tgt->array;
689 splay_tree_key k = &node->key;
690 k->host_start = table[i].host_start;
691 k->host_end = table[i].host_end;
692 k->tgt_offset = 0;
693 k->refcount = 1;
694 k->copy_from = false;
695 k->tgt = tgt;
696 node->left = NULL;
697 node->right = NULL;
698 splay_tree_insert (&mm->splay_tree, node);
701 free (table);
702 mm->is_initialized = true;
705 /* Free address mapping tables. MM must be locked on entry, and remains locked
706 on return. */
708 attribute_hidden void
709 gomp_free_memmap (struct gomp_memory_mapping *mm)
711 while (mm->splay_tree.root)
713 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
715 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
716 free (tgt->array);
717 free (tgt);
720 mm->is_initialized = false;
723 /* This function de-initializes the target device, specified by DEVICEP.
724 DEVICEP must be locked on entry, and remains locked on return. */
726 attribute_hidden void
727 gomp_fini_device (struct gomp_device_descr *devicep)
729 if (devicep->is_initialized)
730 devicep->fini_device_func (devicep->target_id);
732 devicep->is_initialized = false;
735 /* Called when encountering a target directive. If DEVICE
736 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
737 GOMP_DEVICE_HOST_FALLBACK (or any value
738 larger than last available hw device), use host fallback.
739 FN is address of host code, OFFLOAD_TABLE contains value of the
740 __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes
741 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
742 with MAPNUM entries, with addresses of the host objects,
743 sizes of the host objects (resp. for pointer kind pointer bias
744 and assumed sizeof (void *) size) and kinds. */
746 void
747 GOMP_target (int device, void (*fn) (void *), const void *offload_table,
748 size_t mapnum, void **hostaddrs, size_t *sizes,
749 unsigned char *kinds)
751 struct gomp_device_descr *devicep = resolve_device (device);
753 if (devicep == NULL
754 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
756 /* Host fallback. */
757 struct gomp_thread old_thr, *thr = gomp_thread ();
758 old_thr = *thr;
759 memset (thr, '\0', sizeof (*thr));
760 if (gomp_places_list)
762 thr->place = old_thr.place;
763 thr->ts.place_partition_len = gomp_places_list_len;
765 fn (hostaddrs);
766 gomp_free_thread (thr);
767 *thr = old_thr;
768 return;
771 gomp_mutex_lock (&devicep->lock);
772 if (!devicep->is_initialized)
773 gomp_init_device (devicep);
774 gomp_mutex_unlock (&devicep->lock);
776 void *fn_addr;
778 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
779 fn_addr = (void *) fn;
780 else
782 struct gomp_memory_mapping *mm = &devicep->mem_map;
783 gomp_mutex_lock (&mm->lock);
785 if (!mm->is_initialized)
786 gomp_init_tables (devicep, mm);
788 struct splay_tree_key_s k;
789 k.host_start = (uintptr_t) fn;
790 k.host_end = k.host_start + 1;
791 splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
792 if (tgt_fn == NULL)
793 gomp_fatal ("Target function wasn't mapped");
795 gomp_mutex_unlock (&mm->lock);
797 fn_addr = (void *) tgt_fn->tgt->tgt_start;
800 struct target_mem_desc *tgt_vars
801 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
802 true);
803 struct gomp_thread old_thr, *thr = gomp_thread ();
804 old_thr = *thr;
805 memset (thr, '\0', sizeof (*thr));
806 if (gomp_places_list)
808 thr->place = old_thr.place;
809 thr->ts.place_partition_len = gomp_places_list_len;
811 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
812 gomp_free_thread (thr);
813 *thr = old_thr;
814 gomp_unmap_vars (tgt_vars, true);
817 void
818 GOMP_target_data (int device, const void *offload_table, size_t mapnum,
819 void **hostaddrs, size_t *sizes, unsigned char *kinds)
821 struct gomp_device_descr *devicep = resolve_device (device);
823 if (devicep == NULL
824 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
826 /* Host fallback. */
827 struct gomp_task_icv *icv = gomp_icv (false);
828 if (icv->target_data)
830 /* Even when doing a host fallback, if there are any active
831 #pragma omp target data constructs, need to remember the
832 new #pragma omp target data, otherwise GOMP_target_end_data
833 would get out of sync. */
834 struct target_mem_desc *tgt
835 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
836 tgt->prev = icv->target_data;
837 icv->target_data = tgt;
839 return;
842 gomp_mutex_lock (&devicep->lock);
843 if (!devicep->is_initialized)
844 gomp_init_device (devicep);
845 gomp_mutex_unlock (&devicep->lock);
847 struct gomp_memory_mapping *mm = &devicep->mem_map;
848 gomp_mutex_lock (&mm->lock);
849 if (!mm->is_initialized)
850 gomp_init_tables (devicep, mm);
851 gomp_mutex_unlock (&mm->lock);
853 struct target_mem_desc *tgt
854 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
855 false);
856 struct gomp_task_icv *icv = gomp_icv (true);
857 tgt->prev = icv->target_data;
858 icv->target_data = tgt;
861 void
862 GOMP_target_end_data (void)
864 struct gomp_task_icv *icv = gomp_icv (false);
865 if (icv->target_data)
867 struct target_mem_desc *tgt = icv->target_data;
868 icv->target_data = tgt->prev;
869 gomp_unmap_vars (tgt, true);
873 void
874 GOMP_target_update (int device, const void *offload_table, size_t mapnum,
875 void **hostaddrs, size_t *sizes, unsigned char *kinds)
877 struct gomp_device_descr *devicep = resolve_device (device);
879 if (devicep == NULL
880 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
881 return;
883 gomp_mutex_lock (&devicep->lock);
884 if (!devicep->is_initialized)
885 gomp_init_device (devicep);
886 gomp_mutex_unlock (&devicep->lock);
888 struct gomp_memory_mapping *mm = &devicep->mem_map;
889 gomp_mutex_lock (&mm->lock);
890 if (!mm->is_initialized)
891 gomp_init_tables (devicep, mm);
892 gomp_mutex_unlock (&mm->lock);
894 gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
897 void
898 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
900 if (thread_limit)
902 struct gomp_task_icv *icv = gomp_icv (true);
903 icv->thread_limit_var
904 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
906 (void) num_teams;
909 #ifdef PLUGIN_SUPPORT
911 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
912 in PLUGIN_NAME.
913 The handles of the found functions are stored in the corresponding fields
914 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
916 static bool
917 gomp_load_plugin_for_device (struct gomp_device_descr *device,
918 const char *plugin_name)
920 char *err = NULL, *last_missing = NULL;
921 int optional_present, optional_total;
923 /* Clear any existing error. */
924 dlerror ();
926 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
927 if (!plugin_handle)
929 err = dlerror ();
930 goto out;
933 /* Check if all required functions are available in the plugin and store
934 their handlers. */
935 #define DLSYM(f) \
936 do \
938 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
939 err = dlerror (); \
940 if (err != NULL) \
941 goto out; \
943 while (0)
944 /* Similar, but missing functions are not an error. */
945 #define DLSYM_OPT(f, n) \
946 do \
948 char *tmp_err; \
949 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
950 tmp_err = dlerror (); \
951 if (tmp_err == NULL) \
952 optional_present++; \
953 else \
954 last_missing = #n; \
955 optional_total++; \
957 while (0)
959 DLSYM (get_name);
960 DLSYM (get_caps);
961 DLSYM (get_type);
962 DLSYM (get_num_devices);
963 DLSYM (register_image);
964 DLSYM (init_device);
965 DLSYM (fini_device);
966 DLSYM (get_table);
967 DLSYM (alloc);
968 DLSYM (free);
969 DLSYM (dev2host);
970 DLSYM (host2dev);
971 device->capabilities = device->get_caps_func ();
972 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
973 DLSYM (run);
974 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
976 optional_present = optional_total = 0;
977 DLSYM_OPT (openacc.exec, openacc_parallel);
978 DLSYM_OPT (openacc.open_device, openacc_open_device);
979 DLSYM_OPT (openacc.close_device, openacc_close_device);
980 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
981 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
982 DLSYM_OPT (openacc.register_async_cleanup,
983 openacc_register_async_cleanup);
984 DLSYM_OPT (openacc.async_test, openacc_async_test);
985 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
986 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
987 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
988 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
989 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
990 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
991 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
992 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
993 /* Require all the OpenACC handlers if we have
994 GOMP_OFFLOAD_CAP_OPENACC_200. */
995 if (optional_present != optional_total)
997 err = "plugin missing OpenACC handler function";
998 goto out;
1000 optional_present = optional_total = 0;
1001 DLSYM_OPT (openacc.cuda.get_current_device,
1002 openacc_get_current_cuda_device);
1003 DLSYM_OPT (openacc.cuda.get_current_context,
1004 openacc_get_current_cuda_context);
1005 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1006 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1007 /* Make sure all the CUDA functions are there if any of them are. */
1008 if (optional_present && optional_present != optional_total)
1010 err = "plugin missing OpenACC CUDA handler function";
1011 goto out;
1014 #undef DLSYM
1015 #undef DLSYM_OPT
1017 out:
1018 if (err != NULL)
1020 gomp_error ("while loading %s: %s", plugin_name, err);
1021 if (last_missing)
1022 gomp_error ("missing function was %s", last_missing);
1023 if (plugin_handle)
1024 dlclose (plugin_handle);
1026 return err == NULL;
1029 /* This function adds a compatible offload image IMAGE to an accelerator device
1030 DEVICE. DEVICE must be locked on entry, and remains locked on return. */
1032 static void
1033 gomp_register_image_for_device (struct gomp_device_descr *device,
1034 struct offload_image_descr *image)
1036 if (!device->offload_regions_registered
1037 && (device->type == image->type
1038 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1040 device->register_image_func (image->host_table, image->target_data);
1041 device->offload_regions_registered = true;
1045 /* This function initializes the runtime needed for offloading.
1046 It parses the list of offload targets and tries to load the plugins for
1047 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1048 will be set, and the array DEVICES initialized, containing descriptors for
1049 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1050 by the others. */
1052 static void
1053 gomp_target_init (void)
1055 const char *prefix ="libgomp-plugin-";
1056 const char *suffix = ".so.1";
1057 const char *cur, *next;
1058 char *plugin_name;
1059 int i, new_num_devices;
1061 num_devices = 0;
1062 devices = NULL;
1064 cur = OFFLOAD_TARGETS;
1065 if (*cur)
1068 struct gomp_device_descr current_device;
1070 next = strchr (cur, ',');
1072 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1073 + strlen (prefix) + strlen (suffix));
1074 if (!plugin_name)
1076 num_devices = 0;
1077 break;
1080 strcpy (plugin_name, prefix);
1081 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1082 strcat (plugin_name, suffix);
1084 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1086 new_num_devices = current_device.get_num_devices_func ();
1087 if (new_num_devices >= 1)
1089 /* Augment DEVICES and NUM_DEVICES. */
1091 devices = realloc (devices, (num_devices + new_num_devices)
1092 * sizeof (struct gomp_device_descr));
1093 if (!devices)
1095 num_devices = 0;
1096 free (plugin_name);
1097 break;
1100 current_device.name = current_device.get_name_func ();
1101 /* current_device.capabilities has already been set. */
1102 current_device.type = current_device.get_type_func ();
1103 current_device.mem_map.is_initialized = false;
1104 current_device.mem_map.splay_tree.root = NULL;
1105 current_device.is_initialized = false;
1106 current_device.offload_regions_registered = false;
1107 current_device.openacc.data_environ = NULL;
1108 current_device.openacc.target_data = NULL;
1109 for (i = 0; i < new_num_devices; i++)
1111 current_device.target_id = i;
1112 devices[num_devices] = current_device;
1113 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1114 gomp_mutex_init (&devices[num_devices].lock);
1115 num_devices++;
1120 free (plugin_name);
1121 cur = next + 1;
1123 while (next);
1125 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1126 NUM_DEVICES_OPENMP. */
1127 struct gomp_device_descr *devices_s
1128 = malloc (num_devices * sizeof (struct gomp_device_descr));
1129 if (!devices_s)
1131 num_devices = 0;
1132 free (devices);
1133 devices = NULL;
1135 num_devices_openmp = 0;
1136 for (i = 0; i < num_devices; i++)
1137 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1138 devices_s[num_devices_openmp++] = devices[i];
1139 int num_devices_after_openmp = num_devices_openmp;
1140 for (i = 0; i < num_devices; i++)
1141 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1142 devices_s[num_devices_after_openmp++] = devices[i];
1143 free (devices);
1144 devices = devices_s;
1146 for (i = 0; i < num_devices; i++)
1148 int j;
1150 for (j = 0; j < num_offload_images; j++)
1151 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1153 /* The 'devices' array can be moved (by the realloc call) until we have
1154 found all the plugins, so registering with the OpenACC runtime (which
1155 takes a copy of the pointer argument) must be delayed until now. */
1156 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1157 goacc_register (&devices[i]);
1160 free (offload_images);
1161 offload_images = NULL;
1162 num_offload_images = 0;
1165 #else /* PLUGIN_SUPPORT */
1166 /* If dlfcn.h is unavailable we always fallback to host execution.
1167 GOMP_target* routines are just stubs for this case. */
1168 static void
1169 gomp_target_init (void)
1172 #endif /* PLUGIN_SUPPORT */