libgomp: Fix locking in OpenMP GOMP_target* functions.
[official-gcc.git] / libgomp / target.c
blob53e0c7f46f768b1b2e4e4ae0f1dd5a5a768051a6
1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "libgomp_target.h"
31 #include "oacc-plugin.h"
32 #include "oacc-int.h"
33 #include "gomp-constants.h"
34 #include <limits.h>
35 #include <stdbool.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39 #include <assert.h>
41 #ifdef PLUGIN_SUPPORT
42 #include <dlfcn.h>
43 #endif
45 static void gomp_target_init (void);
47 /* The whole initialization code for offloading plugins is only run one. */
48 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
50 /* This structure describes an offload image.
51 It contains type of the target device, pointer to host table descriptor, and
52 pointer to target data. */
53 struct offload_image_descr {
54 enum offload_target_type type;
55 void *host_table;
56 void *target_data;
59 /* Array of descriptors of offload images. */
60 static struct offload_image_descr *offload_images;
62 /* Total number of offload images. */
63 static int num_offload_images;
65 /* Array of descriptors for all available devices. */
66 static struct gomp_device_descr *devices;
68 /* Total number of available devices. */
69 static int num_devices;
71 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
72 static int num_devices_openmp;
74 /* The comparison function. */
76 attribute_hidden int
77 splay_compare (splay_tree_key x, splay_tree_key y)
79 if (x->host_start == x->host_end
80 && y->host_start == y->host_end)
81 return 0;
82 if (x->host_end <= y->host_start)
83 return -1;
84 if (x->host_start >= y->host_end)
85 return 1;
86 return 0;
89 #include "splay-tree.h"
91 attribute_hidden void
92 gomp_init_targets_once (void)
94 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
97 attribute_hidden int
98 gomp_get_num_devices (void)
100 gomp_init_targets_once ();
101 return num_devices_openmp;
104 static struct gomp_device_descr *
105 resolve_device (int device_id)
107 if (device_id == GOMP_DEVICE_ICV)
109 struct gomp_task_icv *icv = gomp_icv (false);
110 device_id = icv->default_device_var;
113 if (device_id < 0 || device_id >= gomp_get_num_devices ())
114 return NULL;
116 return &devices[device_id];
120 /* Handle the case where splay_tree_lookup found oldn for newn.
121 Helper function of gomp_map_vars. */
123 static inline void
124 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
125 unsigned char kind)
127 if ((kind & GOMP_MAP_FLAG_FORCE)
128 || oldn->host_start > newn->host_start
129 || oldn->host_end < newn->host_end)
130 gomp_fatal ("Trying to map into device [%p..%p) object when "
131 "[%p..%p) is already mapped",
132 (void *) newn->host_start, (void *) newn->host_end,
133 (void *) oldn->host_start, (void *) oldn->host_end);
134 oldn->refcount++;
137 static int
138 get_kind (bool is_openacc, void *kinds, int idx)
140 return is_openacc ? ((unsigned short *) kinds)[idx]
141 : ((unsigned char *) kinds)[idx];
144 attribute_hidden struct target_mem_desc *
145 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
146 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
147 bool is_openacc, bool is_target)
149 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
150 const int rshift = is_openacc ? 8 : 3;
151 const int typemask = is_openacc ? 0xff : 0x7;
152 struct gomp_memory_mapping *mm = &devicep->mem_map;
153 struct splay_tree_key_s cur_node;
154 struct target_mem_desc *tgt
155 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
156 tgt->list_count = mapnum;
157 tgt->refcount = 1;
158 tgt->device_descr = devicep;
159 tgt->mem_map = mm;
161 if (mapnum == 0)
162 return tgt;
164 tgt_align = sizeof (void *);
165 tgt_size = 0;
166 if (is_target)
168 size_t align = 4 * sizeof (void *);
169 tgt_align = align;
170 tgt_size = mapnum * sizeof (void *);
172 gomp_mutex_lock (&mm->lock);
174 for (i = 0; i < mapnum; i++)
176 int kind = get_kind (is_openacc, kinds, i);
177 if (hostaddrs[i] == NULL)
179 tgt->list[i] = NULL;
180 continue;
182 cur_node.host_start = (uintptr_t) hostaddrs[i];
183 if (!GOMP_MAP_POINTER_P (kind & typemask))
184 cur_node.host_end = cur_node.host_start + sizes[i];
185 else
186 cur_node.host_end = cur_node.host_start + sizeof (void *);
187 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
188 if (n)
190 tgt->list[i] = n;
191 gomp_map_vars_existing (n, &cur_node, kind & typemask);
193 else
195 tgt->list[i] = NULL;
197 size_t align = (size_t) 1 << (kind >> rshift);
198 not_found_cnt++;
199 if (tgt_align < align)
200 tgt_align = align;
201 tgt_size = (tgt_size + align - 1) & ~(align - 1);
202 tgt_size += cur_node.host_end - cur_node.host_start;
203 if ((kind & typemask) == GOMP_MAP_TO_PSET)
205 size_t j;
206 for (j = i + 1; j < mapnum; j++)
207 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
208 & typemask))
209 break;
210 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
211 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
212 > cur_node.host_end))
213 break;
214 else
216 tgt->list[j] = NULL;
217 i++;
223 if (devaddrs)
225 if (mapnum != 1)
226 gomp_fatal ("unexpected aggregation");
227 tgt->to_free = devaddrs[0];
228 tgt->tgt_start = (uintptr_t) tgt->to_free;
229 tgt->tgt_end = tgt->tgt_start + sizes[0];
231 else if (not_found_cnt || is_target)
233 /* Allocate tgt_align aligned tgt_size block of memory. */
234 /* FIXME: Perhaps change interface to allocate properly aligned
235 memory. */
236 tgt->to_free = devicep->alloc_func (devicep->target_id,
237 tgt_size + tgt_align - 1);
238 tgt->tgt_start = (uintptr_t) tgt->to_free;
239 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
240 tgt->tgt_end = tgt->tgt_start + tgt_size;
242 else
244 tgt->to_free = NULL;
245 tgt->tgt_start = 0;
246 tgt->tgt_end = 0;
249 tgt_size = 0;
250 if (is_target)
251 tgt_size = mapnum * sizeof (void *);
253 tgt->array = NULL;
254 if (not_found_cnt)
256 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
257 splay_tree_node array = tgt->array;
258 size_t j;
260 for (i = 0; i < mapnum; i++)
261 if (tgt->list[i] == NULL)
263 int kind = get_kind (is_openacc, kinds, i);
264 if (hostaddrs[i] == NULL)
265 continue;
266 splay_tree_key k = &array->key;
267 k->host_start = (uintptr_t) hostaddrs[i];
268 if (!GOMP_MAP_POINTER_P (kind & typemask))
269 k->host_end = k->host_start + sizes[i];
270 else
271 k->host_end = k->host_start + sizeof (void *);
272 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
273 if (n)
275 tgt->list[i] = n;
276 gomp_map_vars_existing (n, k, kind & typemask);
278 else
280 size_t align = (size_t) 1 << (kind >> rshift);
281 tgt->list[i] = k;
282 tgt_size = (tgt_size + align - 1) & ~(align - 1);
283 k->tgt = tgt;
284 k->tgt_offset = tgt_size;
285 tgt_size += k->host_end - k->host_start;
286 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
287 k->refcount = 1;
288 k->async_refcount = 0;
289 tgt->refcount++;
290 array->left = NULL;
291 array->right = NULL;
293 splay_tree_insert (&mm->splay_tree, array);
295 switch (kind & typemask)
297 case GOMP_MAP_ALLOC:
298 case GOMP_MAP_FROM:
299 case GOMP_MAP_FORCE_ALLOC:
300 case GOMP_MAP_FORCE_FROM:
301 break;
302 case GOMP_MAP_TO:
303 case GOMP_MAP_TOFROM:
304 case GOMP_MAP_FORCE_TO:
305 case GOMP_MAP_FORCE_TOFROM:
306 /* Copy from host to device memory. */
307 /* FIXME: Perhaps add some smarts, like if copying
308 several adjacent fields from host to target, use some
309 host buffer to avoid sending each var individually. */
310 devicep->host2dev_func (devicep->target_id,
311 (void *) (tgt->tgt_start
312 + k->tgt_offset),
313 (void *) k->host_start,
314 k->host_end - k->host_start);
315 break;
316 case GOMP_MAP_POINTER:
317 cur_node.host_start
318 = (uintptr_t) *(void **) k->host_start;
319 if (cur_node.host_start == (uintptr_t) NULL)
321 cur_node.tgt_offset = (uintptr_t) NULL;
322 /* Copy from host to device memory. */
323 /* FIXME: see above FIXME comment. */
324 devicep->host2dev_func (devicep->target_id,
325 (void *) (tgt->tgt_start
326 + k->tgt_offset),
327 (void *) &cur_node.tgt_offset,
328 sizeof (void *));
329 break;
331 /* Add bias to the pointer value. */
332 cur_node.host_start += sizes[i];
333 cur_node.host_end = cur_node.host_start + 1;
334 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
335 if (n == NULL)
337 /* Could be possibly zero size array section. */
338 cur_node.host_end--;
339 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
340 if (n == NULL)
342 cur_node.host_start--;
343 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
344 cur_node.host_start++;
347 if (n == NULL)
348 gomp_fatal ("Pointer target of array section "
349 "wasn't mapped");
351 cur_node.host_start -= n->host_start;
352 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
353 + cur_node.host_start;
354 /* At this point tgt_offset is target address of the
355 array section. Now subtract bias to get what we want
356 to initialize the pointer with. */
357 cur_node.tgt_offset -= sizes[i];
358 /* Copy from host to device memory. */
359 /* FIXME: see above FIXME comment. */
360 devicep->host2dev_func (devicep->target_id,
361 (void *) (tgt->tgt_start
362 + k->tgt_offset),
363 (void *) &cur_node.tgt_offset,
364 sizeof (void *));
365 break;
366 case GOMP_MAP_TO_PSET:
367 /* Copy from host to device memory. */
368 /* FIXME: see above FIXME comment. */
369 devicep->host2dev_func (devicep->target_id,
370 (void *) (tgt->tgt_start
371 + k->tgt_offset),
372 (void *) k->host_start,
373 k->host_end - k->host_start);
375 for (j = i + 1; j < mapnum; j++)
376 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
377 & typemask))
378 break;
379 else if ((uintptr_t) hostaddrs[j] < k->host_start
380 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
381 > k->host_end))
382 break;
383 else
385 tgt->list[j] = k;
386 k->refcount++;
387 cur_node.host_start
388 = (uintptr_t) *(void **) hostaddrs[j];
389 if (cur_node.host_start == (uintptr_t) NULL)
391 cur_node.tgt_offset = (uintptr_t) NULL;
392 /* Copy from host to device memory. */
393 /* FIXME: see above FIXME comment. */
394 devicep->host2dev_func (devicep->target_id,
395 (void *) (tgt->tgt_start + k->tgt_offset
396 + ((uintptr_t) hostaddrs[j]
397 - k->host_start)),
398 (void *) &cur_node.tgt_offset,
399 sizeof (void *));
400 i++;
401 continue;
403 /* Add bias to the pointer value. */
404 cur_node.host_start += sizes[j];
405 cur_node.host_end = cur_node.host_start + 1;
406 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
407 if (n == NULL)
409 /* Could be possibly zero size array section. */
410 cur_node.host_end--;
411 n = splay_tree_lookup (&mm->splay_tree,
412 &cur_node);
413 if (n == NULL)
415 cur_node.host_start--;
416 n = splay_tree_lookup (&mm->splay_tree,
417 &cur_node);
418 cur_node.host_start++;
421 if (n == NULL)
422 gomp_fatal ("Pointer target of array section "
423 "wasn't mapped");
424 cur_node.host_start -= n->host_start;
425 cur_node.tgt_offset = n->tgt->tgt_start
426 + n->tgt_offset
427 + cur_node.host_start;
428 /* At this point tgt_offset is target address of the
429 array section. Now subtract bias to get what we
430 want to initialize the pointer with. */
431 cur_node.tgt_offset -= sizes[j];
432 /* Copy from host to device memory. */
433 /* FIXME: see above FIXME comment. */
434 devicep->host2dev_func (devicep->target_id,
435 (void *) (tgt->tgt_start + k->tgt_offset
436 + ((uintptr_t) hostaddrs[j]
437 - k->host_start)),
438 (void *) &cur_node.tgt_offset,
439 sizeof (void *));
440 i++;
442 break;
443 case GOMP_MAP_FORCE_PRESENT:
445 /* We already looked up the memory region above and it
446 was missing. */
447 size_t size = k->host_end - k->host_start;
448 gomp_fatal ("present clause: !acc_is_present (%p, "
449 "%zd (0x%zx))", (void *) k->host_start,
450 size, size);
452 break;
453 case GOMP_MAP_FORCE_DEVICEPTR:
454 assert (k->host_end - k->host_start == sizeof (void *));
456 devicep->host2dev_func (devicep->target_id,
457 (void *) (tgt->tgt_start
458 + k->tgt_offset),
459 (void *) k->host_start,
460 sizeof (void *));
461 break;
462 default:
463 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
464 kind);
466 array++;
471 if (is_target)
473 for (i = 0; i < mapnum; i++)
475 if (tgt->list[i] == NULL)
476 cur_node.tgt_offset = (uintptr_t) NULL;
477 else
478 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
479 + tgt->list[i]->tgt_offset;
480 /* Copy from host to device memory. */
481 /* FIXME: see above FIXME comment. */
482 devicep->host2dev_func (devicep->target_id,
483 (void *) (tgt->tgt_start
484 + i * sizeof (void *)),
485 (void *) &cur_node.tgt_offset,
486 sizeof (void *));
490 gomp_mutex_unlock (&mm->lock);
491 return tgt;
494 static void
495 gomp_unmap_tgt (struct target_mem_desc *tgt)
497 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
498 if (tgt->tgt_end)
499 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
501 free (tgt->array);
502 free (tgt);
505 /* Decrease the refcount for a set of mapped variables, and queue asychronous
506 copies from the device back to the host after any work that has been issued.
507 Because the regions are still "live", increment an asynchronous reference
508 count to indicate that they should not be unmapped from host-side data
509 structures until the asynchronous copy has completed. */
511 attribute_hidden void
512 gomp_copy_from_async (struct target_mem_desc *tgt)
514 struct gomp_device_descr *devicep = tgt->device_descr;
515 struct gomp_memory_mapping *mm = tgt->mem_map;
516 size_t i;
518 gomp_mutex_lock (&mm->lock);
520 for (i = 0; i < tgt->list_count; i++)
521 if (tgt->list[i] == NULL)
523 else if (tgt->list[i]->refcount > 1)
525 tgt->list[i]->refcount--;
526 tgt->list[i]->async_refcount++;
528 else
530 splay_tree_key k = tgt->list[i];
531 if (k->copy_from)
532 /* Copy from device to host memory. */
533 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
534 (void *) (k->tgt->tgt_start + k->tgt_offset),
535 k->host_end - k->host_start);
538 gomp_mutex_unlock (&mm->lock);
541 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
542 variables back from device to host: if it is false, it is assumed that this
543 has been done already, i.e. by gomp_copy_from_async above. */
545 attribute_hidden void
546 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
548 struct gomp_device_descr *devicep = tgt->device_descr;
549 struct gomp_memory_mapping *mm = tgt->mem_map;
551 if (tgt->list_count == 0)
553 free (tgt);
554 return;
557 gomp_mutex_lock (&mm->lock);
559 size_t i;
560 for (i = 0; i < tgt->list_count; i++)
561 if (tgt->list[i] == NULL)
563 else if (tgt->list[i]->refcount > 1)
564 tgt->list[i]->refcount--;
565 else if (tgt->list[i]->async_refcount > 0)
566 tgt->list[i]->async_refcount--;
567 else
569 splay_tree_key k = tgt->list[i];
570 if (k->copy_from && do_copyfrom)
571 /* Copy from device to host memory. */
572 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
573 (void *) (k->tgt->tgt_start + k->tgt_offset),
574 k->host_end - k->host_start);
575 splay_tree_remove (&mm->splay_tree, k);
576 if (k->tgt->refcount > 1)
577 k->tgt->refcount--;
578 else
579 gomp_unmap_tgt (k->tgt);
582 if (tgt->refcount > 1)
583 tgt->refcount--;
584 else
585 gomp_unmap_tgt (tgt);
587 gomp_mutex_unlock (&mm->lock);
590 static void
591 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
592 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
593 bool is_openacc)
595 size_t i;
596 struct splay_tree_key_s cur_node;
597 const int typemask = is_openacc ? 0xff : 0x7;
599 if (!devicep)
600 return;
602 if (mapnum == 0)
603 return;
605 gomp_mutex_lock (&mm->lock);
606 for (i = 0; i < mapnum; i++)
607 if (sizes[i])
609 cur_node.host_start = (uintptr_t) hostaddrs[i];
610 cur_node.host_end = cur_node.host_start + sizes[i];
611 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
612 &cur_node);
613 if (n)
615 int kind = get_kind (is_openacc, kinds, i);
616 if (n->host_start > cur_node.host_start
617 || n->host_end < cur_node.host_end)
618 gomp_fatal ("Trying to update [%p..%p) object when"
619 "only [%p..%p) is mapped",
620 (void *) cur_node.host_start,
621 (void *) cur_node.host_end,
622 (void *) n->host_start,
623 (void *) n->host_end);
624 if (GOMP_MAP_COPY_TO_P (kind & typemask))
625 /* Copy from host to device memory. */
626 devicep->host2dev_func (devicep->target_id,
627 (void *) (n->tgt->tgt_start
628 + n->tgt_offset
629 + cur_node.host_start
630 - n->host_start),
631 (void *) cur_node.host_start,
632 cur_node.host_end - cur_node.host_start);
633 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
634 /* Copy from device to host memory. */
635 devicep->dev2host_func (devicep->target_id,
636 (void *) cur_node.host_start,
637 (void *) (n->tgt->tgt_start
638 + n->tgt_offset
639 + cur_node.host_start
640 - n->host_start),
641 cur_node.host_end - cur_node.host_start);
643 else
644 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
645 (void *) cur_node.host_start,
646 (void *) cur_node.host_end);
648 gomp_mutex_unlock (&mm->lock);
651 static void gomp_register_image_for_device (struct gomp_device_descr *device,
652 struct offload_image_descr *image);
654 /* This function should be called from every offload image.
655 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
656 the target, and TARGET_DATA needed by target plugin. */
658 void
659 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
660 void *target_data)
662 offload_images = gomp_realloc (offload_images,
663 (num_offload_images + 1)
664 * sizeof (struct offload_image_descr));
666 if (offload_images == NULL)
667 return;
669 offload_images[num_offload_images].type = target_type;
670 offload_images[num_offload_images].host_table = host_table;
671 offload_images[num_offload_images].target_data = target_data;
673 num_offload_images++;
676 /* This function initializes the target device, specified by DEVICEP. DEVICEP
677 must be locked on entry, and remains locked on return. */
679 attribute_hidden void
680 gomp_init_device (struct gomp_device_descr *devicep)
682 devicep->init_device_func (devicep->target_id);
683 devicep->is_initialized = true;
686 /* Initialize address mapping tables. MM must be locked on entry, and remains
687 locked on return. */
689 attribute_hidden void
690 gomp_init_tables (struct gomp_device_descr *devicep,
691 struct gomp_memory_mapping *mm)
693 /* Get address mapping table for device. */
694 struct mapping_table *table = NULL;
695 int i, num_entries = devicep->get_table_func (devicep->target_id, &table);
697 /* Insert host-target address mapping into dev_splay_tree. */
698 for (i = 0; i < num_entries; i++)
700 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
701 tgt->refcount = 1;
702 tgt->array = gomp_malloc (sizeof (*tgt->array));
703 tgt->tgt_start = table[i].tgt_start;
704 tgt->tgt_end = table[i].tgt_end;
705 tgt->to_free = NULL;
706 tgt->list_count = 0;
707 tgt->device_descr = devicep;
708 splay_tree_node node = tgt->array;
709 splay_tree_key k = &node->key;
710 k->host_start = table[i].host_start;
711 k->host_end = table[i].host_end;
712 k->tgt_offset = 0;
713 k->refcount = 1;
714 k->copy_from = false;
715 k->tgt = tgt;
716 node->left = NULL;
717 node->right = NULL;
718 splay_tree_insert (&mm->splay_tree, node);
721 free (table);
722 mm->is_initialized = true;
725 /* Free address mapping tables. MM must be locked on entry, and remains locked
726 on return. */
728 attribute_hidden void
729 gomp_free_memmap (struct gomp_memory_mapping *mm)
731 while (mm->splay_tree.root)
733 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
735 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
736 free (tgt->array);
737 free (tgt);
740 mm->is_initialized = false;
743 /* This function de-initializes the target device, specified by DEVICEP.
744 DEVICEP must be locked on entry, and remains locked on return. */
746 attribute_hidden void
747 gomp_fini_device (struct gomp_device_descr *devicep)
749 if (devicep->is_initialized)
750 devicep->fini_device_func (devicep->target_id);
752 devicep->is_initialized = false;
755 /* Called when encountering a target directive. If DEVICE
756 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
757 GOMP_DEVICE_HOST_FALLBACK (or any value
758 larger than last available hw device), use host fallback.
759 FN is address of host code, OFFLOAD_TABLE contains value of the
760 __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes
761 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
762 with MAPNUM entries, with addresses of the host objects,
763 sizes of the host objects (resp. for pointer kind pointer bias
764 and assumed sizeof (void *) size) and kinds. */
766 void
767 GOMP_target (int device, void (*fn) (void *), const void *offload_table,
768 size_t mapnum, void **hostaddrs, size_t *sizes,
769 unsigned char *kinds)
771 struct gomp_device_descr *devicep = resolve_device (device);
773 if (devicep == NULL
774 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
776 /* Host fallback. */
777 struct gomp_thread old_thr, *thr = gomp_thread ();
778 old_thr = *thr;
779 memset (thr, '\0', sizeof (*thr));
780 if (gomp_places_list)
782 thr->place = old_thr.place;
783 thr->ts.place_partition_len = gomp_places_list_len;
785 fn (hostaddrs);
786 gomp_free_thread (thr);
787 *thr = old_thr;
788 return;
791 gomp_mutex_lock (&devicep->lock);
792 if (!devicep->is_initialized)
793 gomp_init_device (devicep);
794 gomp_mutex_unlock (&devicep->lock);
796 void *fn_addr;
798 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
799 fn_addr = (void *) fn;
800 else
802 struct gomp_memory_mapping *mm = &devicep->mem_map;
803 gomp_mutex_lock (&mm->lock);
805 if (!mm->is_initialized)
806 gomp_init_tables (devicep, mm);
808 struct splay_tree_key_s k;
809 k.host_start = (uintptr_t) fn;
810 k.host_end = k.host_start + 1;
811 splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
812 if (tgt_fn == NULL)
813 gomp_fatal ("Target function wasn't mapped");
815 gomp_mutex_unlock (&mm->lock);
817 fn_addr = (void *) tgt_fn->tgt->tgt_start;
820 struct target_mem_desc *tgt_vars
821 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
822 true);
823 struct gomp_thread old_thr, *thr = gomp_thread ();
824 old_thr = *thr;
825 memset (thr, '\0', sizeof (*thr));
826 if (gomp_places_list)
828 thr->place = old_thr.place;
829 thr->ts.place_partition_len = gomp_places_list_len;
831 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
832 gomp_free_thread (thr);
833 *thr = old_thr;
834 gomp_unmap_vars (tgt_vars, true);
837 void
838 GOMP_target_data (int device, const void *offload_table, size_t mapnum,
839 void **hostaddrs, size_t *sizes, unsigned char *kinds)
841 struct gomp_device_descr *devicep = resolve_device (device);
843 if (devicep == NULL
844 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
846 /* Host fallback. */
847 struct gomp_task_icv *icv = gomp_icv (false);
848 if (icv->target_data)
850 /* Even when doing a host fallback, if there are any active
851 #pragma omp target data constructs, need to remember the
852 new #pragma omp target data, otherwise GOMP_target_end_data
853 would get out of sync. */
854 struct target_mem_desc *tgt
855 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
856 tgt->prev = icv->target_data;
857 icv->target_data = tgt;
859 return;
862 gomp_mutex_lock (&devicep->lock);
863 if (!devicep->is_initialized)
864 gomp_init_device (devicep);
865 gomp_mutex_unlock (&devicep->lock);
867 struct gomp_memory_mapping *mm = &devicep->mem_map;
868 gomp_mutex_lock (&mm->lock);
869 if (!mm->is_initialized)
870 gomp_init_tables (devicep, mm);
871 gomp_mutex_unlock (&mm->lock);
873 struct target_mem_desc *tgt
874 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
875 false);
876 struct gomp_task_icv *icv = gomp_icv (true);
877 tgt->prev = icv->target_data;
878 icv->target_data = tgt;
881 void
882 GOMP_target_end_data (void)
884 struct gomp_task_icv *icv = gomp_icv (false);
885 if (icv->target_data)
887 struct target_mem_desc *tgt = icv->target_data;
888 icv->target_data = tgt->prev;
889 gomp_unmap_vars (tgt, true);
893 void
894 GOMP_target_update (int device, const void *offload_table, size_t mapnum,
895 void **hostaddrs, size_t *sizes, unsigned char *kinds)
897 struct gomp_device_descr *devicep = resolve_device (device);
899 if (devicep == NULL
900 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
901 return;
903 gomp_mutex_lock (&devicep->lock);
904 if (!devicep->is_initialized)
905 gomp_init_device (devicep);
906 gomp_mutex_unlock (&devicep->lock);
908 struct gomp_memory_mapping *mm = &devicep->mem_map;
909 gomp_mutex_lock (&mm->lock);
910 if (!mm->is_initialized)
911 gomp_init_tables (devicep, mm);
912 gomp_mutex_unlock (&mm->lock);
914 gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
917 void
918 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
920 if (thread_limit)
922 struct gomp_task_icv *icv = gomp_icv (true);
923 icv->thread_limit_var
924 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
926 (void) num_teams;
929 #ifdef PLUGIN_SUPPORT
931 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
932 in PLUGIN_NAME.
933 The handles of the found functions are stored in the corresponding fields
934 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
936 static bool
937 gomp_load_plugin_for_device (struct gomp_device_descr *device,
938 const char *plugin_name)
940 char *err = NULL, *last_missing = NULL;
941 int optional_present, optional_total;
943 /* Clear any existing error. */
944 dlerror ();
946 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
947 if (!plugin_handle)
949 err = dlerror ();
950 goto out;
953 /* Check if all required functions are available in the plugin and store
954 their handlers. */
955 #define DLSYM(f) \
956 do \
958 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
959 err = dlerror (); \
960 if (err != NULL) \
961 goto out; \
963 while (0)
964 /* Similar, but missing functions are not an error. */
965 #define DLSYM_OPT(f, n) \
966 do \
968 char *tmp_err; \
969 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
970 tmp_err = dlerror (); \
971 if (tmp_err == NULL) \
972 optional_present++; \
973 else \
974 last_missing = #n; \
975 optional_total++; \
977 while (0)
979 DLSYM (get_name);
980 DLSYM (get_caps);
981 DLSYM (get_type);
982 DLSYM (get_num_devices);
983 DLSYM (register_image);
984 DLSYM (init_device);
985 DLSYM (fini_device);
986 DLSYM (get_table);
987 DLSYM (alloc);
988 DLSYM (free);
989 DLSYM (dev2host);
990 DLSYM (host2dev);
991 device->capabilities = device->get_caps_func ();
992 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
993 DLSYM (run);
994 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
996 optional_present = optional_total = 0;
997 DLSYM_OPT (openacc.exec, openacc_parallel);
998 DLSYM_OPT (openacc.open_device, openacc_open_device);
999 DLSYM_OPT (openacc.close_device, openacc_close_device);
1000 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
1001 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
1002 DLSYM_OPT (openacc.register_async_cleanup,
1003 openacc_register_async_cleanup);
1004 DLSYM_OPT (openacc.async_test, openacc_async_test);
1005 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
1006 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
1007 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1008 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1009 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1010 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1011 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1012 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1013 /* Require all the OpenACC handlers if we have
1014 GOMP_OFFLOAD_CAP_OPENACC_200. */
1015 if (optional_present != optional_total)
1017 err = "plugin missing OpenACC handler function";
1018 goto out;
1020 optional_present = optional_total = 0;
1021 DLSYM_OPT (openacc.cuda.get_current_device,
1022 openacc_get_current_cuda_device);
1023 DLSYM_OPT (openacc.cuda.get_current_context,
1024 openacc_get_current_cuda_context);
1025 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1026 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1027 /* Make sure all the CUDA functions are there if any of them are. */
1028 if (optional_present && optional_present != optional_total)
1030 err = "plugin missing OpenACC CUDA handler function";
1031 goto out;
1034 #undef DLSYM
1035 #undef DLSYM_OPT
1037 out:
1038 if (err != NULL)
1040 gomp_error ("while loading %s: %s", plugin_name, err);
1041 if (last_missing)
1042 gomp_error ("missing function was %s", last_missing);
1043 if (plugin_handle)
1044 dlclose (plugin_handle);
1046 return err == NULL;
1049 /* This function adds a compatible offload image IMAGE to an accelerator device
1050 DEVICE. DEVICE must be locked on entry, and remains locked on return. */
1052 static void
1053 gomp_register_image_for_device (struct gomp_device_descr *device,
1054 struct offload_image_descr *image)
1056 if (!device->offload_regions_registered
1057 && (device->type == image->type
1058 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1060 device->register_image_func (image->host_table, image->target_data);
1061 device->offload_regions_registered = true;
1065 /* This function initializes the runtime needed for offloading.
1066 It parses the list of offload targets and tries to load the plugins for
1067 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1068 will be set, and the array DEVICES initialized, containing descriptors for
1069 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1070 by the others. */
1072 static void
1073 gomp_target_init (void)
1075 const char *prefix ="libgomp-plugin-";
1076 const char *suffix = ".so.1";
1077 const char *cur, *next;
1078 char *plugin_name;
1079 int i, new_num_devices;
1081 num_devices = 0;
1082 devices = NULL;
1084 cur = OFFLOAD_TARGETS;
1085 if (*cur)
1088 struct gomp_device_descr current_device;
1090 next = strchr (cur, ',');
1092 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1093 + strlen (prefix) + strlen (suffix));
1094 if (!plugin_name)
1096 num_devices = 0;
1097 break;
1100 strcpy (plugin_name, prefix);
1101 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1102 strcat (plugin_name, suffix);
1104 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1106 new_num_devices = current_device.get_num_devices_func ();
1107 if (new_num_devices >= 1)
1109 /* Augment DEVICES and NUM_DEVICES. */
1111 devices = realloc (devices, (num_devices + new_num_devices)
1112 * sizeof (struct gomp_device_descr));
1113 if (!devices)
1115 num_devices = 0;
1116 free (plugin_name);
1117 break;
1120 current_device.type = current_device.get_type_func ();
1121 current_device.name = current_device.get_name_func ();
1122 current_device.is_initialized = false;
1123 current_device.offload_regions_registered = false;
1124 current_device.mem_map.splay_tree.root = NULL;
1125 current_device.mem_map.is_initialized = false;
1126 current_device.openacc.data_environ = NULL;
1127 current_device.openacc.target_data = NULL;
1128 for (i = 0; i < new_num_devices; i++)
1130 current_device.target_id = i;
1131 devices[num_devices] = current_device;
1132 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1133 gomp_mutex_init (&devices[num_devices].lock);
1134 num_devices++;
1139 free (plugin_name);
1140 cur = next + 1;
1142 while (next);
1144 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1145 NUM_DEVICES_OPENMP. */
1146 struct gomp_device_descr *devices_s
1147 = malloc (num_devices * sizeof (struct gomp_device_descr));
1148 if (!devices_s)
1150 num_devices = 0;
1151 free (devices);
1152 devices = NULL;
1154 num_devices_openmp = 0;
1155 for (i = 0; i < num_devices; i++)
1156 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1157 devices_s[num_devices_openmp++] = devices[i];
1158 int num_devices_after_openmp = num_devices_openmp;
1159 for (i = 0; i < num_devices; i++)
1160 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1161 devices_s[num_devices_after_openmp++] = devices[i];
1162 free (devices);
1163 devices = devices_s;
1165 for (i = 0; i < num_devices; i++)
1167 int j;
1169 for (j = 0; j < num_offload_images; j++)
1170 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1172 /* The 'devices' array can be moved (by the realloc call) until we have
1173 found all the plugins, so registering with the OpenACC runtime (which
1174 takes a copy of the pointer argument) must be delayed until now. */
1175 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1176 goacc_register (&devices[i]);
1179 free (offload_images);
1180 offload_images = NULL;
1181 num_offload_images = 0;
1184 #else /* PLUGIN_SUPPORT */
1185 /* If dlfcn.h is unavailable we always fallback to host execution.
1186 GOMP_target* routines are just stubs for this case. */
1187 static void
1188 gomp_target_init (void)
1191 #endif /* PLUGIN_SUPPORT */