1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2018 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
38 /* Return block containing [H->S), or NULL if not contained. The device lock
39 for DEV must be locked on entry, and remains locked on exit. */
42 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
44 struct splay_tree_key_s node
;
47 node
.host_start
= (uintptr_t) h
;
48 node
.host_end
= (uintptr_t) h
+ s
;
50 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
55 /* Return block containing [D->S), or NULL if not contained.
56 The list isn't ordered by device address, so we have to iterate
57 over the whole array. This is not expected to be a common
58 operation. The device lock associated with TGT must be locked on entry, and
59 remains locked on exit. */
62 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
65 struct target_mem_desc
*t
;
70 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
72 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
79 for (i
= 0; i
< t
->list_count
; i
++)
83 splay_tree_key k
= &t
->array
[i
].key
;
84 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
86 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
93 /* OpenACC is silent on how memory exhaustion is indicated. We return
102 goacc_lazy_initialize ();
104 struct goacc_thread
*thr
= goacc_thread ();
108 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
111 return thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
114 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
115 the device address is mapped. We choose to check if it mapped,
116 and if it is, to unmap it. */
125 struct goacc_thread
*thr
= goacc_thread ();
127 assert (thr
&& thr
->dev
);
129 struct gomp_device_descr
*acc_dev
= thr
->dev
;
131 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
134 gomp_mutex_lock (&acc_dev
->lock
);
136 /* We don't have to call lazy open here, as the ptr value must have
137 been returned by acc_malloc. It's not permitted to pass NULL in
138 (unless you got that null from acc_malloc). */
139 if ((k
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1)))
143 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
145 gomp_mutex_unlock (&acc_dev
->lock
);
147 acc_unmap_data ((void *)(k
->host_start
+ offset
));
150 gomp_mutex_unlock (&acc_dev
->lock
);
152 if (!acc_dev
->free_func (acc_dev
->target_id
, d
))
153 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__
);
157 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
159 /* No need to call lazy open here, as the device pointer must have
160 been obtained from a routine that did that. */
161 struct goacc_thread
*thr
= goacc_thread ();
163 assert (thr
&& thr
->dev
);
165 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
171 if (!thr
->dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
))
172 gomp_fatal ("error in %s", __FUNCTION__
);
176 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
178 /* No need to call lazy open here, as the device pointer must have
179 been obtained from a routine that did that. */
180 struct goacc_thread
*thr
= goacc_thread ();
182 assert (thr
&& thr
->dev
);
184 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
190 if (!thr
->dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
))
191 gomp_fatal ("error in %s", __FUNCTION__
);
194 /* Return the device pointer that corresponds to host data H. Or NULL
198 acc_deviceptr (void *h
)
204 goacc_lazy_initialize ();
206 struct goacc_thread
*thr
= goacc_thread ();
207 struct gomp_device_descr
*dev
= thr
->dev
;
209 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
212 gomp_mutex_lock (&dev
->lock
);
214 n
= lookup_host (dev
, h
, 1);
218 gomp_mutex_unlock (&dev
->lock
);
222 offset
= h
- n
->host_start
;
224 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
226 gomp_mutex_unlock (&dev
->lock
);
231 /* Return the host pointer that corresponds to device data D. Or NULL
235 acc_hostptr (void *d
)
241 goacc_lazy_initialize ();
243 struct goacc_thread
*thr
= goacc_thread ();
244 struct gomp_device_descr
*acc_dev
= thr
->dev
;
246 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
249 gomp_mutex_lock (&acc_dev
->lock
);
251 n
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1);
255 gomp_mutex_unlock (&acc_dev
->lock
);
259 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
261 h
= n
->host_start
+ offset
;
263 gomp_mutex_unlock (&acc_dev
->lock
);
268 /* Return 1 if host data [H,+S] is present on the device. */
271 acc_is_present (void *h
, size_t s
)
278 goacc_lazy_initialize ();
280 struct goacc_thread
*thr
= goacc_thread ();
281 struct gomp_device_descr
*acc_dev
= thr
->dev
;
283 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
286 gomp_mutex_lock (&acc_dev
->lock
);
288 n
= lookup_host (acc_dev
, h
, s
);
290 if (n
&& ((uintptr_t)h
< n
->host_start
291 || (uintptr_t)h
+ s
> n
->host_end
292 || s
> n
->host_end
- n
->host_start
))
295 gomp_mutex_unlock (&acc_dev
->lock
);
300 /* Create a mapping for host [H,+S] -> device [D,+S] */
303 acc_map_data (void *h
, void *d
, size_t s
)
305 struct target_mem_desc
*tgt
= NULL
;
310 unsigned short kinds
= GOMP_MAP_ALLOC
;
312 goacc_lazy_initialize ();
314 struct goacc_thread
*thr
= goacc_thread ();
315 struct gomp_device_descr
*acc_dev
= thr
->dev
;
317 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
320 gomp_fatal ("cannot map data on shared-memory system");
324 struct goacc_thread
*thr
= goacc_thread ();
327 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
328 (void *)h
, (int)s
, (void *)d
, (int)s
);
330 gomp_mutex_lock (&acc_dev
->lock
);
332 if (lookup_host (acc_dev
, h
, s
))
334 gomp_mutex_unlock (&acc_dev
->lock
);
335 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
339 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
341 gomp_mutex_unlock (&acc_dev
->lock
);
342 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
346 gomp_mutex_unlock (&acc_dev
->lock
);
348 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
349 &kinds
, true, GOMP_MAP_VARS_OPENACC
);
350 tgt
->list
[0].key
->refcount
= REFCOUNT_INFINITY
;
353 gomp_mutex_lock (&acc_dev
->lock
);
354 tgt
->prev
= acc_dev
->openacc
.data_environ
;
355 acc_dev
->openacc
.data_environ
= tgt
;
356 gomp_mutex_unlock (&acc_dev
->lock
);
360 acc_unmap_data (void *h
)
362 struct goacc_thread
*thr
= goacc_thread ();
363 struct gomp_device_descr
*acc_dev
= thr
->dev
;
365 /* No need to call lazy open, as the address must have been mapped. */
367 /* This is a no-op on shared-memory targets. */
368 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
373 gomp_mutex_lock (&acc_dev
->lock
);
375 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
376 struct target_mem_desc
*t
;
380 gomp_mutex_unlock (&acc_dev
->lock
);
381 gomp_fatal ("%p is not a mapped block", (void *)h
);
384 host_size
= n
->host_end
- n
->host_start
;
386 if (n
->host_start
!= (uintptr_t) h
)
388 gomp_mutex_unlock (&acc_dev
->lock
);
389 gomp_fatal ("[%p,%d] surrounds %p",
390 (void *) n
->host_start
, (int) host_size
, (void *) h
);
393 /* Mark for removal. */
398 if (t
->refcount
== 2)
400 struct target_mem_desc
*tp
;
402 /* This is the last reference, so pull the descriptor off the
403 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
404 freeing the device memory. */
408 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
415 acc_dev
->openacc
.data_environ
= t
->prev
;
421 gomp_mutex_unlock (&acc_dev
->lock
);
423 gomp_unmap_vars (t
, true);
426 #define FLAG_PRESENT (1 << 0)
427 #define FLAG_CREATE (1 << 1)
428 #define FLAG_COPY (1 << 2)
431 present_create_copy (unsigned f
, void *h
, size_t s
)
437 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
439 goacc_lazy_initialize ();
441 struct goacc_thread
*thr
= goacc_thread ();
442 struct gomp_device_descr
*acc_dev
= thr
->dev
;
444 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
447 gomp_mutex_lock (&acc_dev
->lock
);
449 n
= lookup_host (acc_dev
, h
, s
);
453 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
455 if (!(f
& FLAG_PRESENT
))
457 gomp_mutex_unlock (&acc_dev
->lock
);
458 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
459 (void *)h
, (int)s
, (void *)d
, (int)s
);
461 if ((h
+ s
) > (void *)n
->host_end
)
463 gomp_mutex_unlock (&acc_dev
->lock
);
464 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
467 if (n
->refcount
!= REFCOUNT_INFINITY
)
470 n
->dynamic_refcount
++;
472 gomp_mutex_unlock (&acc_dev
->lock
);
474 else if (!(f
& FLAG_CREATE
))
476 gomp_mutex_unlock (&acc_dev
->lock
);
477 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
481 struct target_mem_desc
*tgt
;
483 unsigned short kinds
;
489 kinds
= GOMP_MAP_ALLOC
;
491 gomp_mutex_unlock (&acc_dev
->lock
);
493 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
494 GOMP_MAP_VARS_OPENACC
);
495 /* Initialize dynamic refcount. */
496 tgt
->list
[0].key
->dynamic_refcount
= 1;
498 gomp_mutex_lock (&acc_dev
->lock
);
501 tgt
->prev
= acc_dev
->openacc
.data_environ
;
502 acc_dev
->openacc
.data_environ
= tgt
;
504 gomp_mutex_unlock (&acc_dev
->lock
);
511 acc_create (void *h
, size_t s
)
513 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
517 acc_copyin (void *h
, size_t s
)
519 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
523 acc_present_or_create (void *h
, size_t s
)
525 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_present_or_create, acc_pcreate)
#else
void *
acc_pcreate (void *h, size_t s)
{
  return acc_present_or_create (h, s);
}
#endif
540 acc_present_or_copyin (void *h
, size_t s
)
542 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_present_or_copyin, acc_pcopyin)
#else
void *
acc_pcopyin (void *h, size_t s)
{
  return acc_present_or_copyin (h, s);
}
#endif
556 #define FLAG_COPYOUT (1 << 0)
557 #define FLAG_FINALIZE (1 << 1)
560 delete_copyout (unsigned f
, void *h
, size_t s
, const char *libfnname
)
565 struct goacc_thread
*thr
= goacc_thread ();
566 struct gomp_device_descr
*acc_dev
= thr
->dev
;
568 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
571 gomp_mutex_lock (&acc_dev
->lock
);
573 n
= lookup_host (acc_dev
, h
, s
);
575 /* No need to call lazy open, as the data must already have been
580 gomp_mutex_unlock (&acc_dev
->lock
);
581 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
584 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
585 + (uintptr_t) h
- n
->host_start
);
587 host_size
= n
->host_end
- n
->host_start
;
589 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
591 gomp_mutex_unlock (&acc_dev
->lock
);
592 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
593 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
596 if (n
->refcount
== REFCOUNT_INFINITY
)
599 n
->dynamic_refcount
= 0;
601 if (n
->refcount
< n
->dynamic_refcount
)
603 gomp_mutex_unlock (&acc_dev
->lock
);
604 gomp_fatal ("Dynamic reference counting assert fail\n");
607 if (f
& FLAG_FINALIZE
)
609 n
->refcount
-= n
->dynamic_refcount
;
610 n
->dynamic_refcount
= 0;
612 else if (n
->dynamic_refcount
)
614 n
->dynamic_refcount
--;
618 if (n
->refcount
== 0)
620 if (n
->tgt
->refcount
== 2)
622 struct target_mem_desc
*tp
, *t
;
623 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
630 acc_dev
->openacc
.data_environ
= t
->prev
;
635 if (f
& FLAG_COPYOUT
)
636 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
638 gomp_remove_var (acc_dev
, n
);
641 gomp_mutex_unlock (&acc_dev
->lock
);
/* Drop one dynamic reference to the mapping of [H,+S]; free the device
   block when the last reference goes away (no copy back).  */

void
acc_delete (void *h , size_t s)
{
  delete_copyout (0, h, s, __FUNCTION__);
}
651 acc_delete_finalize (void *h
, size_t s
)
653 delete_copyout (FLAG_FINALIZE
, h
, s
, __FUNCTION__
);
657 acc_delete_finalize_async (void *h
, size_t s
, int async
)
659 delete_copyout (FLAG_FINALIZE
, h
, s
, __FUNCTION__
);
663 acc_copyout (void *h
, size_t s
)
665 delete_copyout (FLAG_COPYOUT
, h
, s
, __FUNCTION__
);
669 acc_copyout_finalize (void *h
, size_t s
)
671 delete_copyout (FLAG_COPYOUT
| FLAG_FINALIZE
, h
, s
, __FUNCTION__
);
675 acc_copyout_finalize_async (void *h
, size_t s
, int async
)
677 delete_copyout (FLAG_COPYOUT
| FLAG_FINALIZE
, h
, s
, __FUNCTION__
);
681 update_dev_host (int is_dev
, void *h
, size_t s
)
686 goacc_lazy_initialize ();
688 struct goacc_thread
*thr
= goacc_thread ();
689 struct gomp_device_descr
*acc_dev
= thr
->dev
;
691 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
694 gomp_mutex_lock (&acc_dev
->lock
);
696 n
= lookup_host (acc_dev
, h
, s
);
700 gomp_mutex_unlock (&acc_dev
->lock
);
701 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
704 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
705 + (uintptr_t) h
- n
->host_start
);
708 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
710 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
712 gomp_mutex_unlock (&acc_dev
->lock
);
/* Refresh the device copy of mapped block [H,+S] from the host.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}
/* Refresh the host copy of mapped block [H,+S] from the device.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
728 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
731 struct target_mem_desc
*tgt
;
732 struct goacc_thread
*thr
= goacc_thread ();
733 struct gomp_device_descr
*acc_dev
= thr
->dev
;
735 if (acc_is_present (*hostaddrs
, *sizes
))
738 gomp_mutex_lock (&acc_dev
->lock
);
739 n
= lookup_host (acc_dev
, *hostaddrs
, *sizes
);
740 gomp_mutex_unlock (&acc_dev
->lock
);
743 for (size_t i
= 0; i
< tgt
->list_count
; i
++)
744 if (tgt
->list
[i
].key
== n
)
746 for (size_t j
= 0; j
< mapnum
; j
++)
747 if (i
+ j
< tgt
->list_count
&& tgt
->list
[i
+ j
].key
)
749 tgt
->list
[i
+ j
].key
->refcount
++;
750 tgt
->list
[i
+ j
].key
->dynamic_refcount
++;
754 /* Should not reach here. */
755 gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
758 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
759 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
760 NULL
, sizes
, kinds
, true, GOMP_MAP_VARS_OPENACC
);
761 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
763 /* Initialize dynamic refcount. */
764 tgt
->list
[0].key
->dynamic_refcount
= 1;
766 gomp_mutex_lock (&acc_dev
->lock
);
767 tgt
->prev
= acc_dev
->openacc
.data_environ
;
768 acc_dev
->openacc
.data_environ
= tgt
;
769 gomp_mutex_unlock (&acc_dev
->lock
);
773 gomp_acc_remove_pointer (void *h
, size_t s
, bool force_copyfrom
, int async
,
774 int finalize
, int mapnum
)
776 struct goacc_thread
*thr
= goacc_thread ();
777 struct gomp_device_descr
*acc_dev
= thr
->dev
;
779 struct target_mem_desc
*t
;
780 int minrefs
= (mapnum
== 1) ? 2 : 3;
782 if (!acc_is_present (h
, s
))
785 gomp_mutex_lock (&acc_dev
->lock
);
787 n
= lookup_host (acc_dev
, h
, 1);
791 gomp_mutex_unlock (&acc_dev
->lock
);
792 gomp_fatal ("%p is not a mapped block", (void *)h
);
795 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
799 if (n
->refcount
< n
->dynamic_refcount
)
801 gomp_mutex_unlock (&acc_dev
->lock
);
802 gomp_fatal ("Dynamic reference counting assert fail\n");
807 n
->refcount
-= n
->dynamic_refcount
;
808 n
->dynamic_refcount
= 0;
810 else if (n
->dynamic_refcount
)
812 n
->dynamic_refcount
--;
816 gomp_mutex_unlock (&acc_dev
->lock
);
818 if (n
->refcount
== 0)
820 if (t
->refcount
== minrefs
)
822 /* This is the last reference, so pull the descriptor off the
823 chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
824 freeing the device memory. */
825 struct target_mem_desc
*tp
;
826 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
834 acc_dev
->openacc
.data_environ
= t
->prev
;
840 /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */
842 t
->refcount
= minrefs
;
843 for (size_t i
= 0; i
< t
->list_count
; i
++)
844 if (t
->list
[i
].key
== n
)
846 t
->list
[i
].copy_from
= force_copyfrom
? 1 : 0;
850 /* If running synchronously, unmap immediately. */
851 if (async
< acc_async_noval
)
852 gomp_unmap_vars (t
, true);
854 t
->device_descr
->openacc
.register_async_cleanup_func (t
, async
);
857 gomp_mutex_unlock (&acc_dev
->lock
);
859 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);