1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2024 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
31 #include "gomp-constants.h"
36 /* Return block containing [H,+S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
40 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
42 struct splay_tree_key_s node
;
45 node
.host_start
= (uintptr_t) h
;
46 node
.host_end
= (uintptr_t) h
+ s
;
48 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
53 /* Helper for lookup_dev. Iterate over splay tree. */
56 lookup_dev_1 (splay_tree_node node
, uintptr_t d
, size_t s
)
58 splay_tree_key key
= &node
->key
;
59 if (d
>= key
->tgt
->tgt_start
&& d
+ s
<= key
->tgt
->tgt_end
)
64 key
= lookup_dev_1 (node
->left
, d
, s
);
65 if (!key
&& node
->right
)
66 key
= lookup_dev_1 (node
->right
, d
, s
);
71 /* Return block containing [D->S), or NULL if not contained.
73 This iterates over the splay tree. This is not expected to be a common
76 The device lock associated with MEM_MAP must be locked on entry, and remains
80 lookup_dev (splay_tree mem_map
, void *d
, size_t s
)
82 if (!mem_map
|| !mem_map
->root
)
85 return lookup_dev_1 (mem_map
->root
, (uintptr_t) d
, s
);
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
98 goacc_lazy_initialize ();
100 struct goacc_thread
*thr
= goacc_thread ();
104 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
107 acc_prof_info prof_info
;
108 acc_api_info api_info
;
109 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
111 void *res
= thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
115 thr
->prof_info
= NULL
;
116 thr
->api_info
= NULL
;
130 struct goacc_thread
*thr
= goacc_thread ();
132 assert (thr
&& thr
->dev
);
134 struct gomp_device_descr
*acc_dev
= thr
->dev
;
136 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
139 acc_prof_info prof_info
;
140 acc_api_info api_info
;
141 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
143 gomp_mutex_lock (&acc_dev
->lock
);
145 /* We don't have to call lazy open here, as the ptr value must have
146 been returned by acc_malloc. It's not permitted to pass NULL in
147 (unless you got that null from acc_malloc). */
148 if ((k
= lookup_dev (&acc_dev
->mem_map
, d
, 1)))
150 void *offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
151 void *h
= k
->host_start
+ offset
;
152 size_t h_size
= k
->host_end
- k
->host_start
;
153 gomp_mutex_unlock (&acc_dev
->lock
);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
161 gomp_mutex_unlock (&acc_dev
->lock
);
163 if (!acc_dev
->free_func (acc_dev
->target_id
, d
))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__
);
168 thr
->prof_info
= NULL
;
169 thr
->api_info
= NULL
;
174 memcpy_tofrom_device (bool from
, void *d
, void *h
, size_t s
, int async
,
175 const char *libfnname
)
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread
*thr
= goacc_thread ();
181 assert (thr
&& thr
->dev
);
183 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
192 acc_prof_info prof_info
;
193 acc_api_info api_info
;
194 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
197 prof_info
.async
= async
;
198 prof_info
.async_queue
= prof_info
.async
;
201 goacc_aq aq
= get_goacc_asyncqueue (async
);
203 gomp_copy_dev2host (thr
->dev
, aq
, h
, d
, s
);
205 gomp_copy_host2dev (thr
->dev
, aq
, d
, h
, s
, false, /* TODO: cbuf? */ NULL
);
209 thr
->prof_info
= NULL
;
210 thr
->api_info
= NULL
;
215 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
217 memcpy_tofrom_device (false, d
, h
, s
, acc_async_sync
, __FUNCTION__
);
221 acc_memcpy_to_device_async (void *d
, void *h
, size_t s
, int async
)
223 memcpy_tofrom_device (false, d
, h
, s
, async
, __FUNCTION__
);
227 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
229 memcpy_tofrom_device (true, d
, h
, s
, acc_async_sync
, __FUNCTION__
);
233 acc_memcpy_from_device_async (void *h
, void *d
, size_t s
, int async
)
235 memcpy_tofrom_device (true, d
, h
, s
, async
, __FUNCTION__
);
238 /* Return the device pointer that corresponds to host data H. Or NULL
242 acc_deviceptr (void *h
)
248 goacc_lazy_initialize ();
250 struct goacc_thread
*thr
= goacc_thread ();
251 struct gomp_device_descr
*dev
= thr
->dev
;
253 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
256 /* In the following, no OpenACC Profiling Interface events can possibly be
259 gomp_mutex_lock (&dev
->lock
);
261 n
= lookup_host (dev
, h
, 1);
265 gomp_mutex_unlock (&dev
->lock
);
269 offset
= h
- n
->host_start
;
271 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
273 gomp_mutex_unlock (&dev
->lock
);
278 /* Return the host pointer that corresponds to device data D. Or NULL
282 acc_hostptr (void *d
)
288 goacc_lazy_initialize ();
290 struct goacc_thread
*thr
= goacc_thread ();
291 struct gomp_device_descr
*acc_dev
= thr
->dev
;
293 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
296 /* In the following, no OpenACC Profiling Interface events can possibly be
299 gomp_mutex_lock (&acc_dev
->lock
);
301 n
= lookup_dev (&acc_dev
->mem_map
, d
, 1);
305 gomp_mutex_unlock (&acc_dev
->lock
);
309 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
311 h
= n
->host_start
+ offset
;
313 gomp_mutex_unlock (&acc_dev
->lock
);
318 /* Return 1 if host data [H,+S] is present on the device. */
321 acc_is_present (void *h
, size_t s
)
328 goacc_lazy_initialize ();
330 struct goacc_thread
*thr
= goacc_thread ();
331 struct gomp_device_descr
*acc_dev
= thr
->dev
;
333 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
336 /* In the following, no OpenACC Profiling Interface events can possibly be
339 gomp_mutex_lock (&acc_dev
->lock
);
341 n
= lookup_host (acc_dev
, h
, s
);
343 if (n
&& ((uintptr_t)h
< n
->host_start
344 || (uintptr_t)h
+ s
> n
->host_end
345 || s
> n
->host_end
- n
->host_start
))
348 gomp_mutex_unlock (&acc_dev
->lock
);
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
356 acc_map_data (void *h
, void *d
, size_t s
)
362 unsigned short kinds
= GOMP_MAP_ALLOC
;
364 goacc_lazy_initialize ();
366 struct goacc_thread
*thr
= goacc_thread ();
367 struct gomp_device_descr
*acc_dev
= thr
->dev
;
369 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
372 gomp_fatal ("cannot map data on shared-memory system");
376 struct goacc_thread
*thr
= goacc_thread ();
379 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
380 (void *)h
, (int)s
, (void *)d
, (int)s
);
382 acc_prof_info prof_info
;
383 acc_api_info api_info
;
384 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
386 gomp_mutex_lock (&acc_dev
->lock
);
388 if (lookup_host (acc_dev
, h
, s
))
390 gomp_mutex_unlock (&acc_dev
->lock
);
391 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
395 if (lookup_dev (&thr
->dev
->mem_map
, d
, s
))
397 gomp_mutex_unlock (&acc_dev
->lock
);
398 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
402 gomp_mutex_unlock (&acc_dev
->lock
);
404 struct target_mem_desc
*tgt
405 = goacc_map_vars (acc_dev
, NULL
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
406 &kinds
, true, GOMP_MAP_VARS_ENTER_DATA
);
408 assert (tgt
->list_count
== 1);
409 splay_tree_key n
= tgt
->list
[0].key
;
411 assert (n
->refcount
== 1);
412 assert (n
->dynamic_refcount
== 0);
413 /* Special reference counting behavior. */
414 n
->refcount
= REFCOUNT_INFINITY
;
418 thr
->prof_info
= NULL
;
419 thr
->api_info
= NULL
;
425 acc_unmap_data (void *h
)
427 struct goacc_thread
*thr
= goacc_thread ();
428 struct gomp_device_descr
*acc_dev
= thr
->dev
;
430 /* No need to call lazy open, as the address must have been mapped. */
432 /* This is a no-op on shared-memory targets. */
433 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
436 acc_prof_info prof_info
;
437 acc_api_info api_info
;
438 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
440 gomp_mutex_lock (&acc_dev
->lock
);
442 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
446 gomp_mutex_unlock (&acc_dev
->lock
);
447 gomp_fatal ("%p is not a mapped block", (void *)h
);
450 size_t host_size
= n
->host_end
- n
->host_start
;
452 if (n
->host_start
!= (uintptr_t) h
)
454 gomp_mutex_unlock (&acc_dev
->lock
);
455 gomp_fatal ("[%p,%d] surrounds %p",
456 (void *) n
->host_start
, (int) host_size
, (void *) h
);
458 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
459 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating
460 the different 'REFCOUNT_INFINITY' cases, or simply separate
461 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
463 else if (n
->refcount
!= REFCOUNT_INFINITY
)
465 gomp_mutex_unlock (&acc_dev
->lock
);
466 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
467 " by 'acc_map_data'",
468 (void *) h
, (int) host_size
);
471 struct target_mem_desc
*tgt
= n
->tgt
;
473 if (tgt
->refcount
== REFCOUNT_INFINITY
)
475 gomp_mutex_unlock (&acc_dev
->lock
);
476 gomp_fatal ("cannot unmap target block");
479 /* Above, we've verified that the mapping must have been set up by
481 assert (tgt
->refcount
== 1);
483 /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
484 from freeing the target memory. */
488 bool is_tgt_unmapped
= gomp_remove_var (acc_dev
, n
);
489 assert (is_tgt_unmapped
);
491 gomp_mutex_unlock (&acc_dev
->lock
);
495 thr
->prof_info
= NULL
;
496 thr
->api_info
= NULL
;
501 /* Helper function to map a single dynamic data item, represented by a single
502 mapping. The acc_dev->lock should be held on entry, and remains locked on
506 goacc_map_var_existing (struct gomp_device_descr
*acc_dev
, void *hostaddr
,
507 size_t size
, splay_tree_key n
)
512 void *d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ hostaddr
515 if (hostaddr
+ size
> (void *) n
->host_end
)
517 gomp_mutex_unlock (&acc_dev
->lock
);
518 gomp_fatal ("[%p,+%d] not mapped", hostaddr
, (int) size
);
521 assert (n
->refcount
!= REFCOUNT_LINK
);
522 if (n
->refcount
!= REFCOUNT_INFINITY
)
524 n
->dynamic_refcount
++;
529 /* Enter dynamic mapping for a single datum. Return the device pointer. */
532 goacc_enter_datum (void **hostaddrs
, size_t *sizes
, void *kinds
, int async
)
537 if (!hostaddrs
[0] || !sizes
[0])
538 gomp_fatal ("[%p,+%d] is a bad range", hostaddrs
[0], (int) sizes
[0]);
540 goacc_lazy_initialize ();
542 struct goacc_thread
*thr
= goacc_thread ();
543 struct gomp_device_descr
*acc_dev
= thr
->dev
;
545 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
548 acc_prof_info prof_info
;
549 acc_api_info api_info
;
550 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
553 prof_info
.async
= async
;
554 prof_info
.async_queue
= prof_info
.async
;
557 gomp_mutex_lock (&acc_dev
->lock
);
559 n
= lookup_host (acc_dev
, hostaddrs
[0], sizes
[0]);
562 d
= goacc_map_var_existing (acc_dev
, hostaddrs
[0], sizes
[0], n
);
563 gomp_mutex_unlock (&acc_dev
->lock
);
567 const size_t mapnum
= 1;
569 gomp_mutex_unlock (&acc_dev
->lock
);
571 goacc_aq aq
= get_goacc_asyncqueue (async
);
573 struct target_mem_desc
*tgt
574 = goacc_map_vars (acc_dev
, aq
, mapnum
, hostaddrs
, NULL
, sizes
,
575 kinds
, true, GOMP_MAP_VARS_ENTER_DATA
);
577 assert (tgt
->list_count
== 1);
578 n
= tgt
->list
[0].key
;
580 assert (n
->refcount
== 1);
581 assert (n
->dynamic_refcount
== 0);
582 n
->dynamic_refcount
++;
584 d
= (void *) tgt
->tgt_start
;
589 thr
->prof_info
= NULL
;
590 thr
->api_info
= NULL
;
597 acc_create (void *h
, size_t s
)
599 unsigned short kinds
[1] = { GOMP_MAP_ALLOC
};
600 return goacc_enter_datum (&h
, &s
, &kinds
, acc_async_sync
);
604 acc_create_async (void *h
, size_t s
, int async
)
606 unsigned short kinds
[1] = { GOMP_MAP_ALLOC
};
607 goacc_enter_datum (&h
, &s
, &kinds
, async
);
610 /* acc_present_or_create used to be what acc_create is now. */
611 /* acc_pcreate is acc_present_or_create by a different name. */
612 #ifdef HAVE_ATTRIBUTE_ALIAS
613 strong_alias (acc_create
, acc_present_or_create
)
614 strong_alias (acc_create
, acc_pcreate
)
617 acc_present_or_create (void *h
, size_t s
)
619 return acc_create (h
, s
);
623 acc_pcreate (void *h
, size_t s
)
625 return acc_create (h
, s
);
630 acc_copyin (void *h
, size_t s
)
632 unsigned short kinds
[1] = { GOMP_MAP_TO
};
633 return goacc_enter_datum (&h
, &s
, &kinds
, acc_async_sync
);
637 acc_copyin_async (void *h
, size_t s
, int async
)
639 unsigned short kinds
[1] = { GOMP_MAP_TO
};
640 goacc_enter_datum (&h
, &s
, &kinds
, async
);
643 /* acc_present_or_copyin used to be what acc_copyin is now. */
644 /* acc_pcopyin is acc_present_or_copyin by a different name. */
645 #ifdef HAVE_ATTRIBUTE_ALIAS
646 strong_alias (acc_copyin
, acc_present_or_copyin
)
647 strong_alias (acc_copyin
, acc_pcopyin
)
650 acc_present_or_copyin (void *h
, size_t s
)
652 return acc_copyin (h
, s
);
656 acc_pcopyin (void *h
, size_t s
)
658 return acc_copyin (h
, s
);
663 /* Helper function to unmap a single data item. Device lock should be held on
664 entry, and remains locked on exit. */
667 goacc_exit_datum_1 (struct gomp_device_descr
*acc_dev
, void *h
, size_t s
,
668 unsigned short kind
, splay_tree_key n
, goacc_aq aq
)
670 assert (kind
!= GOMP_MAP_DETACH
671 && kind
!= GOMP_MAP_FORCE_DETACH
);
673 if ((uintptr_t) h
< n
->host_start
|| (uintptr_t) h
+ s
> n
->host_end
)
675 size_t host_size
= n
->host_end
- n
->host_start
;
676 gomp_mutex_unlock (&acc_dev
->lock
);
677 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
678 (void *) h
, (int) s
, (void *) n
->host_start
, (int) host_size
);
681 bool finalize
= (kind
== GOMP_MAP_FORCE_FROM
682 || kind
== GOMP_MAP_DELETE
);
684 assert (n
->refcount
!= REFCOUNT_LINK
);
685 if (n
->refcount
!= REFCOUNT_INFINITY
686 && n
->refcount
< n
->dynamic_refcount
)
688 gomp_mutex_unlock (&acc_dev
->lock
);
689 gomp_fatal ("Dynamic reference counting assert fail\n");
694 if (n
->refcount
!= REFCOUNT_INFINITY
)
695 n
->refcount
-= n
->dynamic_refcount
;
696 n
->dynamic_refcount
= 0;
698 else if (n
->dynamic_refcount
)
700 if (n
->refcount
!= REFCOUNT_INFINITY
)
702 n
->dynamic_refcount
--;
705 if (n
->refcount
== 0)
707 bool copyout
= (kind
== GOMP_MAP_FROM
708 || kind
== GOMP_MAP_FORCE_FROM
);
711 void *d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
712 + (uintptr_t) h
- n
->host_start
);
713 gomp_copy_dev2host (acc_dev
, aq
, h
, d
, s
);
717 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
718 'gomp_unref_tgt' comment in
719 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
721 gomp_remove_var_async (acc_dev
, n
, aq
);
724 size_t num_mappings
= 0;
725 /* If the target_mem_desc represents a single data mapping, we can
726 check that it is freed when this splay tree key's refcount reaches
727 zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
728 multiple members), fall back to skipping the test. */
729 for (size_t l_i
= 0; l_i
< n
->tgt
->list_count
; ++l_i
)
730 if (n
->tgt
->list
[l_i
].key
731 && !n
->tgt
->list
[l_i
].is_attach
)
733 bool is_tgt_unmapped
= gomp_remove_var (acc_dev
, n
);
734 assert (is_tgt_unmapped
|| num_mappings
> 1);
740 /* Exit a dynamic mapping for a single variable. */
743 goacc_exit_datum (void *h
, size_t s
, unsigned short kind
, int async
)
745 /* No need to call lazy open, as the data must already have been
750 struct goacc_thread
*thr
= goacc_thread ();
751 struct gomp_device_descr
*acc_dev
= thr
->dev
;
753 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
756 acc_prof_info prof_info
;
757 acc_api_info api_info
;
758 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
761 prof_info
.async
= async
;
762 prof_info
.async_queue
= prof_info
.async
;
765 gomp_mutex_lock (&acc_dev
->lock
);
767 splay_tree_key n
= lookup_host (acc_dev
, h
, s
);
768 /* Non-present data is a no-op: PR92726, PR92970, PR92984. */
771 goacc_aq aq
= get_goacc_asyncqueue (async
);
772 goacc_exit_datum_1 (acc_dev
, h
, s
, kind
, n
, aq
);
775 gomp_mutex_unlock (&acc_dev
->lock
);
779 thr
->prof_info
= NULL
;
780 thr
->api_info
= NULL
;
785 acc_delete (void *h
, size_t s
)
787 goacc_exit_datum (h
, s
, GOMP_MAP_RELEASE
, acc_async_sync
);
791 acc_delete_async (void *h
, size_t s
, int async
)
793 goacc_exit_datum (h
, s
, GOMP_MAP_RELEASE
, async
);
797 acc_delete_finalize (void *h
, size_t s
)
799 goacc_exit_datum (h
, s
, GOMP_MAP_DELETE
, acc_async_sync
);
803 acc_delete_finalize_async (void *h
, size_t s
, int async
)
805 goacc_exit_datum (h
, s
, GOMP_MAP_DELETE
, async
);
809 acc_copyout (void *h
, size_t s
)
811 goacc_exit_datum (h
, s
, GOMP_MAP_FROM
, acc_async_sync
);
815 acc_copyout_async (void *h
, size_t s
, int async
)
817 goacc_exit_datum (h
, s
, GOMP_MAP_FROM
, async
);
821 acc_copyout_finalize (void *h
, size_t s
)
823 goacc_exit_datum (h
, s
, GOMP_MAP_FORCE_FROM
, acc_async_sync
);
827 acc_copyout_finalize_async (void *h
, size_t s
, int async
)
829 goacc_exit_datum (h
, s
, GOMP_MAP_FORCE_FROM
, async
);
833 update_dev_host (int is_dev
, void *h
, size_t s
, int async
)
838 goacc_lazy_initialize ();
840 struct goacc_thread
*thr
= goacc_thread ();
841 struct gomp_device_descr
*acc_dev
= thr
->dev
;
843 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
846 /* Fortran optional arguments that are non-present result in a
847 NULL host address here. This can safely be ignored as it is
848 not possible to 'update' a non-present optional argument. */
852 acc_prof_info prof_info
;
853 acc_api_info api_info
;
854 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
857 prof_info
.async
= async
;
858 prof_info
.async_queue
= prof_info
.async
;
861 gomp_mutex_lock (&acc_dev
->lock
);
863 n
= lookup_host (acc_dev
, h
, s
);
867 gomp_mutex_unlock (&acc_dev
->lock
);
868 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
871 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
872 + (uintptr_t) h
- n
->host_start
);
874 goacc_aq aq
= get_goacc_asyncqueue (async
);
877 gomp_copy_host2dev (acc_dev
, aq
, d
, h
, s
, false, /* TODO: cbuf? */ NULL
);
879 gomp_copy_dev2host (acc_dev
, aq
, h
, d
, s
);
881 gomp_mutex_unlock (&acc_dev
->lock
);
885 thr
->prof_info
= NULL
;
886 thr
->api_info
= NULL
;
891 acc_update_device (void *h
, size_t s
)
893 update_dev_host (1, h
, s
, acc_async_sync
);
897 acc_update_device_async (void *h
, size_t s
, int async
)
899 update_dev_host (1, h
, s
, async
);
903 acc_update_self (void *h
, size_t s
)
905 update_dev_host (0, h
, s
, acc_async_sync
);
909 acc_update_self_async (void *h
, size_t s
, int async
)
911 update_dev_host (0, h
, s
, async
);
915 acc_attach_async (void **hostaddr
, int async
)
917 struct goacc_thread
*thr
= goacc_thread ();
918 struct gomp_device_descr
*acc_dev
= thr
->dev
;
919 goacc_aq aq
= get_goacc_asyncqueue (async
);
921 struct splay_tree_key_s cur_node
;
924 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
927 gomp_mutex_lock (&acc_dev
->lock
);
929 cur_node
.host_start
= (uintptr_t) hostaddr
;
930 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
931 n
= splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
935 gomp_mutex_unlock (&acc_dev
->lock
);
936 gomp_fatal ("struct not mapped for acc_attach");
939 gomp_attach_pointer (acc_dev
, aq
, &acc_dev
->mem_map
, n
, (uintptr_t) hostaddr
,
942 gomp_mutex_unlock (&acc_dev
->lock
);
946 acc_attach (void **hostaddr
)
948 acc_attach_async (hostaddr
, acc_async_sync
);
952 goacc_detach_internal (void **hostaddr
, int async
, bool finalize
)
954 struct goacc_thread
*thr
= goacc_thread ();
955 struct gomp_device_descr
*acc_dev
= thr
->dev
;
956 struct splay_tree_key_s cur_node
;
958 struct goacc_asyncqueue
*aq
= get_goacc_asyncqueue (async
);
960 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
963 gomp_mutex_lock (&acc_dev
->lock
);
965 cur_node
.host_start
= (uintptr_t) hostaddr
;
966 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
967 n
= splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
971 gomp_mutex_unlock (&acc_dev
->lock
);
972 gomp_fatal ("struct not mapped for acc_detach");
975 gomp_detach_pointer (acc_dev
, aq
, n
, (uintptr_t) hostaddr
, finalize
, NULL
);
977 gomp_mutex_unlock (&acc_dev
->lock
);
981 acc_detach (void **hostaddr
)
983 goacc_detach_internal (hostaddr
, acc_async_sync
, false);
987 acc_detach_async (void **hostaddr
, int async
)
989 goacc_detach_internal (hostaddr
, async
, false);
993 acc_detach_finalize (void **hostaddr
)
995 goacc_detach_internal (hostaddr
, acc_async_sync
, true);
999 acc_detach_finalize_async (void **hostaddr
, int async
)
1001 goacc_detach_internal (hostaddr
, async
, true);
1004 /* Some types of (pointer) variables use several consecutive mappings, which
1005 must be treated as a group for enter/exit data directives. This function
1006 returns the last mapping in such a group (inclusive), or POS for singleton
1010 find_group_last (int pos
, size_t mapnum
, size_t *sizes
, unsigned short *kinds
)
1012 unsigned char kind0
= kinds
[pos
] & 0xff;
1013 int first_pos
= pos
;
1017 case GOMP_MAP_TO_PSET
:
1018 if (pos
+ 1 < mapnum
1019 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
1022 while (pos
+ 1 < mapnum
1023 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_POINTER
)
1025 /* We expect at least one GOMP_MAP_POINTER (if not a single
1026 GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET. */
1027 assert (pos
> first_pos
);
1030 case GOMP_MAP_STRUCT
:
1031 case GOMP_MAP_STRUCT_UNORD
:
1035 case GOMP_MAP_POINTER
:
1036 case GOMP_MAP_ALWAYS_POINTER
:
1037 /* These mappings are only expected after some other mapping. If we
1038 see one by itself, something has gone wrong. */
1039 gomp_fatal ("unexpected mapping");
1042 case GOMP_MAP_ATTACH
:
1046 /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
1048 if (pos
+ 1 < mapnum
)
1050 unsigned char kind1
= kinds
[pos
+ 1] & 0xff;
1051 if (kind1
== GOMP_MAP_ALWAYS_POINTER
)
1055 /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
1057 if (pos
+ 1 < mapnum
1058 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
1061 /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
1063 while (pos
+ 1 < mapnum
1064 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_POINTER
)
1071 /* Map variables for OpenACC "enter data". We can't just call
1072 goacc_map_vars once, because individual mapped variables might have
1073 "exit data" called for them at different times. */
1076 goacc_enter_data_internal (struct gomp_device_descr
*acc_dev
, size_t mapnum
,
1077 void **hostaddrs
, size_t *sizes
,
1078 unsigned short *kinds
, goacc_aq aq
)
1080 gomp_mutex_lock (&acc_dev
->lock
);
1082 for (size_t i
= 0; i
< mapnum
; i
++)
1085 size_t group_last
= find_group_last (i
, mapnum
, sizes
, kinds
);
1086 bool struct_p
= false;
1087 size_t size
, groupnum
= (group_last
- i
) + 1;
1089 switch (kinds
[i
] & 0xff)
1091 case GOMP_MAP_STRUCT
:
1092 case GOMP_MAP_STRUCT_UNORD
:
1094 size
= (uintptr_t) hostaddrs
[group_last
] + sizes
[group_last
]
1095 - (uintptr_t) hostaddrs
[i
];
1100 case GOMP_MAP_ATTACH
:
1101 size
= sizeof (void *);
1108 n
= lookup_host (acc_dev
, hostaddrs
[i
], size
);
1112 for (size_t j
= i
+ 1; j
<= group_last
; j
++)
1114 struct splay_tree_key_s cur_node
;
1115 cur_node
.host_start
= (uintptr_t) hostaddrs
[j
];
1116 cur_node
.host_end
= cur_node
.host_start
+ sizes
[j
];
1118 = splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
1120 || n2
->tgt
!= n
->tgt
1121 || n2
->host_start
- n
->host_start
1122 != n2
->tgt_offset
- n
->tgt_offset
)
1124 gomp_mutex_unlock (&acc_dev
->lock
);
1125 gomp_fatal ("Trying to map into device [%p..%p) structure "
1126 "element when other mapped elements from the "
1127 "same structure weren't mapped together with "
1128 "it", (void *) cur_node
.host_start
,
1129 (void *) cur_node
.host_end
);
1132 /* This is a special case because we must increment the refcount by
1133 the number of mapped struct elements, rather than by one. */
1134 if (n
->refcount
!= REFCOUNT_INFINITY
)
1135 n
->refcount
+= groupnum
- 1;
1136 n
->dynamic_refcount
+= groupnum
- 1;
1138 else if (n
&& groupnum
== 1)
1140 void *h
= hostaddrs
[i
];
1141 size_t s
= sizes
[i
];
1143 if ((kinds
[i
] & 0xff) == GOMP_MAP_ATTACH
)
1145 gomp_attach_pointer (acc_dev
, aq
, &acc_dev
->mem_map
, n
,
1146 (uintptr_t) h
, s
, NULL
, false);
1147 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
1148 reference counts ('n->refcount', 'n->dynamic_refcount'). */
1151 goacc_map_var_existing (acc_dev
, h
, s
, n
);
1153 else if (n
&& groupnum
> 1)
1155 assert (n
->refcount
!= REFCOUNT_LINK
);
1157 for (size_t j
= i
+ 1; j
<= group_last
; j
++)
1158 if ((kinds
[j
] & 0xff) == GOMP_MAP_ATTACH
)
1161 = lookup_host (acc_dev
, hostaddrs
[j
], sizeof (void *));
1162 gomp_attach_pointer (acc_dev
, aq
, &acc_dev
->mem_map
, m
,
1163 (uintptr_t) hostaddrs
[j
], sizes
[j
], NULL
,
1167 bool processed
= false;
1169 struct target_mem_desc
*tgt
= n
->tgt
;
1171 /* Minimal OpenACC variant corresponding to PR96668
1172 "[OpenMP] Re-mapping allocated but previously unallocated
1173 allocatable does not work" 'libgomp/target.c' changes, so that
1174 OpenACC 'declare' code à la PR106643
1175 "[gfortran + OpenACC] Allocate in module causes refcount error"
1176 has a chance to work. */
1177 if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
1178 && tgt
->list_count
== 0)
1180 /* 'declare target'. */
1181 assert (n
->refcount
== REFCOUNT_INFINITY
);
1183 for (size_t k
= 1; k
< groupnum
; k
++)
1185 /* The only thing we expect to see here. */
1186 assert ((kinds
[i
+ k
] & 0xff) == GOMP_MAP_POINTER
);
1189 /* Let 'goacc_map_vars' -> 'gomp_map_vars_internal' handle
1191 gomp_mutex_unlock (&acc_dev
->lock
);
1192 struct target_mem_desc
*tgt_
1193 = goacc_map_vars (acc_dev
, aq
, groupnum
, &hostaddrs
[i
], NULL
,
1194 &sizes
[i
], &kinds
[i
], true,
1195 GOMP_MAP_VARS_ENTER_DATA
);
1196 assert (tgt_
== NULL
);
1197 gomp_mutex_lock (&acc_dev
->lock
);
1199 /* Given that 'goacc_exit_data_internal'/'goacc_exit_datum_1'
1200 will always see 'n->refcount == REFCOUNT_INFINITY',
1201 there's no need to adjust 'n->dynamic_refcount' here. */
1206 assert (n
->refcount
!= REFCOUNT_INFINITY
);
1208 for (size_t j
= 0; j
< tgt
->list_count
; j
++)
1209 if (tgt
->list
[j
].key
== n
)
1211 /* We are processing a group of mappings (e.g.
1212 [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
1213 Find the right group in the target_mem_desc's variable
1214 list, and increment the refcounts for each item in that
1216 for (size_t k
= 0; k
< groupnum
; k
++)
1217 if (j
+ k
< tgt
->list_count
1218 && tgt
->list
[j
+ k
].key
1219 && !tgt
->list
[j
+ k
].is_attach
)
1221 tgt
->list
[j
+ k
].key
->refcount
++;
1222 tgt
->list
[j
+ k
].key
->dynamic_refcount
++;
1230 gomp_mutex_unlock (&acc_dev
->lock
);
1231 gomp_fatal ("dynamic refcount incrementing failed for "
1235 else if (hostaddrs
[i
])
1237 /* The data is not mapped already. Map it now, unless the first
1238 member in the group has a NULL pointer (e.g. a non-present
1239 optional parameter). */
1240 gomp_mutex_unlock (&acc_dev
->lock
);
1242 struct target_mem_desc
*tgt
1243 = goacc_map_vars (acc_dev
, aq
, groupnum
, &hostaddrs
[i
], NULL
,
1244 &sizes
[i
], &kinds
[i
], true,
1245 GOMP_MAP_VARS_ENTER_DATA
);
1248 gomp_mutex_lock (&acc_dev
->lock
);
1250 for (size_t j
= 0; j
< tgt
->list_count
; j
++)
1252 n
= tgt
->list
[j
].key
;
1253 if (n
&& !tgt
->list
[j
].is_attach
)
1254 n
->dynamic_refcount
++;
1261 gomp_mutex_unlock (&acc_dev
->lock
);
1264 /* Unmap variables for OpenACC "exit data". */
1267 goacc_exit_data_internal (struct gomp_device_descr
*acc_dev
, size_t mapnum
,
1268 void **hostaddrs
, size_t *sizes
,
1269 unsigned short *kinds
, goacc_aq aq
)
1271 gomp_mutex_lock (&acc_dev
->lock
);
1273 /* Handle "detach" before copyback/deletion of mapped data. */
1274 for (size_t i
= 0; i
< mapnum
; ++i
)
1276 unsigned char kind
= kinds
[i
] & 0xff;
1277 bool finalize
= false;
1280 case GOMP_MAP_FORCE_DETACH
:
1284 case GOMP_MAP_DETACH
:
1286 struct splay_tree_key_s cur_node
;
1287 uintptr_t hostaddr
= (uintptr_t) hostaddrs
[i
];
1288 cur_node
.host_start
= hostaddr
;
1289 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1291 = splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
1295 gomp_mutex_unlock (&acc_dev
->lock
);
1296 gomp_fatal ("struct not mapped for detach operation");
1299 gomp_detach_pointer (acc_dev
, aq
, n
, hostaddr
, finalize
, NULL
);
1307 for (size_t i
= 0; i
< mapnum
; ++i
)
1309 unsigned char kind
= kinds
[i
] & 0xff;
1314 case GOMP_MAP_FORCE_FROM
:
1315 case GOMP_MAP_TO_PSET
:
1316 case GOMP_MAP_POINTER
:
1317 case GOMP_MAP_DELETE
:
1318 case GOMP_MAP_RELEASE
:
1320 struct splay_tree_key_s cur_node
;
1322 if (kind
== GOMP_MAP_POINTER
)
1323 size
= sizeof (void *);
1326 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1327 cur_node
.host_end
= cur_node
.host_start
+ size
;
1329 = splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
1334 goacc_exit_datum_1 (acc_dev
, hostaddrs
[i
], size
, kind
, n
, aq
);
1338 case GOMP_MAP_STRUCT
:
1339 case GOMP_MAP_STRUCT_UNORD
:
1340 /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
1341 for all its entries. This special handling exists for GCC 10.1
1342 compatibility; afterwards, we're not generating these no-op
1343 'GOMP_MAP_STRUCT's anymore. */
1346 case GOMP_MAP_DETACH
:
1347 case GOMP_MAP_FORCE_DETACH
:
1348 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
1349 reference counts ('n->refcount', 'n->dynamic_refcount'). */
1353 gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1358 gomp_mutex_unlock (&acc_dev
->lock
);
/* Common implementation behind the OpenACC 'enter data' and 'exit data'
   entry points; DATA_ENTER tells which of the two is being processed.
   FLAGS_M holds the marshalled flags, MAPNUM, HOSTADDRS, SIZES and KINDS
   are handed on unchanged to the enter/exit workers, and ASYNC and
   NUM_WAITS are used for the wait handling and the async queue lookup.

   NOTE(review): several lines of this function are not visible in this
   extract (the end of the parameter list, where ACC_DEV gets assigned, and
   the conditions guarding the statements below) -- confirm against the
   complete file before relying on the notes here.  */
1362 goacc_enter_exit_data_internal (int flags_m
, size_t mapnum
, void **hostaddrs
,
1363 size_t *sizes
, unsigned short *kinds
,
1364 bool data_enter
, int async
, int num_waits
,
/* Decode the marshalled flags into FLAGS.  */
1367 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
1369 struct goacc_thread
*thr
;
1370 struct gomp_device_descr
*acc_dev
;
/* goacc_lazy_initialize is called before the thread data is fetched.  */
1372 goacc_lazy_initialize ();
1374 thr
= goacc_thread ();
/* Evaluated once up front.  NOTE(review): presumably this guards the
   profiling set-up and dispatch code below -- the guards themselves are not
   visible here; confirm.  */
1377 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
1379 acc_prof_info prof_info
;
/* Make PROF_INFO reachable through the thread data, and fill it in for
   the '*_start' event that corresponds to DATA_ENTER.  */
1382 thr
->prof_info
= &prof_info
;
1384 prof_info
.event_type
1385 = data_enter
? acc_ev_enter_data_start
: acc_ev_exit_data_start
;
1386 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
1387 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
1388 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
1389 prof_info
.device_number
= acc_dev
->target_id
;
1390 prof_info
.thread_id
= -1;
1391 prof_info
.async
= async
;
/* The async queue field is set to the same value as the async field.  */
1392 prof_info
.async_queue
= prof_info
.async
;
/* No source location is provided: the file and function names are NULL,
   and all line numbers are -1.  */
1393 prof_info
.src_file
= NULL
;
1394 prof_info
.func_name
= NULL
;
1395 prof_info
.line_no
= -1;
1396 prof_info
.end_line_no
= -1;
1397 prof_info
.func_line_no
= -1;
1398 prof_info
.func_end_line_no
= -1;
1400 acc_event_info enter_exit_data_event_info
;
/* The event info carries the same event type as PROF_INFO; its parent
   construct follows DATA_ENTER, 'implicit' is 0, and no tool info is
   attached.  */
1403 enter_exit_data_event_info
.other_event
.event_type
1404 = prof_info
.event_type
;
1405 enter_exit_data_event_info
.other_event
.valid_bytes
1406 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
1407 enter_exit_data_event_info
.other_event
.parent_construct
1408 = data_enter
? acc_construct_enter_data
: acc_construct_exit_data
;
1409 enter_exit_data_event_info
.other_event
.implicit
= 0;
1410 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
1412 acc_api_info api_info
;
/* Make API_INFO reachable through the thread data.  It reports
   'acc_device_api_none', copies the device type from PROF_INFO, and
   leaves the vendor (-1) and all handles (NULL) unset.  */
1415 thr
->api_info
= &api_info
;
1417 api_info
.device_api
= acc_device_api_none
;
1418 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
1419 api_info
.device_type
= prof_info
.device_type
;
1420 api_info
.vendor
= -1;
1421 api_info
.device_handle
= NULL
;
1422 api_info
.context_handle
= NULL
;
1423 api_info
.async_handle
= NULL
;
/* Dispatch the '*_start' event set up above.  */
1427 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
/* For a shared-memory device, or if host fallback was requested in FLAGS,
   the host is reported as the device type in both info records.
   NOTE(review): what else happens on this path (presumably the data
   processing below is bypassed) is not visible here -- confirm.  */
1430 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1431 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
1433 prof_info
.device_type
= acc_device_host
;
1434 api_info
.device_type
= prof_info
.device_type
;
/* Pass the wait arguments on to goacc_wait, then look up the async queue
   for ASYNC.  */
1440 goacc_wait (async
, num_waits
, ap
);
1442 goacc_aq aq
= get_goacc_asyncqueue (async
);
/* Hand the mappings to the enter and exit workers.  NOTE(review): the
   selection between these two calls is not visible in this extract;
   presumably it is made on DATA_ENTER -- confirm.  */
1445 goacc_enter_data_internal (acc_dev
, mapnum
, hostaddrs
, sizes
, kinds
, aq
);
1447 goacc_exit_data_internal (acc_dev
, mapnum
, hostaddrs
, sizes
, kinds
, aq
);
/* Switch both info records over to the matching '*_end' event and
   dispatch it.  */
1452 prof_info
.event_type
1453 = data_enter
? acc_ev_enter_data_end
: acc_ev_exit_data_end
;
1454 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
1455 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
/* PROF_INFO and API_INFO are locals of this function; clear the thread's
   pointers to them again.  */
1458 thr
->prof_info
= NULL
;
1459 thr
->api_info
= NULL
;
1463 /* Legacy entry point (GCC 11 and earlier). */
/* This single entry point serves both 'enter data' and 'exit data': the
   mapping kinds in KINDS are inspected to work out which of the two is
   meant, and the result is passed on as the DATA_ENTER argument of
   goacc_enter_exit_data_internal, together with all other arguments.

   NOTE(review): the statements executed for each group of kinds below are
   not visible in this extract -- confirm against the complete file.  */
1466 GOACC_enter_exit_data (int flags_m
, size_t mapnum
, void **hostaddrs
,
1467 size_t *sizes
, unsigned short *kinds
, int async
,
1470 /* Determine if this is an OpenACC "enter data". */
1471 bool data_enter
= false;
1472 for (size_t i
= 0; i
< mapnum
; ++i
)
/* Only the low 8 bits of each KINDS entry are compared against the
   GOMP_MAP_* values.  */
1474 unsigned char kind
= kinds
[i
] & 0xff;
/* These kinds are tested first, separately from the two groups below.
   NOTE(review): presumably such entries are skipped because they do not
   tell 'enter data' from 'exit data' -- confirm.  */
1476 if (kind
== GOMP_MAP_POINTER
1477 || kind
== GOMP_MAP_TO_PSET
1478 || kind
== GOMP_MAP_STRUCT
1479 || kind
== GOMP_MAP_STRUCT_UNORD
)
/* NOTE(review): presumably the kinds identifying an 'enter data', i.e.
   the place where DATA_ENTER becomes true -- confirm.  */
1482 if (kind
== GOMP_MAP_FORCE_ALLOC
1483 || kind
== GOMP_MAP_FORCE_PRESENT
1484 || kind
== GOMP_MAP_ATTACH
1485 || kind
== GOMP_MAP_FORCE_TO
1486 || kind
== GOMP_MAP_TO
1487 || kind
== GOMP_MAP_ALLOC
)
/* NOTE(review): presumably the kinds identifying an 'exit data', for
   which DATA_ENTER keeps its initial value false -- confirm.  */
1493 if (kind
== GOMP_MAP_RELEASE
1494 || kind
== GOMP_MAP_DELETE
1495 || kind
== GOMP_MAP_DETACH
1496 || kind
== GOMP_MAP_FORCE_DETACH
1497 || kind
== GOMP_MAP_FROM
1498 || kind
== GOMP_MAP_FORCE_FROM
)
/* A kind that is in none of the groups above is reported via
   gomp_fatal.  */
1501 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
/* The variadic arguments following NUM_WAITS are handed on by address.  */
1506 va_start (ap
, num_waits
);
1507 goacc_enter_exit_data_internal (flags_m
, mapnum
, hostaddrs
, sizes
, kinds
,
1508 data_enter
, async
, num_waits
, &ap
);
/* Entry point for OpenACC 'enter data': forwards FLAGS_M, MAPNUM,
   HOSTADDRS, SIZES, KINDS, ASYNC and NUM_WAITS unchanged to
   goacc_enter_exit_data_internal, with its DATA_ENTER argument fixed to
   true.  The variadic arguments following NUM_WAITS are handed on by
   address.

   NOTE(review): the end of the parameter list and the declaration of AP are
   not visible in this extract -- confirm against the complete file.  */
1513 GOACC_enter_data (int flags_m
, size_t mapnum
, void **hostaddrs
,
1514 size_t *sizes
, unsigned short *kinds
, int async
,
1518 va_start (ap
, num_waits
);
1519 goacc_enter_exit_data_internal (flags_m
, mapnum
, hostaddrs
, sizes
, kinds
,
1520 true, async
, num_waits
, &ap
);
/* Entry point for OpenACC 'exit data': forwards FLAGS_M, MAPNUM,
   HOSTADDRS, SIZES, KINDS, ASYNC and NUM_WAITS unchanged to
   goacc_enter_exit_data_internal, with its DATA_ENTER argument fixed to
   false.  The variadic arguments following NUM_WAITS are handed on by
   address.

   NOTE(review): the end of the parameter list and the declaration of AP are
   not visible in this extract -- confirm against the complete file.  */
1525 GOACC_exit_data (int flags_m
, size_t mapnum
, void **hostaddrs
,
1526 size_t *sizes
, unsigned short *kinds
, int async
,
1530 va_start (ap
, num_waits
);
1531 goacc_enter_exit_data_internal (flags_m
, mapnum
, hostaddrs
, sizes
, kinds
,
1532 false, async
, num_waits
, &ap
);
1537 GOACC_declare (int flags_m
, size_t mapnum
,
1538 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
1540 for (size_t i
= 0; i
< mapnum
; i
++)
1542 unsigned char kind
= kinds
[i
] & 0xff;
1544 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
1549 case GOMP_MAP_ALLOC
:
1550 if (acc_is_present (hostaddrs
[i
], sizes
[i
]))
1553 case GOMP_MAP_FORCE_ALLOC
:
1555 case GOMP_MAP_FORCE_TO
:
1556 goacc_enter_exit_data_internal (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1557 &kinds
[i
], true, GOMP_ASYNC_SYNC
, 0, NULL
);
1561 case GOMP_MAP_FORCE_FROM
:
1562 case GOMP_MAP_RELEASE
:
1563 case GOMP_MAP_DELETE
:
1564 goacc_enter_exit_data_internal (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1565 &kinds
[i
], false, GOMP_ASYNC_SYNC
, 0, NULL
);
1568 case GOMP_MAP_FORCE_DEVICEPTR
:
1571 case GOMP_MAP_FORCE_PRESENT
:
1572 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
1573 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs
[i
],
1574 (unsigned long) sizes
[i
]);