libgomp/oacc-mem.c
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2021 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include "openacc.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <string.h>
#include <assert.h>
/* Return block containing [H->S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}
/* Helper for lookup_dev.  Iterate over splay tree.  */

static splay_tree_key
lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
{
  splay_tree_key key = &node->key;
  if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
    return key;

  key = NULL;
  if (node->left)
    key = lookup_dev_1 (node->left, d, s);
  if (!key && node->right)
    key = lookup_dev_1 (node->right, d, s);

  return key;
}
/* Return block containing [D->S), or NULL if not contained.

   This iterates over the splay tree.  This is not expected to be a common
   operation.

   The device lock associated with MEM_MAP must be locked on entry, and remains
   locked on exit.  */

static splay_tree_key
lookup_dev (splay_tree mem_map, void *d, size_t s)
{
  if (!mem_map || !mem_map->root)
    return NULL;

  return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
}
/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
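
/* Usage sketch (illustration only, not part of this file; 'n' is assumed to
   be set by the caller): device memory from acc_malloc can be handed to a
   compute region via a 'deviceptr' clause, and must be released with
   acc_free.

     float *d_buf = (float *) acc_malloc (n * sizeof (float));
     if (d_buf)
       {
	 #pragma acc parallel loop deviceptr(d_buf)
	 for (size_t i = 0; i < n; ++i)
	   d_buf[i] = 0.0f;
	 acc_free (d_buf);
       }
*/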
static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
		      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
	memmove (h, d, s);
      else
	memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
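
/* Usage sketch (illustration only; 'N' and the buffers are hypothetical):
   explicit transfers between a host buffer and device memory obtained from
   acc_malloc.

     double h_data[N];
     void *d_data = acc_malloc (sizeof h_data);
     acc_memcpy_to_device (d_data, h_data, sizeof h_data);
     // ... device work on d_data ...
     acc_memcpy_from_device (h_data, d_data, sizeof h_data);
     acc_free (d_data);
*/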
/* Return the device pointer that corresponds to host data H.  Or NULL
   if no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}
/* Return the host pointer that corresponds to device data D.  Or NULL
   if no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (&acc_dev->mem_map, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
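
/* Usage sketch (illustration only; 'a' is a hypothetical host array): for
   mapped data the two translation functions invert each other.

     acc_copyin (a, sizeof a);     // establish host->device mapping
     void *d = acc_deviceptr (a);  // device address backing 'a'
     void *h = acc_hostptr (d);    // translates back, so h == a
*/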
/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
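
/* Usage sketch (illustration only; 'a' is a hypothetical host array): copy
   data in only when it is not already present, which is what acc_copyin
   (a.k.a. acc_present_or_copyin, below) already does internally.

     if (!acc_is_present (a, sizeof a))
       acc_copyin (a, sizeof a);
*/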
/* Create a mapping for host [H,+S] -> device [D,+S] */

void
acc_map_data (void *h, void *d, size_t s)
{
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
	gomp_fatal ("cannot map data on shared-memory system");
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
		    (void *)h, (int)s, (void *)d, (int)s);

      acc_prof_info prof_info;
      acc_api_info api_info;
      bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
		      (int)s);
	}

      if (lookup_dev (&thr->dev->mem_map, d, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
		      (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);

      struct target_mem_desc *tgt
	= gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
			 &kinds, true,
			 GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_ENTER_DATA);
      assert (tgt);
      assert (tgt->list_count == 1);
      splay_tree_key n = tgt->list[0].key;
      assert (n);
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      /* Special reference counting behavior.  */
      n->refcount = REFCOUNT_INFINITY;

      if (profiling_p)
	{
	  thr->prof_info = NULL;
	  thr->api_info = NULL;
	}
    }
}
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  size_t host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  struct target_mem_desc *tgt = n->tgt;

  if (tgt->refcount == REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("cannot unmap target block");
    }

  /* Above, we've verified that the mapping must have been set up by
     'acc_map_data'.  */
  assert (tgt->refcount == 1);

  /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
     from freeing the target memory.  */
  tgt->tgt_end = 0;
  tgt->to_free = NULL;

  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
  assert (is_tgt_unmapped);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
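
/* Usage sketch (illustration only; 'a' is a hypothetical host object):
   acc_map_data attaches separately allocated device memory to host data;
   acc_unmap_data removes the mapping but, as implemented above, does not
   free the device memory, so acc_free is still required.

     void *d = acc_malloc (sizeof a);
     acc_map_data (&a, d, sizeof a);   // refcount set to REFCOUNT_INFINITY
     // ... use 'a' as present in compute regions ...
     acc_unmap_data (&a);
     acc_free (d);
*/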
/* Helper function to map a single dynamic data item, represented by a single
   mapping.  The acc_dev->lock should be held on entry, and remains locked on
   exit.  */

static void *
goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
			size_t size, splay_tree_key n)
{
  assert (n);

  /* Present.  */
  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + hostaddr
		      - n->host_start);

  if (hostaddr + size > (void *) n->host_end)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", hostaddr, (int) size);
    }

  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY)
    n->refcount++;
  n->dynamic_refcount++;

  return d;
}
/* Enter dynamic mapping for a single datum.  Return the device pointer.  */

static void *
goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
{
  void *d;
  splay_tree_key n;

  if (!hostaddrs[0] || !sizes[0])
    gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return hostaddrs[0];

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
  if (n)
    {
      d = goacc_map_var_existing (acc_dev, hostaddrs[0], sizes[0], n);
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else
    {
      const size_t mapnum = 1;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      struct target_mem_desc *tgt
	= gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
			       kinds, true, (GOMP_MAP_VARS_OPENACC
					     | GOMP_MAP_VARS_ENTER_DATA));
      assert (tgt);
      assert (tgt->list_count == 1);
      n = tgt->list[0].key;
      assert (n);
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      n->dynamic_refcount++;

      d = (void *) tgt->tgt_start;
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
void *
acc_create (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  goacc_enter_datum (&h, &s, &kinds, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
void *
acc_copyin (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  goacc_enter_datum (&h, &s, &kinds, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
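
/* Usage sketch (illustration only; 'buf' and 'tbl' are hypothetical host
   arrays): acc_create reserves device storage without a transfer, while
   acc_copyin also copies the current host contents; both go through
   goacc_enter_datum above and bump the dynamic reference count when the
   data is already present.

     acc_create (buf, sizeof buf);   // device storage only
     acc_copyin (tbl, sizeof tbl);   // device storage + host->device copy
*/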
/* Helper function to unmap a single data item.  Device lock should be held on
   entry, and remains locked on exit.  */

static void
goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
		    unsigned short kind, splay_tree_key n, goacc_aq aq)
{
  assert (kind != GOMP_MAP_DETACH
	  && kind != GOMP_MAP_FORCE_DETACH);

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  bool finalize = (kind == GOMP_MAP_FORCE_FROM
		   || kind == GOMP_MAP_DELETE);

  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY
      && n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount--;
      n->dynamic_refcount--;
    }

  if (n->refcount == 0)
    {
      bool copyout = (kind == GOMP_MAP_FROM
		      || kind == GOMP_MAP_FORCE_FROM);
      if (copyout)
	{
	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	/* TODO We can't do the 'is_tgt_unmapped' checking -- see the
	   'gomp_unref_tgt' comment in
	   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	   PR92881.  */
	gomp_remove_var_async (acc_dev, n, aq);
      else
	{
	  size_t num_mappings = 0;
	  /* If the target_mem_desc represents a single data mapping, we can
	     check that it is freed when this splay tree key's refcount reaches
	     zero.  Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
	     multiple members), fall back to skipping the test.  */
	  for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
	    if (n->tgt->list[l_i].key
		&& !n->tgt->list[l_i].is_attach)
	      ++num_mappings;
	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
	  assert (is_tgt_unmapped || num_mappings > 1);
	}
    }
}
/* Exit a dynamic mapping for a single variable.  */

static void
goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
{
  /* No need to call lazy open, as the data must already have been
     mapped.  */

  kind &= 0xff;

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, s);
  /* Non-present data is a no-op: PR92726, PR92970, PR92984.  */
  if (n)
    {
      goacc_aq aq = get_goacc_asyncqueue (async);
      goacc_exit_datum_1 (acc_dev, h, s, kind, n, aq);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
void
acc_delete (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
}

void
acc_delete_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
}

void
acc_delete_finalize (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
}

void
acc_delete_finalize_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
}

void
acc_copyout (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
}
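
/* Usage sketch (illustration only; 'a' is a hypothetical host array):
   dynamic enter/exit pairs.  Every acc_copyin should be balanced by an
   acc_copyout (or acc_delete); per goacc_exit_datum_1 above, the data is
   only copied back and unmapped once the reference count drops to zero,
   and the '_finalize' variants drop the whole dynamic count at once.

     acc_copyin (a, sizeof a);
     acc_copyin (a, sizeof a);    // dynamic refcount now 2
     acc_copyout (a, sizeof a);   // still present
     acc_copyout (a, sizeof a);   // copied back and unmapped
*/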
static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* Fortran optional arguments that are non-present result in a
     NULL host address here.  This can safely be ignored as it is
     not possible to 'update' a non-present optional argument.  */
  if (h == NULL)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
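
/* Usage sketch (illustration only; 'a' is a hypothetical mapped host array):
   refresh one side of an existing mapping without touching reference counts.

     a[0] = 42;                        // host-side change
     acc_update_device (a, sizeof a);  // push to the device copy
     // ... device code modifies the device copy ...
     acc_update_self (a, sizeof a);    // pull back to the host copy
*/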
void
acc_attach_async (void **hostaddr, int async)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  goacc_aq aq = get_goacc_asyncqueue (async);

  struct splay_tree_key_s cur_node;
  splay_tree_key n;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  cur_node.host_start = (uintptr_t) hostaddr;
  cur_node.host_end = cur_node.host_start + sizeof (void *);
  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

  if (n == NULL)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("struct not mapped for acc_attach");
    }

  gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
		       0, NULL);

  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_attach (void **hostaddr)
{
  acc_attach_async (hostaddr, acc_async_sync);
}
static void
goacc_detach_internal (void **hostaddr, int async, bool finalize)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  struct splay_tree_key_s cur_node;
  splay_tree_key n;
  struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  cur_node.host_start = (uintptr_t) hostaddr;
  cur_node.host_end = cur_node.host_start + sizeof (void *);
  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

  if (n == NULL)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("struct not mapped for acc_detach");
    }

  gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);

  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_detach (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, false);
}

void
acc_detach_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, false);
}

void
acc_detach_finalize (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, true);
}

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, true);
}
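
/* Usage sketch (illustration only; 'l' and 'n' are hypothetical): for a
   mapped struct containing a pointer, attach makes the device copy of the
   pointer field point at the device copy of its target, and detach restores
   the host value.

     struct list { int *data; } l;
     acc_copyin (&l, sizeof l);
     acc_copyin (l.data, n * sizeof *l.data);
     acc_attach ((void **) &l.data);
     // ... compute regions can now traverse l.data on the device ...
     acc_detach ((void **) &l.data);
*/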
/* Some types of (pointer) variables use several consecutive mappings, which
   must be treated as a group for enter/exit data directives.  This function
   returns the last mapping in such a group (inclusive), or POS for singleton
   mappings.  */

static int
find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
{
  unsigned char kind0 = kinds[pos] & 0xff;
  int first_pos = pos;

  switch (kind0)
    {
    case GOMP_MAP_TO_PSET:
      if (pos + 1 < mapnum
	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
	return pos + 1;

      while (pos + 1 < mapnum
	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
	pos++;
      /* We expect at least one GOMP_MAP_POINTER (if not a single
	 GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET.  */
      assert (pos > first_pos);
      break;

    case GOMP_MAP_STRUCT:
      pos += sizes[pos];
      break;

    case GOMP_MAP_POINTER:
    case GOMP_MAP_ALWAYS_POINTER:
      /* These mappings are only expected after some other mapping.  If we
	 see one by itself, something has gone wrong.  */
      gomp_fatal ("unexpected mapping");
      break;

    case GOMP_MAP_ATTACH:
      break;

    default:
      /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
	 mapping.  */
      if (pos + 1 < mapnum)
	{
	  unsigned char kind1 = kinds[pos + 1] & 0xff;
	  if (kind1 == GOMP_MAP_ALWAYS_POINTER)
	    return pos + 1;
	}

      /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
	 mapping.  */
      if (pos + 1 < mapnum
	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
	return pos + 1;

      /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
	 (etc.) mapping.  */
      while (pos + 1 < mapnum
	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
	pos++;
    }

  return pos;
}
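
/* For example (illustrative layout): for a 'kinds' array
     { GOMP_MAP_TO_PSET, GOMP_MAP_POINTER, GOMP_MAP_POINTER, GOMP_MAP_TO }
   a call with POS == 0 returns 2 (the last GOMP_MAP_POINTER of the group),
   while POS == 3 is a singleton and returns 3.  */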
/* Map variables for OpenACC "enter data".  We can't just call
   gomp_map_vars_async once, because individual mapped variables might have
   "exit data" called for them at different times.  */

static void
goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
			   void **hostaddrs, size_t *sizes,
			   unsigned short *kinds, goacc_aq aq)
{
  gomp_mutex_lock (&acc_dev->lock);

  for (size_t i = 0; i < mapnum; i++)
    {
      splay_tree_key n;
      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
      bool struct_p = false;
      size_t size, groupnum = (group_last - i) + 1;

      switch (kinds[i] & 0xff)
	{
	case GOMP_MAP_STRUCT:
	  {
	    size = (uintptr_t) hostaddrs[group_last] + sizes[group_last]
		   - (uintptr_t) hostaddrs[i];
	    struct_p = true;
	  }
	  break;

	case GOMP_MAP_ATTACH:
	  size = sizeof (void *);
	  break;

	default:
	  size = sizes[i];
	}

      n = lookup_host (acc_dev, hostaddrs[i], size);

      if (n && struct_p)
	{
	  for (size_t j = i + 1; j <= group_last; j++)
	    {
	      struct splay_tree_key_s cur_node;
	      cur_node.host_start = (uintptr_t) hostaddrs[j];
	      cur_node.host_end = cur_node.host_start + sizes[j];
	      splay_tree_key n2
		= splay_tree_lookup (&acc_dev->mem_map, &cur_node);
	      if (!n2
		  || n2->tgt != n->tgt
		  || n2->host_start - n->host_start
		     != n2->tgt_offset - n->tgt_offset)
		{
		  gomp_mutex_unlock (&acc_dev->lock);
		  gomp_fatal ("Trying to map into device [%p..%p) structure "
			      "element when other mapped elements from the "
			      "same structure weren't mapped together with "
			      "it", (void *) cur_node.host_start,
			      (void *) cur_node.host_end);
		}
	    }
	  /* This is a special case because we must increment the refcount by
	     the number of mapped struct elements, rather than by one.  */
	  if (n->refcount != REFCOUNT_INFINITY)
	    n->refcount += groupnum - 1;
	  n->dynamic_refcount += groupnum - 1;
	}
      else if (n && groupnum == 1)
	{
	  void *h = hostaddrs[i];
	  size_t s = sizes[i];

	  if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
	    {
	      gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
				   (uintptr_t) h, s, NULL);
	      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
		 reference counts ('n->refcount', 'n->dynamic_refcount').  */
	    }
	  else
	    goacc_map_var_existing (acc_dev, h, s, n);
	}
      else if (n && groupnum > 1)
	{
	  assert (n->refcount != REFCOUNT_INFINITY
		  && n->refcount != REFCOUNT_LINK);

	  for (size_t j = i + 1; j <= group_last; j++)
	    if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
	      {
		splay_tree_key m
		  = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
		gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
				     (uintptr_t) hostaddrs[j], sizes[j], NULL);
	      }

	  bool processed = false;

	  struct target_mem_desc *tgt = n->tgt;
	  for (size_t j = 0; j < tgt->list_count; j++)
	    if (tgt->list[j].key == n)
	      {
		/* We are processing a group of mappings (e.g.
		   [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
		   Find the right group in the target_mem_desc's variable
		   list, and increment the refcounts for each item in that
		   group.  */
		for (size_t k = 0; k < groupnum; k++)
		  if (j + k < tgt->list_count
		      && tgt->list[j + k].key
		      && !tgt->list[j + k].is_attach)
		    {
		      tgt->list[j + k].key->refcount++;
		      tgt->list[j + k].key->dynamic_refcount++;
		    }
		processed = true;
		break;
	      }

	  if (!processed)
	    {
	      gomp_mutex_unlock (&acc_dev->lock);
	      gomp_fatal ("dynamic refcount incrementing failed for "
			  "pointer/pset");
	    }
	}
      else if (hostaddrs[i])
	{
	  /* The data is not mapped already.  Map it now, unless the first
	     member in the group has a NULL pointer (e.g. a non-present
	     optional parameter).  */
	  gomp_mutex_unlock (&acc_dev->lock);

	  struct target_mem_desc *tgt
	    = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
				   &sizes[i], &kinds[i], true,
				   (GOMP_MAP_VARS_OPENACC
				    | GOMP_MAP_VARS_ENTER_DATA));
	  assert (tgt);

	  gomp_mutex_lock (&acc_dev->lock);

	  for (size_t j = 0; j < tgt->list_count; j++)
	    {
	      n = tgt->list[j].key;
	      if (n && !tgt->list[j].is_attach)
		n->dynamic_refcount++;
	    }
	}

      i = group_last;
    }

  gomp_mutex_unlock (&acc_dev->lock);
}
/* Unmap variables for OpenACC "exit data".  */

static void
goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
			  void **hostaddrs, size_t *sizes,
			  unsigned short *kinds, goacc_aq aq)
{
  gomp_mutex_lock (&acc_dev->lock);

  /* Handle "detach" before copyback/deletion of mapped data.  */
  for (size_t i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;
      bool finalize = false;
      switch (kind)
	{
	case GOMP_MAP_FORCE_DETACH:
	  finalize = true;
	  /* Fallthrough.  */

	case GOMP_MAP_DETACH:
	  {
	    struct splay_tree_key_s cur_node;
	    uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
	    cur_node.host_start = hostaddr;
	    cur_node.host_end = cur_node.host_start + sizeof (void *);
	    splay_tree_key n
	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

	    if (n == NULL)
	      {
		gomp_mutex_unlock (&acc_dev->lock);
		gomp_fatal ("struct not mapped for detach operation");
	      }

	    gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
	  }
	  break;
	default:
	  ;
	}
    }

  for (size_t i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_FORCE_FROM:
	case GOMP_MAP_TO_PSET:
	case GOMP_MAP_POINTER:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	  {
	    struct splay_tree_key_s cur_node;
	    size_t size;
	    if (kind == GOMP_MAP_POINTER)
	      size = sizeof (void *);
	    else
	      size = sizes[i];
	    cur_node.host_start = (uintptr_t) hostaddrs[i];
	    cur_node.host_end = cur_node.host_start + size;
	    splay_tree_key n
	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

	    if (n == NULL)
	      continue;

	    goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq);
	  }
	  break;

	case GOMP_MAP_STRUCT:
	  /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
	     for all its entries.  This special handling exists for GCC 10.1
	     compatibility; afterwards, we're not generating these no-op
	     'GOMP_MAP_STRUCT's anymore.  */
	  break;

	case GOMP_MAP_DETACH:
	case GOMP_MAP_FORCE_DETACH:
	  /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
	     reference counts ('n->refcount', 'n->dynamic_refcount').  */
	  break;

	default:
	  gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&acc_dev->lock);
}
void
GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		       size_t *sizes, unsigned short *kinds, int async,
		       int num_waits, ...)
{
  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);

  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  bool data_enter = false;
  size_t i;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Determine if this is an "acc enter data".  */
  for (i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      if (kind == GOMP_MAP_POINTER
	  || kind == GOMP_MAP_TO_PSET
	  || kind == GOMP_MAP_STRUCT)
	continue;

      if (kind == GOMP_MAP_FORCE_ALLOC
	  || kind == GOMP_MAP_FORCE_PRESENT
	  || kind == GOMP_MAP_ATTACH
	  || kind == GOMP_MAP_FORCE_TO
	  || kind == GOMP_MAP_TO
	  || kind == GOMP_MAP_ALLOC)
	{
	  data_enter = true;
	  break;
	}

      if (kind == GOMP_MAP_RELEASE
	  || kind == GOMP_MAP_DELETE
	  || kind == GOMP_MAP_DETACH
	  || kind == GOMP_MAP_FORCE_DETACH
	  || kind == GOMP_MAP_FROM
	  || kind == GOMP_MAP_FORCE_FROM)
	break;

      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
		  kind);
    }

  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);

  acc_prof_info prof_info;
  if (profiling_p)
    {
      thr->prof_info = &prof_info;

      prof_info.event_type
	= data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
      prof_info.version = _ACC_PROF_INFO_VERSION;
      prof_info.device_type = acc_device_type (acc_dev->type);
      prof_info.device_number = acc_dev->target_id;
      prof_info.thread_id = -1;
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
      prof_info.src_file = NULL;
      prof_info.func_name = NULL;
      prof_info.line_no = -1;
      prof_info.end_line_no = -1;
      prof_info.func_line_no = -1;
      prof_info.func_end_line_no = -1;
    }

  acc_event_info enter_exit_data_event_info;
  if (profiling_p)
    {
      enter_exit_data_event_info.other_event.event_type
	= prof_info.event_type;
      enter_exit_data_event_info.other_event.valid_bytes
	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
      enter_exit_data_event_info.other_event.parent_construct
	= data_enter ? acc_construct_enter_data : acc_construct_exit_data;
      enter_exit_data_event_info.other_event.implicit = 0;
      enter_exit_data_event_info.other_event.tool_info = NULL;
    }

  acc_api_info api_info;
  if (profiling_p)
    {
      thr->api_info = &api_info;

      api_info.device_api = acc_device_api_none;
      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
      api_info.device_type = prof_info.device_type;
      api_info.vendor = -1;
      api_info.device_handle = NULL;
      api_info.context_handle = NULL;
      api_info.async_handle = NULL;
    }

  if (profiling_p)
    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
			      &api_info);

  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || (flags & GOACC_FLAG_HOST_FALLBACK))
    {
      prof_info.device_type = acc_device_host;
      api_info.device_type = prof_info.device_type;

      goto out_prof;
    }

  if (num_waits)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, &ap);
      va_end (ap);
    }

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (data_enter)
    goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
  else
    goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);

 out_prof:
  if (profiling_p)
    {
      prof_info.event_type
	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
				&api_info);

      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
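
/* For example (illustrative; the exact expansion is up to the compiler), a
   directive like
     #pragma acc enter data copyin(x[0:n])
   is lowered by GCC into a call of GOACC_enter_exit_data with mapnum == 1
   and a GOMP_MAP_TO kind, which the classification loop above recognizes as
   "acc enter data" and hands to goacc_enter_data_internal.  */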