value-range: Fix handling of POLY_INT_CST anti-ranges [PR96146]
[official-gcc.git] / libgomp / oacc-mem.c
blob855cad84391192ba9fe65d019e87a58d9ce0665c
1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2020 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
42 struct splay_tree_key_s node;
43 splay_tree_key key;
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
48 key = splay_tree_lookup (&dev->mem_map, &node);
50 return key;
53 /* Helper for lookup_dev. Iterate over splay tree. */
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
68 return key;
71 /* Return block containing [D->S), or NULL if not contained.
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
82 if (!mem_map || !mem_map->root)
83 return NULL;
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
92 void *
93 acc_malloc (size_t s)
95 if (!s)
96 return NULL;
98 goacc_lazy_initialize ();
100 struct goacc_thread *thr = goacc_thread ();
102 assert (thr->dev);
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
113 if (profiling_p)
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
119 return res;
122 void
123 acc_free (void *d)
125 splay_tree_key k;
127 if (!d)
128 return;
130 struct goacc_thread *thr = goacc_thread ();
132 assert (thr && thr->dev);
134 struct gomp_device_descr *acc_dev = thr->dev;
136 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
137 return free (d);
139 acc_prof_info prof_info;
140 acc_api_info api_info;
141 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
143 gomp_mutex_lock (&acc_dev->lock);
145 /* We don't have to call lazy open here, as the ptr value must have
146 been returned by acc_malloc. It's not permitted to pass NULL in
147 (unless you got that null from acc_malloc). */
148 if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
150 void *offset = d - k->tgt->tgt_start + k->tgt_offset;
151 void *h = k->host_start + offset;
152 size_t h_size = k->host_end - k->host_start;
153 gomp_mutex_unlock (&acc_dev->lock);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
158 d, h, (int) h_size);
160 else
161 gomp_mutex_unlock (&acc_dev->lock);
163 if (!acc_dev->free_func (acc_dev->target_id, d))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
166 if (profiling_p)
168 thr->prof_info = NULL;
169 thr->api_info = NULL;
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
181 assert (thr && thr->dev);
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
207 if (profiling_p)
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
214 void
215 acc_memcpy_to_device (void *d, void *h, size_t s)
217 memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
/* Copy S bytes from host address H to device address D on the queue selected
   by ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  const bool from_device = false;

  memcpy_tofrom_device (from_device, d, h, s, async, __func__);
}
226 void
227 acc_memcpy_from_device (void *h, void *d, size_t s)
229 memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
/* Copy S bytes from device address D to host address H on the queue selected
   by ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  const bool from_device = true;

  memcpy_tofrom_device (from_device, d, h, s, async, __func__);
}
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
241 void *
242 acc_deviceptr (void *h)
244 splay_tree_key n;
245 void *d;
246 void *offset;
248 goacc_lazy_initialize ();
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
259 gomp_mutex_lock (&dev->lock);
261 n = lookup_host (dev, h, 1);
263 if (!n)
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
269 offset = h - n->host_start;
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
273 gomp_mutex_unlock (&dev->lock);
275 return d;
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
281 void *
282 acc_hostptr (void *d)
284 splay_tree_key n;
285 void *h;
286 void *offset;
288 goacc_lazy_initialize ();
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
299 gomp_mutex_lock (&acc_dev->lock);
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
303 if (!n)
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
311 h = n->host_start + offset;
313 gomp_mutex_unlock (&acc_dev->lock);
315 return h;
318 /* Return 1 if host data [H,+S] is present on the device. */
321 acc_is_present (void *h, size_t s)
323 splay_tree_key n;
325 if (!s || !h)
326 return 0;
328 goacc_lazy_initialize ();
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
339 gomp_mutex_lock (&acc_dev->lock);
341 n = lookup_host (acc_dev, h, s);
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
348 gomp_mutex_unlock (&acc_dev->lock);
350 return n != NULL;
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
355 void
356 acc_map_data (void *h, void *d, size_t s)
358 size_t mapnum = 1;
359 void *hostaddrs = h;
360 void *devaddrs = d;
361 size_t sizes = s;
362 unsigned short kinds = GOMP_MAP_ALLOC;
364 goacc_lazy_initialize ();
366 struct goacc_thread *thr = goacc_thread ();
367 struct gomp_device_descr *acc_dev = thr->dev;
369 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
371 if (d != h)
372 gomp_fatal ("cannot map data on shared-memory system");
374 else
376 struct goacc_thread *thr = goacc_thread ();
378 if (!d || !h || !s)
379 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
380 (void *)h, (int)s, (void *)d, (int)s);
382 acc_prof_info prof_info;
383 acc_api_info api_info;
384 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
386 gomp_mutex_lock (&acc_dev->lock);
388 if (lookup_host (acc_dev, h, s))
390 gomp_mutex_unlock (&acc_dev->lock);
391 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
392 (int)s);
395 if (lookup_dev (&thr->dev->mem_map, d, s))
397 gomp_mutex_unlock (&acc_dev->lock);
398 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
399 (int)s);
402 gomp_mutex_unlock (&acc_dev->lock);
404 struct target_mem_desc *tgt
405 = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407 assert (tgt);
408 assert (tgt->list_count == 1);
409 splay_tree_key n = tgt->list[0].key;
410 assert (n);
411 assert (n->refcount == 1);
412 assert (n->dynamic_refcount == 0);
413 /* Special reference counting behavior. */
414 n->refcount = REFCOUNT_INFINITY;
416 if (profiling_p)
418 thr->prof_info = NULL;
419 thr->api_info = NULL;
424 void
425 acc_unmap_data (void *h)
427 struct goacc_thread *thr = goacc_thread ();
428 struct gomp_device_descr *acc_dev = thr->dev;
430 /* No need to call lazy open, as the address must have been mapped. */
432 /* This is a no-op on shared-memory targets. */
433 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
434 return;
436 acc_prof_info prof_info;
437 acc_api_info api_info;
438 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
440 gomp_mutex_lock (&acc_dev->lock);
442 splay_tree_key n = lookup_host (acc_dev, h, 1);
444 if (!n)
446 gomp_mutex_unlock (&acc_dev->lock);
447 gomp_fatal ("%p is not a mapped block", (void *)h);
450 size_t host_size = n->host_end - n->host_start;
452 if (n->host_start != (uintptr_t) h)
454 gomp_mutex_unlock (&acc_dev->lock);
455 gomp_fatal ("[%p,%d] surrounds %p",
456 (void *) n->host_start, (int) host_size, (void *) h);
458 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
459 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating
460 the different 'REFCOUNT_INFINITY' cases, or simply separate
461 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
462 etc.)? */
463 else if (n->refcount != REFCOUNT_INFINITY)
465 gomp_mutex_unlock (&acc_dev->lock);
466 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
467 " by 'acc_map_data'",
468 (void *) h, (int) host_size);
471 struct target_mem_desc *tgt = n->tgt;
473 if (tgt->refcount == REFCOUNT_INFINITY)
475 gomp_mutex_unlock (&acc_dev->lock);
476 gomp_fatal ("cannot unmap target block");
479 /* Above, we've verified that the mapping must have been set up by
480 'acc_map_data'. */
481 assert (tgt->refcount == 1);
483 /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
484 from freeing the target memory. */
485 tgt->tgt_end = 0;
486 tgt->to_free = NULL;
488 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
489 assert (is_tgt_unmapped);
491 gomp_mutex_unlock (&acc_dev->lock);
493 if (profiling_p)
495 thr->prof_info = NULL;
496 thr->api_info = NULL;
501 /* Helper function to map a single dynamic data item, represented by a single
502 mapping. The acc_dev->lock should be held on entry, and remains locked on
503 exit. */
505 static void *
506 goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
507 size_t size, splay_tree_key n)
509 assert (n);
511 /* Present. */
512 void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + hostaddr
513 - n->host_start);
515 if (hostaddr + size > (void *) n->host_end)
517 gomp_mutex_unlock (&acc_dev->lock);
518 gomp_fatal ("[%p,+%d] not mapped", hostaddr, (int) size);
521 assert (n->refcount != REFCOUNT_LINK);
522 if (n->refcount != REFCOUNT_INFINITY)
523 n->refcount++;
524 n->dynamic_refcount++;
526 return d;
529 /* Enter dynamic mapping for a single datum. Return the device pointer. */
531 static void *
532 goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
534 void *d;
535 splay_tree_key n;
537 if (!hostaddrs[0] || !sizes[0])
538 gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
540 goacc_lazy_initialize ();
542 struct goacc_thread *thr = goacc_thread ();
543 struct gomp_device_descr *acc_dev = thr->dev;
545 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
546 return hostaddrs[0];
548 acc_prof_info prof_info;
549 acc_api_info api_info;
550 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
551 if (profiling_p)
553 prof_info.async = async;
554 prof_info.async_queue = prof_info.async;
557 gomp_mutex_lock (&acc_dev->lock);
559 n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
560 if (n)
562 d = goacc_map_var_existing (acc_dev, hostaddrs[0], sizes[0], n);
563 gomp_mutex_unlock (&acc_dev->lock);
565 else
567 const size_t mapnum = 1;
569 gomp_mutex_unlock (&acc_dev->lock);
571 goacc_aq aq = get_goacc_asyncqueue (async);
573 struct target_mem_desc *tgt
574 = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
575 kinds, true, GOMP_MAP_VARS_ENTER_DATA);
576 assert (tgt);
577 assert (tgt->list_count == 1);
578 n = tgt->list[0].key;
579 assert (n);
580 assert (n->refcount == 1);
581 assert (n->dynamic_refcount == 0);
582 n->dynamic_refcount++;
584 d = (void *) tgt->tgt_start;
587 if (profiling_p)
589 thr->prof_info = NULL;
590 thr->api_info = NULL;
593 return d;
596 void *
597 acc_create (void *h, size_t s)
599 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
600 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
603 void
604 acc_create_async (void *h, size_t s, int async)
606 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
607 goacc_enter_datum (&h, &s, &kinds, async);
/* acc_present_or_create used to be what acc_create is now, and acc_pcreate
   is acc_present_or_create by a different name.  Both are therefore provided
   as aliases of acc_create where the toolchain supports that, and as
   forwarding functions otherwise.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  void *dev_ptr = acc_create (h, s);

  return dev_ptr;
}

void *
acc_pcreate (void *h, size_t s)
{
  void *dev_ptr = acc_create (h, s);

  return dev_ptr;
}
#endif
629 void *
630 acc_copyin (void *h, size_t s)
632 unsigned short kinds[1] = { GOMP_MAP_TO };
633 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
636 void
637 acc_copyin_async (void *h, size_t s, int async)
639 unsigned short kinds[1] = { GOMP_MAP_TO };
640 goacc_enter_datum (&h, &s, &kinds, async);
/* acc_present_or_copyin used to be what acc_copyin is now, and acc_pcopyin
   is acc_present_or_copyin by a different name.  Both are therefore provided
   as aliases of acc_copyin where the toolchain supports that, and as
   forwarding functions otherwise.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  void *dev_ptr = acc_copyin (h, s);

  return dev_ptr;
}

void *
acc_pcopyin (void *h, size_t s)
{
  void *dev_ptr = acc_copyin (h, s);

  return dev_ptr;
}
#endif
663 /* Helper function to unmap a single data item. Device lock should be held on
664 entry, and remains locked on exit. */
666 static void
667 goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
668 unsigned short kind, splay_tree_key n, goacc_aq aq)
670 if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
672 size_t host_size = n->host_end - n->host_start;
673 gomp_mutex_unlock (&acc_dev->lock);
674 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
675 (void *) h, (int) s, (void *) n->host_start, (int) host_size);
678 bool finalize = (kind == GOMP_MAP_FORCE_FROM
679 || kind == GOMP_MAP_DELETE
680 || kind == GOMP_MAP_FORCE_DETACH);
682 assert (n->refcount != REFCOUNT_LINK);
683 if (n->refcount != REFCOUNT_INFINITY
684 && n->refcount < n->dynamic_refcount)
686 gomp_mutex_unlock (&acc_dev->lock);
687 gomp_fatal ("Dynamic reference counting assert fail\n");
690 if (finalize)
692 if (n->refcount != REFCOUNT_INFINITY)
693 n->refcount -= n->dynamic_refcount;
694 n->dynamic_refcount = 0;
696 else if (n->dynamic_refcount)
698 if (n->refcount != REFCOUNT_INFINITY)
699 n->refcount--;
700 n->dynamic_refcount--;
703 if (n->refcount == 0)
705 bool copyout = (kind == GOMP_MAP_FROM
706 || kind == GOMP_MAP_FORCE_FROM);
707 if (copyout)
709 void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
710 + (uintptr_t) h - n->host_start);
711 gomp_copy_dev2host (acc_dev, aq, h, d, s);
714 if (aq)
715 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
716 'gomp_unref_tgt' comment in
717 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
718 PR92881. */
719 gomp_remove_var_async (acc_dev, n, aq);
720 else
722 size_t num_mappings = 0;
723 /* If the target_mem_desc represents a single data mapping, we can
724 check that it is freed when this splay tree key's refcount reaches
725 zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
726 multiple members), fall back to skipping the test. */
727 for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
728 if (n->tgt->list[l_i].key)
729 ++num_mappings;
730 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
731 assert (is_tgt_unmapped || num_mappings > 1);
737 /* Exit a dynamic mapping for a single variable. */
739 static void
740 goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
742 /* No need to call lazy open, as the data must already have been
743 mapped. */
745 kind &= 0xff;
747 struct goacc_thread *thr = goacc_thread ();
748 struct gomp_device_descr *acc_dev = thr->dev;
750 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
751 return;
753 acc_prof_info prof_info;
754 acc_api_info api_info;
755 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
756 if (profiling_p)
758 prof_info.async = async;
759 prof_info.async_queue = prof_info.async;
762 gomp_mutex_lock (&acc_dev->lock);
764 splay_tree_key n = lookup_host (acc_dev, h, s);
765 /* Non-present data is a no-op: PR92726, RP92970, PR92984. */
766 if (n)
768 goacc_aq aq = get_goacc_asyncqueue (async);
769 goacc_exit_datum_1 (acc_dev, h, s, kind, n, aq);
772 gomp_mutex_unlock (&acc_dev->lock);
774 if (profiling_p)
776 thr->prof_info = NULL;
777 thr->api_info = NULL;
781 void
782 acc_delete (void *h , size_t s)
784 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
787 void
788 acc_delete_async (void *h , size_t s, int async)
790 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
793 void
794 acc_delete_finalize (void *h , size_t s)
796 goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
799 void
800 acc_delete_finalize_async (void *h , size_t s, int async)
802 goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
805 void
806 acc_copyout (void *h, size_t s)
808 goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
811 void
812 acc_copyout_async (void *h, size_t s, int async)
814 goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
817 void
818 acc_copyout_finalize (void *h, size_t s)
820 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
823 void
824 acc_copyout_finalize_async (void *h, size_t s, int async)
826 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
829 static void
830 update_dev_host (int is_dev, void *h, size_t s, int async)
832 splay_tree_key n;
833 void *d;
835 goacc_lazy_initialize ();
837 struct goacc_thread *thr = goacc_thread ();
838 struct gomp_device_descr *acc_dev = thr->dev;
840 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
841 return;
843 /* Fortran optional arguments that are non-present result in a
844 NULL host address here. This can safely be ignored as it is
845 not possible to 'update' a non-present optional argument. */
846 if (h == NULL)
847 return;
849 acc_prof_info prof_info;
850 acc_api_info api_info;
851 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
852 if (profiling_p)
854 prof_info.async = async;
855 prof_info.async_queue = prof_info.async;
858 gomp_mutex_lock (&acc_dev->lock);
860 n = lookup_host (acc_dev, h, s);
862 if (!n)
864 gomp_mutex_unlock (&acc_dev->lock);
865 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
868 d = (void *) (n->tgt->tgt_start + n->tgt_offset
869 + (uintptr_t) h - n->host_start);
871 goacc_aq aq = get_goacc_asyncqueue (async);
873 if (is_dev)
874 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
875 else
876 gomp_copy_dev2host (acc_dev, aq, h, d, s);
878 gomp_mutex_unlock (&acc_dev->lock);
880 if (profiling_p)
882 thr->prof_info = NULL;
883 thr->api_info = NULL;
887 void
888 acc_update_device (void *h, size_t s)
890 update_dev_host (1, h, s, acc_async_sync);
/* Copy host data [H,+S) to its device copy on the queue selected by
   ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s, async);
}
899 void
900 acc_update_self (void *h, size_t s)
902 update_dev_host (0, h, s, acc_async_sync);
/* Copy the device copy of [H,+S) back to the host on the queue selected by
   ASYNC.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s, async);
}
911 void
912 acc_attach_async (void **hostaddr, int async)
914 struct goacc_thread *thr = goacc_thread ();
915 struct gomp_device_descr *acc_dev = thr->dev;
916 goacc_aq aq = get_goacc_asyncqueue (async);
918 struct splay_tree_key_s cur_node;
919 splay_tree_key n;
921 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
922 return;
924 gomp_mutex_lock (&acc_dev->lock);
926 cur_node.host_start = (uintptr_t) hostaddr;
927 cur_node.host_end = cur_node.host_start + sizeof (void *);
928 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
930 if (n == NULL)
932 gomp_mutex_unlock (&acc_dev->lock);
933 gomp_fatal ("struct not mapped for acc_attach");
936 gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
937 0, NULL);
939 gomp_mutex_unlock (&acc_dev->lock);
942 void
943 acc_attach (void **hostaddr)
945 acc_attach_async (hostaddr, acc_async_sync);
948 static void
949 goacc_detach_internal (void **hostaddr, int async, bool finalize)
951 struct goacc_thread *thr = goacc_thread ();
952 struct gomp_device_descr *acc_dev = thr->dev;
953 struct splay_tree_key_s cur_node;
954 splay_tree_key n;
955 struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
957 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
958 return;
960 gomp_mutex_lock (&acc_dev->lock);
962 cur_node.host_start = (uintptr_t) hostaddr;
963 cur_node.host_end = cur_node.host_start + sizeof (void *);
964 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
966 if (n == NULL)
968 gomp_mutex_unlock (&acc_dev->lock);
969 gomp_fatal ("struct not mapped for acc_detach");
972 gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
974 gomp_mutex_unlock (&acc_dev->lock);
977 void
978 acc_detach (void **hostaddr)
980 goacc_detach_internal (hostaddr, acc_async_sync, false);
/* Detach the pointer at HOSTADDR on the queue selected by ASYNC, without
   finalizing.  */

void
acc_detach_async (void **hostaddr, int async)
{
  const bool finalize = false;

  goacc_detach_internal (hostaddr, async, finalize);
}
989 void
990 acc_detach_finalize (void **hostaddr)
992 goacc_detach_internal (hostaddr, acc_async_sync, true);
/* Detach the pointer at HOSTADDR on the queue selected by ASYNC,
   finalizing.  */

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  const bool finalize = true;

  goacc_detach_internal (hostaddr, async, finalize);
}
1001 /* Some types of (pointer) variables use several consecutive mappings, which
1002 must be treated as a group for enter/exit data directives. This function
1003 returns the last mapping in such a group (inclusive), or POS for singleton
1004 mappings. */
1006 static int
1007 find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
1009 unsigned char kind0 = kinds[pos] & 0xff;
1010 int first_pos = pos;
1012 switch (kind0)
1014 case GOMP_MAP_TO_PSET:
1015 if (pos + 1 < mapnum
1016 && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
1017 return pos + 1;
1019 while (pos + 1 < mapnum
1020 && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
1021 pos++;
1022 /* We expect at least one GOMP_MAP_POINTER (if not a single
1023 GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET. */
1024 assert (pos > first_pos);
1025 break;
1027 case GOMP_MAP_STRUCT:
1028 pos += sizes[pos];
1029 break;
1031 case GOMP_MAP_POINTER:
1032 case GOMP_MAP_ALWAYS_POINTER:
1033 /* These mappings are only expected after some other mapping. If we
1034 see one by itself, something has gone wrong. */
1035 gomp_fatal ("unexpected mapping");
1036 break;
1038 case GOMP_MAP_ATTACH:
1039 break;
1041 default:
1042 /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
1043 mapping. */
1044 if (pos + 1 < mapnum)
1046 unsigned char kind1 = kinds[pos + 1] & 0xff;
1047 if (kind1 == GOMP_MAP_ALWAYS_POINTER)
1048 return pos + 1;
1051 /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
1052 mapping. */
1053 if (pos + 1 < mapnum
1054 && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
1055 return pos + 1;
1057 /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
1058 (etc.) mapping. */
1059 while (pos + 1 < mapnum
1060 && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
1061 pos++;
1064 return pos;
1067 /* Map variables for OpenACC "enter data". We can't just call
1068 gomp_map_vars_async once, because individual mapped variables might have
1069 "exit data" called for them at different times. */
1071 static void
1072 goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1073 void **hostaddrs, size_t *sizes,
1074 unsigned short *kinds, goacc_aq aq)
1076 gomp_mutex_lock (&acc_dev->lock);
1078 for (size_t i = 0; i < mapnum; i++)
1080 splay_tree_key n;
1081 size_t group_last = find_group_last (i, mapnum, sizes, kinds);
1082 bool struct_p = false;
1083 size_t size, groupnum = (group_last - i) + 1;
1085 switch (kinds[i] & 0xff)
1087 case GOMP_MAP_STRUCT:
1089 size = (uintptr_t) hostaddrs[group_last] + sizes[group_last]
1090 - (uintptr_t) hostaddrs[i];
1091 struct_p = true;
1093 break;
1095 case GOMP_MAP_ATTACH:
1096 size = sizeof (void *);
1097 break;
1099 default:
1100 size = sizes[i];
1103 n = lookup_host (acc_dev, hostaddrs[i], size);
1105 if (n && struct_p)
1107 for (size_t j = i + 1; j <= group_last; j++)
1109 struct splay_tree_key_s cur_node;
1110 cur_node.host_start = (uintptr_t) hostaddrs[j];
1111 cur_node.host_end = cur_node.host_start + sizes[j];
1112 splay_tree_key n2
1113 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1114 if (!n2
1115 || n2->tgt != n->tgt
1116 || n2->host_start - n->host_start
1117 != n2->tgt_offset - n->tgt_offset)
1119 gomp_mutex_unlock (&acc_dev->lock);
1120 gomp_fatal ("Trying to map into device [%p..%p) structure "
1121 "element when other mapped elements from the "
1122 "same structure weren't mapped together with "
1123 "it", (void *) cur_node.host_start,
1124 (void *) cur_node.host_end);
1127 /* This is a special case because we must increment the refcount by
1128 the number of mapped struct elements, rather than by one. */
1129 if (n->refcount != REFCOUNT_INFINITY)
1130 n->refcount += groupnum - 1;
1131 n->dynamic_refcount += groupnum - 1;
1133 else if (n && groupnum == 1)
1135 void *h = hostaddrs[i];
1136 size_t s = sizes[i];
1138 /* A standalone attach clause. */
1139 if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
1140 gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
1141 (uintptr_t) h, s, NULL);
1143 goacc_map_var_existing (acc_dev, h, s, n);
1145 else if (n && groupnum > 1)
1147 assert (n->refcount != REFCOUNT_INFINITY
1148 && n->refcount != REFCOUNT_LINK);
1150 for (size_t j = i + 1; j <= group_last; j++)
1151 if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
1153 splay_tree_key m
1154 = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
1155 gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
1156 (uintptr_t) hostaddrs[j], sizes[j], NULL);
1159 bool processed = false;
1161 struct target_mem_desc *tgt = n->tgt;
1162 for (size_t j = 0; j < tgt->list_count; j++)
1163 if (tgt->list[j].key == n)
1165 /* We are processing a group of mappings (e.g.
1166 [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
1167 Find the right group in the target_mem_desc's variable
1168 list, and increment the refcounts for each item in that
1169 group. */
1170 for (size_t k = 0; k < groupnum; k++)
1171 if (j + k < tgt->list_count && tgt->list[j + k].key)
1173 tgt->list[j + k].key->refcount++;
1174 tgt->list[j + k].key->dynamic_refcount++;
1176 processed = true;
1177 break;
1180 if (!processed)
1182 gomp_mutex_unlock (&acc_dev->lock);
1183 gomp_fatal ("dynamic refcount incrementing failed for "
1184 "pointer/pset");
1187 else if (hostaddrs[i])
1189 /* The data is not mapped already. Map it now, unless the first
1190 member in the group has a NULL pointer (e.g. a non-present
1191 optional parameter). */
1192 gomp_mutex_unlock (&acc_dev->lock);
1194 struct target_mem_desc *tgt
1195 = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
1196 &sizes[i], &kinds[i], true,
1197 GOMP_MAP_VARS_ENTER_DATA);
1198 assert (tgt);
1200 gomp_mutex_lock (&acc_dev->lock);
1202 for (size_t j = 0; j < tgt->list_count; j++)
1204 n = tgt->list[j].key;
1205 if (n)
1206 n->dynamic_refcount++;
1210 i = group_last;
1213 gomp_mutex_unlock (&acc_dev->lock);
1216 /* Unmap variables for OpenACC "exit data". */
1218 static void
1219 goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1220 void **hostaddrs, size_t *sizes,
1221 unsigned short *kinds, goacc_aq aq)
1223 gomp_mutex_lock (&acc_dev->lock);
1225 /* Handle "detach" before copyback/deletion of mapped data. */
1226 for (size_t i = 0; i < mapnum; ++i)
1228 unsigned char kind = kinds[i] & 0xff;
1229 bool finalize = false;
1230 switch (kind)
1232 case GOMP_MAP_FORCE_DETACH:
1233 finalize = true;
1234 /* Fallthrough. */
1236 case GOMP_MAP_DETACH:
1238 struct splay_tree_key_s cur_node;
1239 uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
1240 cur_node.host_start = hostaddr;
1241 cur_node.host_end = cur_node.host_start + sizeof (void *);
1242 splay_tree_key n
1243 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1245 if (n == NULL)
1247 gomp_mutex_unlock (&acc_dev->lock);
1248 gomp_fatal ("struct not mapped for detach operation");
1251 gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
1253 break;
1254 default:
1259 for (size_t i = 0; i < mapnum; ++i)
1261 unsigned char kind = kinds[i] & 0xff;
1263 switch (kind)
1265 case GOMP_MAP_FROM:
1266 case GOMP_MAP_FORCE_FROM:
1267 case GOMP_MAP_TO_PSET:
1268 case GOMP_MAP_POINTER:
1269 case GOMP_MAP_DELETE:
1270 case GOMP_MAP_RELEASE:
1271 case GOMP_MAP_DETACH:
1272 case GOMP_MAP_FORCE_DETACH:
1274 struct splay_tree_key_s cur_node;
1275 size_t size;
1276 if (kind == GOMP_MAP_POINTER
1277 || kind == GOMP_MAP_DETACH
1278 || kind == GOMP_MAP_FORCE_DETACH)
1279 size = sizeof (void *);
1280 else
1281 size = sizes[i];
1282 cur_node.host_start = (uintptr_t) hostaddrs[i];
1283 cur_node.host_end = cur_node.host_start + size;
1284 splay_tree_key n
1285 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1287 if (n == NULL)
1288 continue;
1290 goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq);
1292 break;
1294 case GOMP_MAP_STRUCT:
1295 /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
1296 for all its entries. This special handling exists for GCC 10.1
1297 compatibility; afterwards, we're not generating these no-op
1298 'GOMP_MAP_STRUCT's anymore. */
1299 break;
1301 default:
1302 gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1303 kind);
1307 gomp_mutex_unlock (&acc_dev->lock);
1310 void
1311 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
1312 size_t *sizes, unsigned short *kinds, int async,
1313 int num_waits, ...)
1315 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
1317 struct goacc_thread *thr;
1318 struct gomp_device_descr *acc_dev;
1319 bool data_enter = false;
1320 size_t i;
1322 goacc_lazy_initialize ();
1324 thr = goacc_thread ();
1325 acc_dev = thr->dev;
1327 /* Determine if this is an "acc enter data". */
1328 for (i = 0; i < mapnum; ++i)
1330 unsigned char kind = kinds[i] & 0xff;
1332 if (kind == GOMP_MAP_POINTER
1333 || kind == GOMP_MAP_TO_PSET
1334 || kind == GOMP_MAP_STRUCT)
1335 continue;
1337 if (kind == GOMP_MAP_FORCE_ALLOC
1338 || kind == GOMP_MAP_FORCE_PRESENT
1339 || kind == GOMP_MAP_ATTACH
1340 || kind == GOMP_MAP_FORCE_TO
1341 || kind == GOMP_MAP_TO
1342 || kind == GOMP_MAP_ALLOC)
1344 data_enter = true;
1345 break;
1348 if (kind == GOMP_MAP_RELEASE
1349 || kind == GOMP_MAP_DELETE
1350 || kind == GOMP_MAP_DETACH
1351 || kind == GOMP_MAP_FORCE_DETACH
1352 || kind == GOMP_MAP_FROM
1353 || kind == GOMP_MAP_FORCE_FROM)
1354 break;
1356 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1357 kind);
1360 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
1362 acc_prof_info prof_info;
1363 if (profiling_p)
1365 thr->prof_info = &prof_info;
1367 prof_info.event_type
1368 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
1369 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
1370 prof_info.version = _ACC_PROF_INFO_VERSION;
1371 prof_info.device_type = acc_device_type (acc_dev->type);
1372 prof_info.device_number = acc_dev->target_id;
1373 prof_info.thread_id = -1;
1374 prof_info.async = async;
1375 prof_info.async_queue = prof_info.async;
1376 prof_info.src_file = NULL;
1377 prof_info.func_name = NULL;
1378 prof_info.line_no = -1;
1379 prof_info.end_line_no = -1;
1380 prof_info.func_line_no = -1;
1381 prof_info.func_end_line_no = -1;
1383 acc_event_info enter_exit_data_event_info;
1384 if (profiling_p)
1386 enter_exit_data_event_info.other_event.event_type
1387 = prof_info.event_type;
1388 enter_exit_data_event_info.other_event.valid_bytes
1389 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
1390 enter_exit_data_event_info.other_event.parent_construct
1391 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
1392 enter_exit_data_event_info.other_event.implicit = 0;
1393 enter_exit_data_event_info.other_event.tool_info = NULL;
1395 acc_api_info api_info;
1396 if (profiling_p)
1398 thr->api_info = &api_info;
1400 api_info.device_api = acc_device_api_none;
1401 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
1402 api_info.device_type = prof_info.device_type;
1403 api_info.vendor = -1;
1404 api_info.device_handle = NULL;
1405 api_info.context_handle = NULL;
1406 api_info.async_handle = NULL;
1409 if (profiling_p)
1410 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1411 &api_info);
1413 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1414 || (flags & GOACC_FLAG_HOST_FALLBACK))
1416 prof_info.device_type = acc_device_host;
1417 api_info.device_type = prof_info.device_type;
1419 goto out_prof;
1422 if (num_waits)
1424 va_list ap;
1426 va_start (ap, num_waits);
1427 goacc_wait (async, num_waits, &ap);
1428 va_end (ap);
1431 goacc_aq aq = get_goacc_asyncqueue (async);
1433 if (data_enter)
1434 goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1435 else
1436 goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1438 out_prof:
1439 if (profiling_p)
1441 prof_info.event_type
1442 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
1443 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
1444 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1445 &api_info);
1447 thr->prof_info = NULL;
1448 thr->api_info = NULL;