/* Git web-listing header (scrape residue), preserved as a comment:
   commit subject: "Update 'Q' constraint documentation."
   path: [official-gcc.git] / libgomp / oacc-mem.c
   blob 2d4bba78efdc6c4357bd8fddcab0a3b5fc5e9070  */
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2020 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
42 struct splay_tree_key_s node;
43 splay_tree_key key;
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
48 key = splay_tree_lookup (&dev->mem_map, &node);
50 return key;
53 /* Helper for lookup_dev. Iterate over splay tree. */
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
68 return key;
71 /* Return block containing [D->S), or NULL if not contained.
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
82 if (!mem_map || !mem_map->root)
83 return NULL;
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
92 void *
93 acc_malloc (size_t s)
95 if (!s)
96 return NULL;
98 goacc_lazy_initialize ();
100 struct goacc_thread *thr = goacc_thread ();
102 assert (thr->dev);
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
113 if (profiling_p)
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
119 return res;
122 void
123 acc_free (void *d)
125 splay_tree_key k;
127 if (!d)
128 return;
130 struct goacc_thread *thr = goacc_thread ();
132 assert (thr && thr->dev);
134 struct gomp_device_descr *acc_dev = thr->dev;
136 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
137 return free (d);
139 acc_prof_info prof_info;
140 acc_api_info api_info;
141 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
143 gomp_mutex_lock (&acc_dev->lock);
145 /* We don't have to call lazy open here, as the ptr value must have
146 been returned by acc_malloc. It's not permitted to pass NULL in
147 (unless you got that null from acc_malloc). */
148 if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
150 void *offset = d - k->tgt->tgt_start + k->tgt_offset;
151 void *h = k->host_start + offset;
152 size_t h_size = k->host_end - k->host_start;
153 gomp_mutex_unlock (&acc_dev->lock);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
158 d, h, (int) h_size);
160 else
161 gomp_mutex_unlock (&acc_dev->lock);
163 if (!acc_dev->free_func (acc_dev->target_id, d))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
166 if (profiling_p)
168 thr->prof_info = NULL;
169 thr->api_info = NULL;
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
181 assert (thr && thr->dev);
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
207 if (profiling_p)
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
214 void
215 acc_memcpy_to_device (void *d, void *h, size_t s)
217 memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
220 void
221 acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
223 memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
226 void
227 acc_memcpy_from_device (void *h, void *d, size_t s)
229 memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
232 void
233 acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
235 memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
241 void *
242 acc_deviceptr (void *h)
244 splay_tree_key n;
245 void *d;
246 void *offset;
248 goacc_lazy_initialize ();
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
259 gomp_mutex_lock (&dev->lock);
261 n = lookup_host (dev, h, 1);
263 if (!n)
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
269 offset = h - n->host_start;
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
273 gomp_mutex_unlock (&dev->lock);
275 return d;
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
281 void *
282 acc_hostptr (void *d)
284 splay_tree_key n;
285 void *h;
286 void *offset;
288 goacc_lazy_initialize ();
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
299 gomp_mutex_lock (&acc_dev->lock);
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
303 if (!n)
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
311 h = n->host_start + offset;
313 gomp_mutex_unlock (&acc_dev->lock);
315 return h;
318 /* Return 1 if host data [H,+S] is present on the device. */
321 acc_is_present (void *h, size_t s)
323 splay_tree_key n;
325 if (!s || !h)
326 return 0;
328 goacc_lazy_initialize ();
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
339 gomp_mutex_lock (&acc_dev->lock);
341 n = lookup_host (acc_dev, h, s);
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
348 gomp_mutex_unlock (&acc_dev->lock);
350 return n != NULL;
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
355 void
356 acc_map_data (void *h, void *d, size_t s)
358 struct target_mem_desc *tgt = NULL;
359 size_t mapnum = 1;
360 void *hostaddrs = h;
361 void *devaddrs = d;
362 size_t sizes = s;
363 unsigned short kinds = GOMP_MAP_ALLOC;
365 goacc_lazy_initialize ();
367 struct goacc_thread *thr = goacc_thread ();
368 struct gomp_device_descr *acc_dev = thr->dev;
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
372 if (d != h)
373 gomp_fatal ("cannot map data on shared-memory system");
375 else
377 struct goacc_thread *thr = goacc_thread ();
379 if (!d || !h || !s)
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h, (int)s, (void *)d, (int)s);
383 acc_prof_info prof_info;
384 acc_api_info api_info;
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
387 gomp_mutex_lock (&acc_dev->lock);
389 if (lookup_host (acc_dev, h, s))
391 gomp_mutex_unlock (&acc_dev->lock);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
393 (int)s);
396 if (lookup_dev (&thr->dev->mem_map, d, s))
398 gomp_mutex_unlock (&acc_dev->lock);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
400 (int)s);
403 gomp_mutex_unlock (&acc_dev->lock);
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407 assert (tgt);
408 splay_tree_key n = tgt->list[0].key;
409 assert (n->refcount == 1);
410 assert (n->virtual_refcount == 0);
411 /* Special reference counting behavior. */
412 n->refcount = REFCOUNT_INFINITY;
414 if (profiling_p)
416 thr->prof_info = NULL;
417 thr->api_info = NULL;
422 void
423 acc_unmap_data (void *h)
425 struct goacc_thread *thr = goacc_thread ();
426 struct gomp_device_descr *acc_dev = thr->dev;
428 /* No need to call lazy open, as the address must have been mapped. */
430 /* This is a no-op on shared-memory targets. */
431 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
432 return;
434 acc_prof_info prof_info;
435 acc_api_info api_info;
436 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
438 gomp_mutex_lock (&acc_dev->lock);
440 splay_tree_key n = lookup_host (acc_dev, h, 1);
442 if (!n)
444 gomp_mutex_unlock (&acc_dev->lock);
445 gomp_fatal ("%p is not a mapped block", (void *)h);
448 size_t host_size = n->host_end - n->host_start;
450 if (n->host_start != (uintptr_t) h)
452 gomp_mutex_unlock (&acc_dev->lock);
453 gomp_fatal ("[%p,%d] surrounds %p",
454 (void *) n->host_start, (int) host_size, (void *) h);
456 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
457 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating
458 the different 'REFCOUNT_INFINITY' cases, or simply separate
459 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
460 etc.)? */
461 else if (n->refcount != REFCOUNT_INFINITY)
463 gomp_mutex_unlock (&acc_dev->lock);
464 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
465 " by 'acc_map_data'",
466 (void *) h, (int) host_size);
469 splay_tree_remove (&acc_dev->mem_map, n);
471 struct target_mem_desc *tgt = n->tgt;
473 if (tgt->refcount == REFCOUNT_INFINITY)
475 gomp_mutex_unlock (&acc_dev->lock);
476 gomp_fatal ("cannot unmap target block");
478 else if (tgt->refcount > 1)
479 tgt->refcount--;
480 else
482 free (tgt->array);
483 free (tgt);
486 gomp_mutex_unlock (&acc_dev->lock);
488 if (profiling_p)
490 thr->prof_info = NULL;
491 thr->api_info = NULL;
496 /* Enter dynamic mapping for a single datum. Return the device pointer. */
498 static void *
499 goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
501 void *d;
502 splay_tree_key n;
504 if (!hostaddrs[0] || !sizes[0])
505 gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
507 goacc_lazy_initialize ();
509 struct goacc_thread *thr = goacc_thread ();
510 struct gomp_device_descr *acc_dev = thr->dev;
512 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
513 return hostaddrs[0];
515 acc_prof_info prof_info;
516 acc_api_info api_info;
517 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
518 if (profiling_p)
520 prof_info.async = async;
521 prof_info.async_queue = prof_info.async;
524 gomp_mutex_lock (&acc_dev->lock);
526 n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
527 if (n)
529 void *h = hostaddrs[0];
530 size_t s = sizes[0];
532 /* Present. */
533 d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);
535 if ((h + s) > (void *)n->host_end)
537 gomp_mutex_unlock (&acc_dev->lock);
538 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
541 assert (n->refcount != REFCOUNT_LINK);
542 if (n->refcount != REFCOUNT_INFINITY)
544 n->refcount++;
545 n->virtual_refcount++;
548 gomp_mutex_unlock (&acc_dev->lock);
550 else
552 const size_t mapnum = 1;
554 gomp_mutex_unlock (&acc_dev->lock);
556 goacc_aq aq = get_goacc_asyncqueue (async);
558 gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
559 true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
561 gomp_mutex_lock (&acc_dev->lock);
562 n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
563 assert (n != NULL);
564 assert (n->tgt_offset == 0);
565 assert ((uintptr_t) hostaddrs[0] == n->host_start);
566 d = (void *) n->tgt->tgt_start;
567 gomp_mutex_unlock (&acc_dev->lock);
570 if (profiling_p)
572 thr->prof_info = NULL;
573 thr->api_info = NULL;
576 return d;
579 void *
580 acc_create (void *h, size_t s)
582 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
583 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
586 void
587 acc_create_async (void *h, size_t s, int async)
589 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
590 goacc_enter_datum (&h, &s, &kinds, async);
/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
612 void *
613 acc_copyin (void *h, size_t s)
615 unsigned short kinds[1] = { GOMP_MAP_TO };
616 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
619 void
620 acc_copyin_async (void *h, size_t s, int async)
622 unsigned short kinds[1] = { GOMP_MAP_TO };
623 goacc_enter_datum (&h, &s, &kinds, async);
/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
646 /* Exit a dynamic mapping for a single variable. */
648 static void
649 goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
651 /* No need to call lazy open, as the data must already have been
652 mapped. */
654 kind &= 0xff;
656 struct goacc_thread *thr = goacc_thread ();
657 struct gomp_device_descr *acc_dev = thr->dev;
659 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
660 return;
662 acc_prof_info prof_info;
663 acc_api_info api_info;
664 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
665 if (profiling_p)
667 prof_info.async = async;
668 prof_info.async_queue = prof_info.async;
671 gomp_mutex_lock (&acc_dev->lock);
673 splay_tree_key n = lookup_host (acc_dev, h, s);
674 if (!n)
675 /* PR92726, RP92970, PR92984: no-op. */
676 goto out;
678 if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
680 size_t host_size = n->host_end - n->host_start;
681 gomp_mutex_unlock (&acc_dev->lock);
682 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
683 (void *) h, (int) s, (void *) n->host_start, (int) host_size);
686 bool finalize = (kind == GOMP_MAP_DELETE
687 || kind == GOMP_MAP_FORCE_FROM);
688 if (finalize)
690 if (n->refcount != REFCOUNT_INFINITY)
691 n->refcount -= n->virtual_refcount;
692 n->virtual_refcount = 0;
695 if (n->virtual_refcount > 0)
697 if (n->refcount != REFCOUNT_INFINITY)
698 n->refcount--;
699 n->virtual_refcount--;
701 else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
702 n->refcount--;
704 if (n->refcount == 0)
706 goacc_aq aq = get_goacc_asyncqueue (async);
708 bool copyout = (kind == GOMP_MAP_FROM
709 || kind == GOMP_MAP_FORCE_FROM);
710 if (copyout)
712 void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
713 + (uintptr_t) h - n->host_start);
714 gomp_copy_dev2host (acc_dev, aq, h, d, s);
717 if (aq)
718 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
719 'gomp_unref_tgt' comment in
720 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
721 PR92881. */
722 gomp_remove_var_async (acc_dev, n, aq);
723 else
725 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
726 assert (is_tgt_unmapped);
730 out:
731 gomp_mutex_unlock (&acc_dev->lock);
733 if (profiling_p)
735 thr->prof_info = NULL;
736 thr->api_info = NULL;
740 void
741 acc_delete (void *h , size_t s)
743 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
746 void
747 acc_delete_async (void *h , size_t s, int async)
749 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
752 void
753 acc_delete_finalize (void *h , size_t s)
755 goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
758 void
759 acc_delete_finalize_async (void *h , size_t s, int async)
761 goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
764 void
765 acc_copyout (void *h, size_t s)
767 goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
770 void
771 acc_copyout_async (void *h, size_t s, int async)
773 goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
776 void
777 acc_copyout_finalize (void *h, size_t s)
779 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
782 void
783 acc_copyout_finalize_async (void *h, size_t s, int async)
785 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
788 static void
789 update_dev_host (int is_dev, void *h, size_t s, int async)
791 splay_tree_key n;
792 void *d;
794 goacc_lazy_initialize ();
796 struct goacc_thread *thr = goacc_thread ();
797 struct gomp_device_descr *acc_dev = thr->dev;
799 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
800 return;
802 /* Fortran optional arguments that are non-present result in a
803 NULL host address here. This can safely be ignored as it is
804 not possible to 'update' a non-present optional argument. */
805 if (h == NULL)
806 return;
808 acc_prof_info prof_info;
809 acc_api_info api_info;
810 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
811 if (profiling_p)
813 prof_info.async = async;
814 prof_info.async_queue = prof_info.async;
817 gomp_mutex_lock (&acc_dev->lock);
819 n = lookup_host (acc_dev, h, s);
821 if (!n)
823 gomp_mutex_unlock (&acc_dev->lock);
824 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
827 d = (void *) (n->tgt->tgt_start + n->tgt_offset
828 + (uintptr_t) h - n->host_start);
830 goacc_aq aq = get_goacc_asyncqueue (async);
832 if (is_dev)
833 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
834 else
835 gomp_copy_dev2host (acc_dev, aq, h, d, s);
837 gomp_mutex_unlock (&acc_dev->lock);
839 if (profiling_p)
841 thr->prof_info = NULL;
842 thr->api_info = NULL;
846 void
847 acc_update_device (void *h, size_t s)
849 update_dev_host (1, h, s, acc_async_sync);
852 void
853 acc_update_device_async (void *h, size_t s, int async)
855 update_dev_host (1, h, s, async);
858 void
859 acc_update_self (void *h, size_t s)
861 update_dev_host (0, h, s, acc_async_sync);
864 void
865 acc_update_self_async (void *h, size_t s, int async)
867 update_dev_host (0, h, s, async);
870 void
871 acc_attach_async (void **hostaddr, int async)
873 struct goacc_thread *thr = goacc_thread ();
874 struct gomp_device_descr *acc_dev = thr->dev;
875 goacc_aq aq = get_goacc_asyncqueue (async);
877 struct splay_tree_key_s cur_node;
878 splay_tree_key n;
880 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
881 return;
883 gomp_mutex_lock (&acc_dev->lock);
885 cur_node.host_start = (uintptr_t) hostaddr;
886 cur_node.host_end = cur_node.host_start + sizeof (void *);
887 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
889 if (n == NULL)
890 gomp_fatal ("struct not mapped for acc_attach");
892 gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
893 0, NULL);
895 gomp_mutex_unlock (&acc_dev->lock);
898 void
899 acc_attach (void **hostaddr)
901 acc_attach_async (hostaddr, acc_async_sync);
904 static void
905 goacc_detach_internal (void **hostaddr, int async, bool finalize)
907 struct goacc_thread *thr = goacc_thread ();
908 struct gomp_device_descr *acc_dev = thr->dev;
909 struct splay_tree_key_s cur_node;
910 splay_tree_key n;
911 struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
913 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
914 return;
916 gomp_mutex_lock (&acc_dev->lock);
918 cur_node.host_start = (uintptr_t) hostaddr;
919 cur_node.host_end = cur_node.host_start + sizeof (void *);
920 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
922 if (n == NULL)
923 gomp_fatal ("struct not mapped for acc_detach");
925 gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
927 gomp_mutex_unlock (&acc_dev->lock);
930 void
931 acc_detach (void **hostaddr)
933 goacc_detach_internal (hostaddr, acc_async_sync, false);
936 void
937 acc_detach_async (void **hostaddr, int async)
939 goacc_detach_internal (hostaddr, async, false);
942 void
943 acc_detach_finalize (void **hostaddr)
945 goacc_detach_internal (hostaddr, acc_async_sync, true);
948 void
949 acc_detach_finalize_async (void **hostaddr, int async)
951 goacc_detach_internal (hostaddr, async, true);
954 /* Some types of (pointer) variables use several consecutive mappings, which
955 must be treated as a group for enter/exit data directives. This function
956 returns the last mapping in such a group (inclusive), or POS for singleton
957 mappings. */
959 static int
960 find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
962 unsigned char kind0 = kinds[pos] & 0xff;
963 int first_pos = pos;
965 switch (kind0)
967 case GOMP_MAP_TO_PSET:
968 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
969 pos++;
970 /* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */
971 assert (pos > first_pos);
972 break;
974 case GOMP_MAP_STRUCT:
975 pos += sizes[pos];
976 break;
978 case GOMP_MAP_POINTER:
979 case GOMP_MAP_ALWAYS_POINTER:
980 /* These mappings are only expected after some other mapping. If we
981 see one by itself, something has gone wrong. */
982 gomp_fatal ("unexpected mapping");
983 break;
985 default:
986 /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
987 mapping. */
988 if (pos + 1 < mapnum)
990 unsigned char kind1 = kinds[pos + 1] & 0xff;
991 if (kind1 == GOMP_MAP_ALWAYS_POINTER)
992 return pos + 1;
995 /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
996 (etc.) mapping. */
997 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
998 pos++;
1001 return pos;
1004 /* Map variables for OpenACC "enter data". We can't just call
1005 gomp_map_vars_async once, because individual mapped variables might have
1006 "exit data" called for them at different times. */
1008 static void
1009 goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1010 void **hostaddrs, size_t *sizes,
1011 unsigned short *kinds, goacc_aq aq)
1013 for (size_t i = 0; i < mapnum; i++)
1015 int group_last = find_group_last (i, mapnum, sizes, kinds);
1017 gomp_map_vars_async (acc_dev, aq,
1018 (group_last - i) + 1,
1019 &hostaddrs[i], NULL,
1020 &sizes[i], &kinds[i], true,
1021 GOMP_MAP_VARS_OPENACC_ENTER_DATA);
1023 i = group_last;
1027 /* Unmap variables for OpenACC "exit data". */
1029 static void
1030 goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1031 void **hostaddrs, size_t *sizes,
1032 unsigned short *kinds, goacc_aq aq)
1034 gomp_mutex_lock (&acc_dev->lock);
1036 /* Handle "detach" before copyback/deletion of mapped data. */
1037 for (size_t i = 0; i < mapnum; ++i)
1039 unsigned char kind = kinds[i] & 0xff;
1040 bool finalize = false;
1041 switch (kind)
1043 case GOMP_MAP_FORCE_DETACH:
1044 finalize = true;
1045 /* Fallthrough. */
1047 case GOMP_MAP_DETACH:
1049 struct splay_tree_key_s cur_node;
1050 uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
1051 cur_node.host_start = hostaddr;
1052 cur_node.host_end = cur_node.host_start + sizeof (void *);
1053 splay_tree_key n
1054 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1056 if (n == NULL)
1057 gomp_fatal ("struct not mapped for detach operation");
1059 gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
1061 break;
1062 default:
1067 for (size_t i = 0; i < mapnum; ++i)
1069 unsigned char kind = kinds[i] & 0xff;
1070 bool copyfrom = false;
1071 bool finalize = false;
1073 if (kind == GOMP_MAP_FORCE_FROM
1074 || kind == GOMP_MAP_DELETE
1075 || kind == GOMP_MAP_FORCE_DETACH)
1076 finalize = true;
1078 switch (kind)
1080 case GOMP_MAP_FROM:
1081 case GOMP_MAP_FORCE_FROM:
1082 case GOMP_MAP_ALWAYS_FROM:
1083 copyfrom = true;
1084 /* Fallthrough. */
1086 case GOMP_MAP_TO_PSET:
1087 case GOMP_MAP_POINTER:
1088 case GOMP_MAP_DELETE:
1089 case GOMP_MAP_RELEASE:
1090 case GOMP_MAP_DETACH:
1091 case GOMP_MAP_FORCE_DETACH:
1093 struct splay_tree_key_s cur_node;
1094 size_t size;
1095 if (kind == GOMP_MAP_POINTER
1096 || kind == GOMP_MAP_DETACH
1097 || kind == GOMP_MAP_FORCE_DETACH)
1098 size = sizeof (void *);
1099 else
1100 size = sizes[i];
1101 cur_node.host_start = (uintptr_t) hostaddrs[i];
1102 cur_node.host_end = cur_node.host_start + size;
1103 splay_tree_key n
1104 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1106 if (n == NULL)
1107 continue;
1109 if (finalize)
1111 if (n->refcount != REFCOUNT_INFINITY)
1112 n->refcount -= n->virtual_refcount;
1113 n->virtual_refcount = 0;
1116 if (n->virtual_refcount > 0)
1118 if (n->refcount != REFCOUNT_INFINITY)
1119 n->refcount--;
1120 n->virtual_refcount--;
1122 else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
1123 n->refcount--;
1125 if (copyfrom
1126 && (kind != GOMP_MAP_FROM || n->refcount == 0))
1127 gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start,
1128 (void *) (n->tgt->tgt_start + n->tgt_offset
1129 + cur_node.host_start
1130 - n->host_start),
1131 cur_node.host_end - cur_node.host_start);
1133 if (n->refcount == 0)
1134 gomp_remove_var_async (acc_dev, n, aq);
1136 break;
1138 case GOMP_MAP_STRUCT:
1140 int elems = sizes[i];
1141 for (int j = 1; j <= elems; j++)
1143 struct splay_tree_key_s k;
1144 k.host_start = (uintptr_t) hostaddrs[i + j];
1145 k.host_end = k.host_start + sizes[i + j];
1146 splay_tree_key str;
1147 str = splay_tree_lookup (&acc_dev->mem_map, &k);
1148 if (str)
1150 if (finalize)
1152 if (str->refcount != REFCOUNT_INFINITY)
1153 str->refcount -= str->virtual_refcount;
1154 str->virtual_refcount = 0;
1156 if (str->virtual_refcount > 0)
1158 if (str->refcount != REFCOUNT_INFINITY)
1159 str->refcount--;
1160 str->virtual_refcount--;
1162 else if (str->refcount > 0
1163 && str->refcount != REFCOUNT_INFINITY)
1164 str->refcount--;
1165 if (str->refcount == 0)
1166 gomp_remove_var_async (acc_dev, str, aq);
1169 i += elems;
1171 break;
1173 default:
1174 gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1175 kind);
1179 gomp_mutex_unlock (&acc_dev->lock);
1182 void
1183 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
1184 size_t *sizes, unsigned short *kinds, int async,
1185 int num_waits, ...)
1187 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
1189 struct goacc_thread *thr;
1190 struct gomp_device_descr *acc_dev;
1191 bool data_enter = false;
1192 size_t i;
1194 goacc_lazy_initialize ();
1196 thr = goacc_thread ();
1197 acc_dev = thr->dev;
1199 /* Determine if this is an "acc enter data". */
1200 for (i = 0; i < mapnum; ++i)
1202 unsigned char kind = kinds[i] & 0xff;
1204 if (kind == GOMP_MAP_POINTER
1205 || kind == GOMP_MAP_TO_PSET
1206 || kind == GOMP_MAP_STRUCT)
1207 continue;
1209 if (kind == GOMP_MAP_FORCE_ALLOC
1210 || kind == GOMP_MAP_FORCE_PRESENT
1211 || kind == GOMP_MAP_ATTACH
1212 || kind == GOMP_MAP_FORCE_TO
1213 || kind == GOMP_MAP_TO
1214 || kind == GOMP_MAP_ALLOC)
1216 data_enter = true;
1217 break;
1220 if (kind == GOMP_MAP_RELEASE
1221 || kind == GOMP_MAP_DELETE
1222 || kind == GOMP_MAP_DETACH
1223 || kind == GOMP_MAP_FORCE_DETACH
1224 || kind == GOMP_MAP_FROM
1225 || kind == GOMP_MAP_FORCE_FROM)
1226 break;
1228 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1229 kind);
1232 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
1234 acc_prof_info prof_info;
1235 if (profiling_p)
1237 thr->prof_info = &prof_info;
1239 prof_info.event_type
1240 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
1241 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
1242 prof_info.version = _ACC_PROF_INFO_VERSION;
1243 prof_info.device_type = acc_device_type (acc_dev->type);
1244 prof_info.device_number = acc_dev->target_id;
1245 prof_info.thread_id = -1;
1246 prof_info.async = async;
1247 prof_info.async_queue = prof_info.async;
1248 prof_info.src_file = NULL;
1249 prof_info.func_name = NULL;
1250 prof_info.line_no = -1;
1251 prof_info.end_line_no = -1;
1252 prof_info.func_line_no = -1;
1253 prof_info.func_end_line_no = -1;
1255 acc_event_info enter_exit_data_event_info;
1256 if (profiling_p)
1258 enter_exit_data_event_info.other_event.event_type
1259 = prof_info.event_type;
1260 enter_exit_data_event_info.other_event.valid_bytes
1261 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
1262 enter_exit_data_event_info.other_event.parent_construct
1263 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
1264 enter_exit_data_event_info.other_event.implicit = 0;
1265 enter_exit_data_event_info.other_event.tool_info = NULL;
1267 acc_api_info api_info;
1268 if (profiling_p)
1270 thr->api_info = &api_info;
1272 api_info.device_api = acc_device_api_none;
1273 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
1274 api_info.device_type = prof_info.device_type;
1275 api_info.vendor = -1;
1276 api_info.device_handle = NULL;
1277 api_info.context_handle = NULL;
1278 api_info.async_handle = NULL;
1281 if (profiling_p)
1282 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1283 &api_info);
1285 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1286 || (flags & GOACC_FLAG_HOST_FALLBACK))
1288 prof_info.device_type = acc_device_host;
1289 api_info.device_type = prof_info.device_type;
1291 goto out_prof;
1294 if (num_waits)
1296 va_list ap;
1298 va_start (ap, num_waits);
1299 goacc_wait (async, num_waits, &ap);
1300 va_end (ap);
1303 goacc_aq aq = get_goacc_asyncqueue (async);
1305 if (data_enter)
1306 goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1307 else
1308 goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1310 out_prof:
1311 if (profiling_p)
1313 prof_info.event_type
1314 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
1315 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
1316 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1317 &api_info);
1319 thr->prof_info = NULL;
1320 thr->api_info = NULL;