1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2018 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
38 /* Return the block containing host range [H, H+S), or NULL if not contained. The device lock
39 for DEV must be locked on entry, and remains locked on exit. */
42 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
44 struct splay_tree_key_s node
;
47 node
.host_start
= (uintptr_t) h
;
48 node
.host_end
= (uintptr_t) h
+ s
;
50 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
55 /* Return the block containing device range [D, D+S), or NULL if not contained.
56 The list isn't ordered by device address, so we have to iterate
57 over the whole array. This is not expected to be a common
58 operation. The device lock associated with TGT must be locked on entry, and
59 remains locked on exit. */
62 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
65 struct target_mem_desc
*t
;
70 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
72 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
79 for (i
= 0; i
< t
->list_count
; i
++)
83 splay_tree_key k
= &t
->array
[i
].key
;
84 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
86 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
93 /* OpenACC is silent on how memory exhaustion is indicated. We return the underlying allocator's result, presumably NULL on failure (TODO confirm). */
102 goacc_lazy_initialize ();
104 struct goacc_thread
*thr
= goacc_thread ();
108 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
111 return thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
114 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
115 the device address is mapped. We choose to check if it is mapped,
116 and if it is, to unmap it. */
125 struct goacc_thread
*thr
= goacc_thread ();
127 assert (thr
&& thr
->dev
);
129 struct gomp_device_descr
*acc_dev
= thr
->dev
;
131 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
134 gomp_mutex_lock (&acc_dev
->lock
);
136 /* We don't have to call lazy open here, as the ptr value must have
137 been returned by acc_malloc. It's not permitted to pass NULL in
138 (unless you got that NULL from acc_malloc). */
139 if ((k
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1)))
143 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
145 gomp_mutex_unlock (&acc_dev
->lock
);
147 acc_unmap_data ((void *)(k
->host_start
+ offset
));
150 gomp_mutex_unlock (&acc_dev
->lock
);
152 if (!acc_dev
->free_func (acc_dev
->target_id
, d
))
153 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__
);
157 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
159 /* No need to call lazy open here, as the device pointer must have
160 been obtained from a routine that did that. */
161 struct goacc_thread
*thr
= goacc_thread ();
163 assert (thr
&& thr
->dev
);
165 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
171 if (!thr
->dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
))
172 gomp_fatal ("error in %s", __FUNCTION__
);
176 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
178 /* No need to call lazy open here, as the device pointer must have
179 been obtained from a routine that did that. */
180 struct goacc_thread
*thr
= goacc_thread ();
182 assert (thr
&& thr
->dev
);
184 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
190 if (!thr
->dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
))
191 gomp_fatal ("error in %s", __FUNCTION__
);
194 /* Return the device pointer that corresponds to host data H, or NULL if there is no mapping. */
198 acc_deviceptr (void *h
)
204 goacc_lazy_initialize ();
206 struct goacc_thread
*thr
= goacc_thread ();
207 struct gomp_device_descr
*dev
= thr
->dev
;
209 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
212 gomp_mutex_lock (&dev
->lock
);
214 n
= lookup_host (dev
, h
, 1);
218 gomp_mutex_unlock (&dev
->lock
);
222 offset
= h
- n
->host_start
;
224 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
226 gomp_mutex_unlock (&dev
->lock
);
231 /* Return the host pointer that corresponds to device data D, or NULL if there is no mapping. */
235 acc_hostptr (void *d
)
241 goacc_lazy_initialize ();
243 struct goacc_thread
*thr
= goacc_thread ();
244 struct gomp_device_descr
*acc_dev
= thr
->dev
;
246 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
249 gomp_mutex_lock (&acc_dev
->lock
);
251 n
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1);
255 gomp_mutex_unlock (&acc_dev
->lock
);
259 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
261 h
= n
->host_start
+ offset
;
263 gomp_mutex_unlock (&acc_dev
->lock
);
268 /* Return 1 if host data [H,+S] is present on the device. */
271 acc_is_present (void *h
, size_t s
)
278 goacc_lazy_initialize ();
280 struct goacc_thread
*thr
= goacc_thread ();
281 struct gomp_device_descr
*acc_dev
= thr
->dev
;
283 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
286 gomp_mutex_lock (&acc_dev
->lock
);
288 n
= lookup_host (acc_dev
, h
, s
);
290 if (n
&& ((uintptr_t)h
< n
->host_start
291 || (uintptr_t)h
+ s
> n
->host_end
292 || s
> n
->host_end
- n
->host_start
))
295 gomp_mutex_unlock (&acc_dev
->lock
);
300 /* Create a mapping for host [H,+S] -> device [D,+S] */
303 acc_map_data (void *h
, void *d
, size_t s
)
305 struct target_mem_desc
*tgt
= NULL
;
310 unsigned short kinds
= GOMP_MAP_ALLOC
;
312 goacc_lazy_initialize ();
314 struct goacc_thread
*thr
= goacc_thread ();
315 struct gomp_device_descr
*acc_dev
= thr
->dev
;
317 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
320 gomp_fatal ("cannot map data on shared-memory system");
324 struct goacc_thread
*thr
= goacc_thread ();
327 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
328 (void *)h
, (int)s
, (void *)d
, (int)s
);
330 gomp_mutex_lock (&acc_dev
->lock
);
332 if (lookup_host (acc_dev
, h
, s
))
334 gomp_mutex_unlock (&acc_dev
->lock
);
335 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
339 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
341 gomp_mutex_unlock (&acc_dev
->lock
);
342 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
346 gomp_mutex_unlock (&acc_dev
->lock
);
348 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
349 &kinds
, true, GOMP_MAP_VARS_OPENACC
);
352 gomp_mutex_lock (&acc_dev
->lock
);
353 tgt
->prev
= acc_dev
->openacc
.data_environ
;
354 acc_dev
->openacc
.data_environ
= tgt
;
355 gomp_mutex_unlock (&acc_dev
->lock
);
359 acc_unmap_data (void *h
)
361 struct goacc_thread
*thr
= goacc_thread ();
362 struct gomp_device_descr
*acc_dev
= thr
->dev
;
364 /* No need to call lazy open, as the address must have been mapped. */
366 /* This is a no-op on shared-memory targets. */
367 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
372 gomp_mutex_lock (&acc_dev
->lock
);
374 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
375 struct target_mem_desc
*t
;
379 gomp_mutex_unlock (&acc_dev
->lock
);
380 gomp_fatal ("%p is not a mapped block", (void *)h
);
383 host_size
= n
->host_end
- n
->host_start
;
385 if (n
->host_start
!= (uintptr_t) h
)
387 gomp_mutex_unlock (&acc_dev
->lock
);
388 gomp_fatal ("[%p,%d] surrounds %p",
389 (void *) n
->host_start
, (int) host_size
, (void *) h
);
394 if (t
->refcount
== 2)
396 struct target_mem_desc
*tp
;
398 /* This is the last reference, so pull the descriptor off the
399 chain. This prevents gomp_unmap_vars (via gomp_unmap_tgt) from
400 freeing the device memory. */
404 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
411 acc_dev
->openacc
.data_environ
= t
->prev
;
417 gomp_mutex_unlock (&acc_dev
->lock
);
419 gomp_unmap_vars (t
, true);
/* Behaviour flags for the F argument of present_create_copy.
   FLAG_PRESENT: the range may already be mapped; without it an existing
   mapping is reported as a fatal "already mapped" error.
   FLAG_CREATE: the range may be newly mapped; without it an unmapped
   range is reported as a fatal "not mapped" error.
   FLAG_COPY: passed by acc_copyin and acc_present_or_copyin; presumably
   requests a host-to-device copy of the data -- TODO confirm, the test
   of this flag is not visible here. */
422 #define FLAG_PRESENT (1 << 0)
423 #define FLAG_CREATE (1 << 1)
424 #define FLAG_COPY (1 << 2)
427 present_create_copy (unsigned f
, void *h
, size_t s
)
433 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
435 goacc_lazy_initialize ();
437 struct goacc_thread
*thr
= goacc_thread ();
438 struct gomp_device_descr
*acc_dev
= thr
->dev
;
440 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
443 gomp_mutex_lock (&acc_dev
->lock
);
445 n
= lookup_host (acc_dev
, h
, s
);
449 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
451 if (!(f
& FLAG_PRESENT
))
453 gomp_mutex_unlock (&acc_dev
->lock
);
454 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
455 (void *)h
, (int)s
, (void *)d
, (int)s
);
457 if ((h
+ s
) > (void *)n
->host_end
)
459 gomp_mutex_unlock (&acc_dev
->lock
);
460 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
463 gomp_mutex_unlock (&acc_dev
->lock
);
465 else if (!(f
& FLAG_CREATE
))
467 gomp_mutex_unlock (&acc_dev
->lock
);
468 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
472 struct target_mem_desc
*tgt
;
474 unsigned short kinds
;
480 kinds
= GOMP_MAP_ALLOC
;
482 gomp_mutex_unlock (&acc_dev
->lock
);
484 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
485 GOMP_MAP_VARS_OPENACC
);
487 gomp_mutex_lock (&acc_dev
->lock
);
490 tgt
->prev
= acc_dev
->openacc
.data_environ
;
491 acc_dev
->openacc
.data_environ
= tgt
;
493 gomp_mutex_unlock (&acc_dev
->lock
);
500 acc_create (void *h
, size_t s
)
502 return present_create_copy (FLAG_CREATE
, h
, s
);
506 acc_copyin (void *h
, size_t s
)
508 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
512 acc_present_or_create (void *h
, size_t s
)
514 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
517 /* acc_pcreate is acc_present_or_create by a different name. */
518 #ifdef HAVE_ATTRIBUTE_ALIAS
519 strong_alias (acc_present_or_create
, acc_pcreate
)
522 acc_pcreate (void *h
, size_t s
)
524 return acc_present_or_create (h
, s
);
529 acc_present_or_copyin (void *h
, size_t s
)
531 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
534 /* acc_pcopyin is acc_present_or_copyin by a different name. */
535 #ifdef HAVE_ATTRIBUTE_ALIAS
536 strong_alias (acc_present_or_copyin
, acc_pcopyin
)
539 acc_pcopyin (void *h
, size_t s
)
541 return acc_present_or_copyin (h
, s
);
/* Flag for the F argument of delete_copyout: when set, the device data is
   copied back to the host with dev2host_func before the device memory is
   freed. It shares bit 0 with FLAG_PRESENT, but the two are tested by
   different functions. */
545 #define FLAG_COPYOUT (1 << 0)
548 delete_copyout (unsigned f
, void *h
, size_t s
, const char *libfnname
)
553 struct goacc_thread
*thr
= goacc_thread ();
554 struct gomp_device_descr
*acc_dev
= thr
->dev
;
556 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
559 gomp_mutex_lock (&acc_dev
->lock
);
561 n
= lookup_host (acc_dev
, h
, s
);
563 /* No need to call lazy open, as the data must already have been mapped. */
568 gomp_mutex_unlock (&acc_dev
->lock
);
569 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
572 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
573 + (uintptr_t) h
- n
->host_start
);
575 host_size
= n
->host_end
- n
->host_start
;
577 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
579 gomp_mutex_unlock (&acc_dev
->lock
);
580 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
581 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
584 gomp_mutex_unlock (&acc_dev
->lock
);
586 if (f
& FLAG_COPYOUT
)
587 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
591 if (!acc_dev
->free_func (acc_dev
->target_id
, d
))
592 gomp_fatal ("error in freeing device memory in %s", libfnname
);
596 acc_delete (void *h
, size_t s
)
598 delete_copyout (0, h
, s
, __FUNCTION__
);
602 acc_copyout (void *h
, size_t s
)
604 delete_copyout (FLAG_COPYOUT
, h
, s
, __FUNCTION__
);
608 update_dev_host (int is_dev
, void *h
, size_t s
)
613 goacc_lazy_initialize ();
615 struct goacc_thread
*thr
= goacc_thread ();
616 struct gomp_device_descr
*acc_dev
= thr
->dev
;
618 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
621 gomp_mutex_lock (&acc_dev
->lock
);
623 n
= lookup_host (acc_dev
, h
, s
);
627 gomp_mutex_unlock (&acc_dev
->lock
);
628 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
631 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
632 + (uintptr_t) h
- n
->host_start
);
635 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
637 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
639 gomp_mutex_unlock (&acc_dev
->lock
);
643 acc_update_device (void *h
, size_t s
)
645 update_dev_host (1, h
, s
);
649 acc_update_self (void *h
, size_t s
)
651 update_dev_host (0, h
, s
);
655 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
658 struct target_mem_desc
*tgt
;
659 struct goacc_thread
*thr
= goacc_thread ();
660 struct gomp_device_descr
*acc_dev
= thr
->dev
;
662 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
663 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
664 NULL
, sizes
, kinds
, true, GOMP_MAP_VARS_OPENACC
);
665 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
667 gomp_mutex_lock (&acc_dev
->lock
);
668 tgt
->prev
= acc_dev
->openacc
.data_environ
;
669 acc_dev
->openacc
.data_environ
= tgt
;
670 gomp_mutex_unlock (&acc_dev
->lock
);
674 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
676 struct goacc_thread
*thr
= goacc_thread ();
677 struct gomp_device_descr
*acc_dev
= thr
->dev
;
679 struct target_mem_desc
*t
;
680 int minrefs
= (mapnum
== 1) ? 2 : 3;
682 gomp_mutex_lock (&acc_dev
->lock
);
684 n
= lookup_host (acc_dev
, h
, 1);
688 gomp_mutex_unlock (&acc_dev
->lock
);
689 gomp_fatal ("%p is not a mapped block", (void *)h
);
692 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
696 struct target_mem_desc
*tp
;
698 if (t
->refcount
== minrefs
)
700 /* This is the last reference, so pull the descriptor off the
701 chain. This prevents gomp_unmap_vars (via gomp_unmap_tgt) from
702 freeing the device memory. */
706 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
714 acc_dev
->openacc
.data_environ
= t
->prev
;
721 t
->list
[0].copy_from
= 1;
723 gomp_mutex_unlock (&acc_dev
->lock
);
725 /* If running synchronously, unmap immediately. */
726 if (async_synchronous_p (async
))
727 gomp_unmap_vars (t
, true);
729 t
->device_descr
->openacc
.register_async_cleanup_func (t
, async
);
731 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);