/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2016 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>
37 /* Return block containing [H->S), or NULL if not contained. The device lock
38 for DEV must be locked on entry, and remains locked on exit. */
41 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
43 struct splay_tree_key_s node
;
46 node
.host_start
= (uintptr_t) h
;
47 node
.host_end
= (uintptr_t) h
+ s
;
49 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
54 /* Return block containing [D->S), or NULL if not contained.
55 The list isn't ordered by device address, so we have to iterate
56 over the whole array. This is not expected to be a common
57 operation. The device lock associated with TGT must be locked on entry, and
58 remains locked on exit. */
61 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
64 struct target_mem_desc
*t
;
69 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
71 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
78 for (i
= 0; i
< t
->list_count
; i
++)
82 splay_tree_key k
= &t
->array
[i
].key
;
83 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
85 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
92 /* OpenACC is silent on how memory exhaustion is indicated. We return
101 goacc_lazy_initialize ();
103 struct goacc_thread
*thr
= goacc_thread ();
107 return thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
110 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
111 the device address is mapped. We choose to check if it mapped,
112 and if it is, to unmap it. */
121 struct goacc_thread
*thr
= goacc_thread ();
123 assert (thr
&& thr
->dev
);
125 struct gomp_device_descr
*acc_dev
= thr
->dev
;
127 gomp_mutex_lock (&acc_dev
->lock
);
129 /* We don't have to call lazy open here, as the ptr value must have
130 been returned by acc_malloc. It's not permitted to pass NULL in
131 (unless you got that null from acc_malloc). */
132 if ((k
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1)))
136 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
138 gomp_mutex_unlock (&acc_dev
->lock
);
140 acc_unmap_data ((void *)(k
->host_start
+ offset
));
143 gomp_mutex_unlock (&acc_dev
->lock
);
145 acc_dev
->free_func (acc_dev
->target_id
, d
);
149 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
151 /* No need to call lazy open here, as the device pointer must have
152 been obtained from a routine that did that. */
153 struct goacc_thread
*thr
= goacc_thread ();
155 assert (thr
&& thr
->dev
);
157 thr
->dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
161 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
163 /* No need to call lazy open here, as the device pointer must have
164 been obtained from a routine that did that. */
165 struct goacc_thread
*thr
= goacc_thread ();
167 assert (thr
&& thr
->dev
);
169 thr
->dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
172 /* Return the device pointer that corresponds to host data H. Or NULL
176 acc_deviceptr (void *h
)
182 goacc_lazy_initialize ();
184 struct goacc_thread
*thr
= goacc_thread ();
185 struct gomp_device_descr
*dev
= thr
->dev
;
187 gomp_mutex_lock (&dev
->lock
);
189 n
= lookup_host (dev
, h
, 1);
193 gomp_mutex_unlock (&dev
->lock
);
197 offset
= h
- n
->host_start
;
199 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
201 gomp_mutex_unlock (&dev
->lock
);
206 /* Return the host pointer that corresponds to device data D. Or NULL
210 acc_hostptr (void *d
)
216 goacc_lazy_initialize ();
218 struct goacc_thread
*thr
= goacc_thread ();
219 struct gomp_device_descr
*acc_dev
= thr
->dev
;
221 gomp_mutex_lock (&acc_dev
->lock
);
223 n
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1);
227 gomp_mutex_unlock (&acc_dev
->lock
);
231 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
233 h
= n
->host_start
+ offset
;
235 gomp_mutex_unlock (&acc_dev
->lock
);
240 /* Return 1 if host data [H,+S] is present on the device. */
243 acc_is_present (void *h
, size_t s
)
250 goacc_lazy_initialize ();
252 struct goacc_thread
*thr
= goacc_thread ();
253 struct gomp_device_descr
*acc_dev
= thr
->dev
;
255 gomp_mutex_lock (&acc_dev
->lock
);
257 n
= lookup_host (acc_dev
, h
, s
);
259 if (n
&& ((uintptr_t)h
< n
->host_start
260 || (uintptr_t)h
+ s
> n
->host_end
261 || s
> n
->host_end
- n
->host_start
))
264 gomp_mutex_unlock (&acc_dev
->lock
);
269 /* Create a mapping for host [H,+S] -> device [D,+S] */
272 acc_map_data (void *h
, void *d
, size_t s
)
274 struct target_mem_desc
*tgt
;
279 unsigned short kinds
= GOMP_MAP_ALLOC
;
281 goacc_lazy_initialize ();
283 struct goacc_thread
*thr
= goacc_thread ();
284 struct gomp_device_descr
*acc_dev
= thr
->dev
;
286 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
289 gomp_fatal ("cannot map data on shared-memory system");
291 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true,
292 GOMP_MAP_VARS_OPENACC
);
296 struct goacc_thread
*thr
= goacc_thread ();
299 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
300 (void *)h
, (int)s
, (void *)d
, (int)s
);
302 gomp_mutex_lock (&acc_dev
->lock
);
304 if (lookup_host (acc_dev
, h
, s
))
306 gomp_mutex_unlock (&acc_dev
->lock
);
307 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
311 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
313 gomp_mutex_unlock (&acc_dev
->lock
);
314 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
318 gomp_mutex_unlock (&acc_dev
->lock
);
320 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
321 &kinds
, true, GOMP_MAP_VARS_OPENACC
);
324 gomp_mutex_lock (&acc_dev
->lock
);
325 tgt
->prev
= acc_dev
->openacc
.data_environ
;
326 acc_dev
->openacc
.data_environ
= tgt
;
327 gomp_mutex_unlock (&acc_dev
->lock
);
331 acc_unmap_data (void *h
)
333 struct goacc_thread
*thr
= goacc_thread ();
334 struct gomp_device_descr
*acc_dev
= thr
->dev
;
336 /* No need to call lazy open, as the address must have been mapped. */
340 gomp_mutex_lock (&acc_dev
->lock
);
342 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
343 struct target_mem_desc
*t
;
347 gomp_mutex_unlock (&acc_dev
->lock
);
348 gomp_fatal ("%p is not a mapped block", (void *)h
);
351 host_size
= n
->host_end
- n
->host_start
;
353 if (n
->host_start
!= (uintptr_t) h
)
355 gomp_mutex_unlock (&acc_dev
->lock
);
356 gomp_fatal ("[%p,%d] surrounds %p",
357 (void *) n
->host_start
, (int) host_size
, (void *) h
);
362 if (t
->refcount
== 2)
364 struct target_mem_desc
*tp
;
366 /* This is the last reference, so pull the descriptor off the
367 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
368 freeing the device memory. */
372 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
379 acc_dev
->openacc
.data_environ
= t
->prev
;
385 gomp_mutex_unlock (&acc_dev
->lock
);
387 gomp_unmap_vars (t
, true);
390 #define FLAG_PRESENT (1 << 0)
391 #define FLAG_CREATE (1 << 1)
392 #define FLAG_COPY (1 << 2)
395 present_create_copy (unsigned f
, void *h
, size_t s
)
401 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
403 goacc_lazy_initialize ();
405 struct goacc_thread
*thr
= goacc_thread ();
406 struct gomp_device_descr
*acc_dev
= thr
->dev
;
408 gomp_mutex_lock (&acc_dev
->lock
);
410 n
= lookup_host (acc_dev
, h
, s
);
414 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
416 if (!(f
& FLAG_PRESENT
))
418 gomp_mutex_unlock (&acc_dev
->lock
);
419 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
420 (void *)h
, (int)s
, (void *)d
, (int)s
);
422 if ((h
+ s
) > (void *)n
->host_end
)
424 gomp_mutex_unlock (&acc_dev
->lock
);
425 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
428 gomp_mutex_unlock (&acc_dev
->lock
);
430 else if (!(f
& FLAG_CREATE
))
432 gomp_mutex_unlock (&acc_dev
->lock
);
433 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
437 struct target_mem_desc
*tgt
;
439 unsigned short kinds
;
445 kinds
= GOMP_MAP_ALLOC
;
447 gomp_mutex_unlock (&acc_dev
->lock
);
449 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
450 GOMP_MAP_VARS_OPENACC
);
452 gomp_mutex_lock (&acc_dev
->lock
);
455 tgt
->prev
= acc_dev
->openacc
.data_environ
;
456 acc_dev
->openacc
.data_environ
= tgt
;
458 gomp_mutex_unlock (&acc_dev
->lock
);
465 acc_create (void *h
, size_t s
)
467 return present_create_copy (FLAG_CREATE
, h
, s
);
471 acc_copyin (void *h
, size_t s
)
473 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
477 acc_present_or_create (void *h
, size_t s
)
479 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
483 acc_present_or_copyin (void *h
, size_t s
)
485 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
488 #define FLAG_COPYOUT (1 << 0)
491 delete_copyout (unsigned f
, void *h
, size_t s
)
496 struct goacc_thread
*thr
= goacc_thread ();
497 struct gomp_device_descr
*acc_dev
= thr
->dev
;
499 gomp_mutex_lock (&acc_dev
->lock
);
501 n
= lookup_host (acc_dev
, h
, s
);
503 /* No need to call lazy open, as the data must already have been
508 gomp_mutex_unlock (&acc_dev
->lock
);
509 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
512 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
513 + (uintptr_t) h
- n
->host_start
);
515 host_size
= n
->host_end
- n
->host_start
;
517 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
519 gomp_mutex_unlock (&acc_dev
->lock
);
520 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
521 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
524 gomp_mutex_unlock (&acc_dev
->lock
);
526 if (f
& FLAG_COPYOUT
)
527 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
531 acc_dev
->free_func (acc_dev
->target_id
, d
);
/* Remove the device copy of host range [H,+S] without copying back.  */

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}
540 void acc_copyout (void *h
, size_t s
)
542 delete_copyout (FLAG_COPYOUT
, h
, s
);
546 update_dev_host (int is_dev
, void *h
, size_t s
)
551 goacc_lazy_initialize ();
553 struct goacc_thread
*thr
= goacc_thread ();
554 struct gomp_device_descr
*acc_dev
= thr
->dev
;
556 gomp_mutex_lock (&acc_dev
->lock
);
558 n
= lookup_host (acc_dev
, h
, s
);
562 gomp_mutex_unlock (&acc_dev
->lock
);
563 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
566 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
567 + (uintptr_t) h
- n
->host_start
);
569 gomp_mutex_unlock (&acc_dev
->lock
);
572 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
574 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
/* Refresh the device copy of host range [H,+S] from the host.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}
/* Refresh the host copy of host range [H,+S] from the device.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
590 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
593 struct target_mem_desc
*tgt
;
594 struct goacc_thread
*thr
= goacc_thread ();
595 struct gomp_device_descr
*acc_dev
= thr
->dev
;
597 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
598 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
599 NULL
, sizes
, kinds
, true, GOMP_MAP_VARS_OPENACC
);
600 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
602 gomp_mutex_lock (&acc_dev
->lock
);
603 tgt
->prev
= acc_dev
->openacc
.data_environ
;
604 acc_dev
->openacc
.data_environ
= tgt
;
605 gomp_mutex_unlock (&acc_dev
->lock
);
609 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
611 struct goacc_thread
*thr
= goacc_thread ();
612 struct gomp_device_descr
*acc_dev
= thr
->dev
;
614 struct target_mem_desc
*t
;
615 int minrefs
= (mapnum
== 1) ? 2 : 3;
617 gomp_mutex_lock (&acc_dev
->lock
);
619 n
= lookup_host (acc_dev
, h
, 1);
623 gomp_mutex_unlock (&acc_dev
->lock
);
624 gomp_fatal ("%p is not a mapped block", (void *)h
);
627 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
631 struct target_mem_desc
*tp
;
633 if (t
->refcount
== minrefs
)
635 /* This is the last reference, so pull the descriptor off the
636 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
637 freeing the device memory. */
641 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
649 acc_dev
->openacc
.data_environ
= t
->prev
;
656 t
->list
[0].copy_from
= 1;
658 gomp_mutex_unlock (&acc_dev
->lock
);
660 /* If running synchronously, unmap immediately. */
661 if (async
< acc_async_noval
)
662 gomp_unmap_vars (t
, true);
665 gomp_copy_from_async (t
);
666 acc_dev
->openacc
.register_async_cleanup_func (t
);
669 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);