1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2015 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
34 #include "splay-tree.h"
38 /* Return block containing [H,+S), or NULL if not contained. The device lock
39 for DEV must be locked on entry, and remains locked on exit. */
42 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
44 struct splay_tree_key_s node
;
47 node
.host_start
= (uintptr_t) h
;
48 node
.host_end
= (uintptr_t) h
+ s
;
50 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
55 /* Return block containing [D,+S), or NULL if not contained.
56 The list isn't ordered by device address, so we have to iterate
57 over the whole array. This is not expected to be a common
58 operation. The device lock associated with TGT must be locked on entry, and
59 remains locked on exit. */
62 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
65 struct target_mem_desc
*t
;
70 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
72 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
79 for (i
= 0; i
< t
->list_count
; i
++)
83 splay_tree_key k
= &t
->array
[i
].key
;
84 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
86 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
93 /* OpenACC is silent on how memory exhaustion is indicated. We return
102 goacc_lazy_initialize ();
104 struct goacc_thread
*thr
= goacc_thread ();
108 return thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
111 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
112 the device address is mapped. We choose to check if it is mapped,
113 and if it is, to unmap it. */
122 struct goacc_thread
*thr
= goacc_thread ();
124 assert (thr
&& thr
->dev
);
126 struct gomp_device_descr
*acc_dev
= thr
->dev
;
128 gomp_mutex_lock (&acc_dev
->lock
);
130 /* We don't have to call lazy open here, as the ptr value must have
131 been returned by acc_malloc. It's not permitted to pass NULL in
132 (unless you got that null from acc_malloc). */
133 if ((k
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1)))
137 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
139 gomp_mutex_unlock (&acc_dev
->lock
);
141 acc_unmap_data ((void *)(k
->host_start
+ offset
));
144 gomp_mutex_unlock (&acc_dev
->lock
);
146 acc_dev
->free_func (acc_dev
->target_id
, d
);
150 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
152 /* No need to call lazy open here, as the device pointer must have
153 been obtained from a routine that did that. */
154 struct goacc_thread
*thr
= goacc_thread ();
156 assert (thr
&& thr
->dev
);
158 thr
->dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
162 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
164 /* No need to call lazy open here, as the device pointer must have
165 been obtained from a routine that did that. */
166 struct goacc_thread
*thr
= goacc_thread ();
168 assert (thr
&& thr
->dev
);
170 thr
->dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
173 /* Return the device pointer that corresponds to host data H. Or NULL
177 acc_deviceptr (void *h
)
183 goacc_lazy_initialize ();
185 struct goacc_thread
*thr
= goacc_thread ();
186 struct gomp_device_descr
*dev
= thr
->dev
;
188 gomp_mutex_lock (&dev
->lock
);
190 n
= lookup_host (dev
, h
, 1);
194 gomp_mutex_unlock (&dev
->lock
);
198 offset
= h
- n
->host_start
;
200 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
202 gomp_mutex_unlock (&dev
->lock
);
207 /* This function is used as a helper in generated code to implement pointer
208 lookup in host_data regions. Unlike acc_deviceptr, it returns its argument
209 unchanged on a shared-memory system (e.g. the host). */
212 GOACC_deviceptr (void *h
)
218 goacc_lazy_initialize ();
220 struct goacc_thread
*thr
= goacc_thread ();
222 if ((thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) == 0)
224 n
= lookup_host (thr
->dev
, h
, 1);
229 offset
= h
- n
->host_start
;
231 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
239 /* Return the host pointer that corresponds to device data D. Or NULL
243 acc_hostptr (void *d
)
249 goacc_lazy_initialize ();
251 struct goacc_thread
*thr
= goacc_thread ();
252 struct gomp_device_descr
*acc_dev
= thr
->dev
;
254 gomp_mutex_lock (&acc_dev
->lock
);
256 n
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1);
260 gomp_mutex_unlock (&acc_dev
->lock
);
264 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
266 h
= n
->host_start
+ offset
;
268 gomp_mutex_unlock (&acc_dev
->lock
);
273 /* Return 1 if host data [H,+S] is present on the device. */
276 acc_is_present (void *h
, size_t s
)
283 goacc_lazy_initialize ();
285 struct goacc_thread
*thr
= goacc_thread ();
286 struct gomp_device_descr
*acc_dev
= thr
->dev
;
288 gomp_mutex_lock (&acc_dev
->lock
);
290 n
= lookup_host (acc_dev
, h
, s
);
292 if (n
&& ((uintptr_t)h
< n
->host_start
293 || (uintptr_t)h
+ s
> n
->host_end
294 || s
> n
->host_end
- n
->host_start
))
297 gomp_mutex_unlock (&acc_dev
->lock
);
302 /* Create a mapping for host [H,+S] -> device [D,+S] */
305 acc_map_data (void *h
, void *d
, size_t s
)
307 struct target_mem_desc
*tgt
;
312 unsigned short kinds
= GOMP_MAP_ALLOC
;
314 goacc_lazy_initialize ();
316 struct goacc_thread
*thr
= goacc_thread ();
317 struct gomp_device_descr
*acc_dev
= thr
->dev
;
319 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
322 gomp_fatal ("cannot map data on shared-memory system");
324 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
328 struct goacc_thread
*thr
= goacc_thread ();
331 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
332 (void *)h
, (int)s
, (void *)d
, (int)s
);
334 gomp_mutex_lock (&acc_dev
->lock
);
336 if (lookup_host (acc_dev
, h
, s
))
338 gomp_mutex_unlock (&acc_dev
->lock
);
339 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
343 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
345 gomp_mutex_unlock (&acc_dev
->lock
);
346 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
350 gomp_mutex_unlock (&acc_dev
->lock
);
352 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
353 &kinds
, true, false);
356 gomp_mutex_lock (&acc_dev
->lock
);
357 tgt
->prev
= acc_dev
->openacc
.data_environ
;
358 acc_dev
->openacc
.data_environ
= tgt
;
359 gomp_mutex_unlock (&acc_dev
->lock
);
363 acc_unmap_data (void *h
)
365 struct goacc_thread
*thr
= goacc_thread ();
366 struct gomp_device_descr
*acc_dev
= thr
->dev
;
368 /* No need to call lazy open, as the address must have been mapped. */
372 gomp_mutex_lock (&acc_dev
->lock
);
374 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
375 struct target_mem_desc
*t
;
379 gomp_mutex_unlock (&acc_dev
->lock
);
380 gomp_fatal ("%p is not a mapped block", (void *)h
);
383 host_size
= n
->host_end
- n
->host_start
;
385 if (n
->host_start
!= (uintptr_t) h
)
387 gomp_mutex_unlock (&acc_dev
->lock
);
388 gomp_fatal ("[%p,%d] surrounds %p",
389 (void *) n
->host_start
, (int) host_size
, (void *) h
);
394 if (t
->refcount
== 2)
396 struct target_mem_desc
*tp
;
398 /* This is the last reference, so pull the descriptor off the
399 chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
400 freeing the device memory. */
404 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
411 acc_dev
->openacc
.data_environ
= t
->prev
;
417 gomp_mutex_unlock (&acc_dev
->lock
);
419 gomp_unmap_vars (t
, true);
422 #define FLAG_PRESENT (1 << 0)
423 #define FLAG_CREATE (1 << 1)
424 #define FLAG_COPY (1 << 2)
427 present_create_copy (unsigned f
, void *h
, size_t s
)
433 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
435 goacc_lazy_initialize ();
437 struct goacc_thread
*thr
= goacc_thread ();
438 struct gomp_device_descr
*acc_dev
= thr
->dev
;
440 gomp_mutex_lock (&acc_dev
->lock
);
442 n
= lookup_host (acc_dev
, h
, s
);
446 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
448 if (!(f
& FLAG_PRESENT
))
450 gomp_mutex_unlock (&acc_dev
->lock
);
451 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
452 (void *)h
, (int)s
, (void *)d
, (int)s
);
454 if ((h
+ s
) > (void *)n
->host_end
)
456 gomp_mutex_unlock (&acc_dev
->lock
);
457 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
460 gomp_mutex_unlock (&acc_dev
->lock
);
462 else if (!(f
& FLAG_CREATE
))
464 gomp_mutex_unlock (&acc_dev
->lock
);
465 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
469 struct target_mem_desc
*tgt
;
471 unsigned short kinds
;
477 kinds
= GOMP_MAP_ALLOC
;
479 gomp_mutex_unlock (&acc_dev
->lock
);
481 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
484 gomp_mutex_lock (&acc_dev
->lock
);
487 tgt
->prev
= acc_dev
->openacc
.data_environ
;
488 acc_dev
->openacc
.data_environ
= tgt
;
490 gomp_mutex_unlock (&acc_dev
->lock
);
497 acc_create (void *h
, size_t s
)
499 return present_create_copy (FLAG_CREATE
, h
, s
);
503 acc_copyin (void *h
, size_t s
)
505 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
509 acc_present_or_create (void *h
, size_t s
)
511 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
515 acc_present_or_copyin (void *h
, size_t s
)
517 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
520 #define FLAG_COPYOUT (1 << 0)
523 delete_copyout (unsigned f
, void *h
, size_t s
)
528 struct goacc_thread
*thr
= goacc_thread ();
529 struct gomp_device_descr
*acc_dev
= thr
->dev
;
531 gomp_mutex_lock (&acc_dev
->lock
);
533 n
= lookup_host (acc_dev
, h
, s
);
535 /* No need to call lazy open, as the data must already have been
540 gomp_mutex_unlock (&acc_dev
->lock
);
541 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
544 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
546 host_size
= n
->host_end
- n
->host_start
;
548 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
550 gomp_mutex_unlock (&acc_dev
->lock
);
551 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
552 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
555 gomp_mutex_unlock (&acc_dev
->lock
);
557 if (f
& FLAG_COPYOUT
)
558 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
562 acc_dev
->free_func (acc_dev
->target_id
, d
);
566 acc_delete (void *h
, size_t s
)
568 delete_copyout (0, h
, s
);
571 void acc_copyout (void *h
, size_t s
)
573 delete_copyout (FLAG_COPYOUT
, h
, s
);
577 update_dev_host (int is_dev
, void *h
, size_t s
)
581 struct goacc_thread
*thr
;
582 struct gomp_device_descr
*acc_dev
;
584 goacc_lazy_initialize ();
586 thr
= goacc_thread ();
589 gomp_mutex_lock (&acc_dev
->lock
);
591 n
= lookup_host (acc_dev
, h
, s
);
593 /* No need to call lazy open, as the data must already have been
598 gomp_mutex_unlock (&acc_dev
->lock
);
599 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
602 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
604 gomp_mutex_unlock (&acc_dev
->lock
);
607 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
609 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
613 acc_update_device (void *h
, size_t s
)
615 update_dev_host (1, h
, s
);
619 acc_update_self (void *h
, size_t s
)
621 update_dev_host (0, h
, s
);
625 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
628 struct target_mem_desc
*tgt
;
629 struct goacc_thread
*thr
= goacc_thread ();
630 struct gomp_device_descr
*acc_dev
= thr
->dev
;
632 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
633 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
634 NULL
, sizes
, kinds
, true, false);
635 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
637 gomp_mutex_lock (&acc_dev
->lock
);
638 tgt
->prev
= acc_dev
->openacc
.data_environ
;
639 acc_dev
->openacc
.data_environ
= tgt
;
640 gomp_mutex_unlock (&acc_dev
->lock
);
644 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
646 struct goacc_thread
*thr
= goacc_thread ();
647 struct gomp_device_descr
*acc_dev
= thr
->dev
;
649 struct target_mem_desc
*t
;
650 int minrefs
= (mapnum
== 1) ? 2 : 3;
652 gomp_mutex_lock (&acc_dev
->lock
);
654 n
= lookup_host (acc_dev
, h
, 1);
658 gomp_mutex_unlock (&acc_dev
->lock
);
659 gomp_fatal ("%p is not a mapped block", (void *)h
);
662 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
666 struct target_mem_desc
*tp
;
668 if (t
->refcount
== minrefs
)
670 /* This is the last reference, so pull the descriptor off the
671 chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
672 freeing the device memory. */
674 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
682 acc_dev
->openacc
.data_environ
= t
->prev
;
688 t
->list
[0]->copy_from
= force_copyfrom
? 1 : 0;
690 gomp_mutex_unlock (&acc_dev
->lock
);
692 /* If running synchronously, unmap immediately. */
693 if (async
< acc_async_noval
)
694 gomp_unmap_vars (t
, true);
697 gomp_copy_from_async (t
);
698 acc_dev
->openacc
.register_async_cleanup_func (t
);
701 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);