1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2015 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
34 #include "splay-tree.h"
38 /* Return block containing [H->S), or NULL if not contained. The device lock
39 for DEV must be locked on entry, and remains locked on exit. */
42 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
44 struct splay_tree_key_s node
;
47 node
.host_start
= (uintptr_t) h
;
48 node
.host_end
= (uintptr_t) h
+ s
;
50 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
55 /* Return block containing [D,+S), or NULL if not contained.
56 The list isn't ordered by device address, so we have to iterate
57 over the whole array. This is not expected to be a common
58 operation. The device lock associated with TGT must be locked on entry, and
59 remains locked on exit. */
62 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
65 struct target_mem_desc
*t
;
70 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
72 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
79 for (i
= 0; i
< t
->list_count
; i
++)
83 splay_tree_key k
= &t
->array
[i
].key
;
84 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
86 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
93 /* OpenACC is silent on how memory exhaustion is indicated. We return NULL. */
102 goacc_lazy_initialize ();
104 struct goacc_thread
*thr
= goacc_thread ();
108 return thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
111 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
112 the device address is mapped. We choose to check whether it is mapped,
113 and if it is, to unmap it. */
122 struct goacc_thread
*thr
= goacc_thread ();
124 assert (thr
&& thr
->dev
);
126 struct gomp_device_descr
*acc_dev
= thr
->dev
;
128 gomp_mutex_lock (&acc_dev
->lock
);
130 /* We don't have to call lazy open here, as the ptr value must have
131 been returned by acc_malloc. It's not permitted to pass NULL in
132 (unless you got that null from acc_malloc). */
133 if ((k
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1)))
137 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
139 gomp_mutex_unlock (&acc_dev
->lock
);
141 acc_unmap_data ((void *)(k
->host_start
+ offset
));
144 gomp_mutex_unlock (&acc_dev
->lock
);
146 acc_dev
->free_func (acc_dev
->target_id
, d
);
150 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
152 /* No need to call lazy open here, as the device pointer must have
153 been obtained from a routine that did that. */
154 struct goacc_thread
*thr
= goacc_thread ();
156 assert (thr
&& thr
->dev
);
158 thr
->dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
162 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
164 /* No need to call lazy open here, as the device pointer must have
165 been obtained from a routine that did that. */
166 struct goacc_thread
*thr
= goacc_thread ();
168 assert (thr
&& thr
->dev
);
170 thr
->dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
173 /* Return the device pointer that corresponds to host data H. Or NULL
177 acc_deviceptr (void *h
)
183 goacc_lazy_initialize ();
185 struct goacc_thread
*thr
= goacc_thread ();
186 struct gomp_device_descr
*dev
= thr
->dev
;
188 gomp_mutex_lock (&dev
->lock
);
190 n
= lookup_host (dev
, h
, 1);
194 gomp_mutex_unlock (&dev
->lock
);
198 offset
= h
- n
->host_start
;
200 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
202 gomp_mutex_unlock (&dev
->lock
);
207 /* Return the host pointer that corresponds to device data D. Or NULL
211 acc_hostptr (void *d
)
217 goacc_lazy_initialize ();
219 struct goacc_thread
*thr
= goacc_thread ();
220 struct gomp_device_descr
*acc_dev
= thr
->dev
;
222 gomp_mutex_lock (&acc_dev
->lock
);
224 n
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1);
228 gomp_mutex_unlock (&acc_dev
->lock
);
232 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
234 h
= n
->host_start
+ offset
;
236 gomp_mutex_unlock (&acc_dev
->lock
);
241 /* Return 1 if host data [H,+S] is present on the device. */
244 acc_is_present (void *h
, size_t s
)
251 goacc_lazy_initialize ();
253 struct goacc_thread
*thr
= goacc_thread ();
254 struct gomp_device_descr
*acc_dev
= thr
->dev
;
256 gomp_mutex_lock (&acc_dev
->lock
);
258 n
= lookup_host (acc_dev
, h
, s
);
260 if (n
&& ((uintptr_t)h
< n
->host_start
261 || (uintptr_t)h
+ s
> n
->host_end
262 || s
> n
->host_end
- n
->host_start
))
265 gomp_mutex_unlock (&acc_dev
->lock
);
270 /* Create a mapping for host [H,+S] -> device [D,+S] */
273 acc_map_data (void *h
, void *d
, size_t s
)
275 struct target_mem_desc
*tgt
;
280 unsigned short kinds
= GOMP_MAP_ALLOC
;
282 goacc_lazy_initialize ();
284 struct goacc_thread
*thr
= goacc_thread ();
285 struct gomp_device_descr
*acc_dev
= thr
->dev
;
287 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
290 gomp_fatal ("cannot map data on shared-memory system");
292 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true,
293 GOMP_MAP_VARS_OPENACC
);
297 struct goacc_thread
*thr
= goacc_thread ();
300 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
301 (void *)h
, (int)s
, (void *)d
, (int)s
);
303 gomp_mutex_lock (&acc_dev
->lock
);
305 if (lookup_host (acc_dev
, h
, s
))
307 gomp_mutex_unlock (&acc_dev
->lock
);
308 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
312 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
314 gomp_mutex_unlock (&acc_dev
->lock
);
315 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
319 gomp_mutex_unlock (&acc_dev
->lock
);
321 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
322 &kinds
, true, GOMP_MAP_VARS_OPENACC
);
325 gomp_mutex_lock (&acc_dev
->lock
);
326 tgt
->prev
= acc_dev
->openacc
.data_environ
;
327 acc_dev
->openacc
.data_environ
= tgt
;
328 gomp_mutex_unlock (&acc_dev
->lock
);
332 acc_unmap_data (void *h
)
334 struct goacc_thread
*thr
= goacc_thread ();
335 struct gomp_device_descr
*acc_dev
= thr
->dev
;
337 /* No need to call lazy open, as the address must have been mapped. */
341 gomp_mutex_lock (&acc_dev
->lock
);
343 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
344 struct target_mem_desc
*t
;
348 gomp_mutex_unlock (&acc_dev
->lock
);
349 gomp_fatal ("%p is not a mapped block", (void *)h
);
352 host_size
= n
->host_end
- n
->host_start
;
354 if (n
->host_start
!= (uintptr_t) h
)
356 gomp_mutex_unlock (&acc_dev
->lock
);
357 gomp_fatal ("[%p,%d] surrounds %p",
358 (void *) n
->host_start
, (int) host_size
, (void *) h
);
363 if (t
->refcount
== 2)
365 struct target_mem_desc
*tp
;
367 /* This is the last reference, so pull the descriptor off the
368 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
369 freeing the device memory. */
373 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
380 acc_dev
->openacc
.data_environ
= t
->prev
;
386 gomp_mutex_unlock (&acc_dev
->lock
);
388 gomp_unmap_vars (t
, true);
/* Behaviour flags for present_create_copy.
   FLAG_PRESENT: data that is already mapped is accepted; without it an
   existing mapping is a fatal error.
   FLAG_CREATE: data that is not yet mapped gets a new mapping; without it
   missing data is a fatal error.
   FLAG_COPY: presumably requests that a newly created mapping also be
   initialized from the host -- its use is not visible here, confirm in
   present_create_copy.  */
#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)
396 present_create_copy (unsigned f
, void *h
, size_t s
)
402 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
404 goacc_lazy_initialize ();
406 struct goacc_thread
*thr
= goacc_thread ();
407 struct gomp_device_descr
*acc_dev
= thr
->dev
;
409 gomp_mutex_lock (&acc_dev
->lock
);
411 n
= lookup_host (acc_dev
, h
, s
);
415 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
417 if (!(f
& FLAG_PRESENT
))
419 gomp_mutex_unlock (&acc_dev
->lock
);
420 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
421 (void *)h
, (int)s
, (void *)d
, (int)s
);
423 if ((h
+ s
) > (void *)n
->host_end
)
425 gomp_mutex_unlock (&acc_dev
->lock
);
426 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
429 gomp_mutex_unlock (&acc_dev
->lock
);
431 else if (!(f
& FLAG_CREATE
))
433 gomp_mutex_unlock (&acc_dev
->lock
);
434 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
438 struct target_mem_desc
*tgt
;
440 unsigned short kinds
;
446 kinds
= GOMP_MAP_ALLOC
;
448 gomp_mutex_unlock (&acc_dev
->lock
);
450 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
451 GOMP_MAP_VARS_OPENACC
);
453 gomp_mutex_lock (&acc_dev
->lock
);
456 tgt
->prev
= acc_dev
->openacc
.data_environ
;
457 acc_dev
->openacc
.data_environ
= tgt
;
459 gomp_mutex_unlock (&acc_dev
->lock
);
466 acc_create (void *h
, size_t s
)
468 return present_create_copy (FLAG_CREATE
, h
, s
);
472 acc_copyin (void *h
, size_t s
)
474 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
478 acc_present_or_create (void *h
, size_t s
)
480 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
484 acc_present_or_copyin (void *h
, size_t s
)
486 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* Flag for delete_copyout: when set, the device data is transferred back
   to the host (dev2host_func) before the device memory is freed.  */
#define FLAG_COPYOUT (1 << 0)
492 delete_copyout (unsigned f
, void *h
, size_t s
)
497 struct goacc_thread
*thr
= goacc_thread ();
498 struct gomp_device_descr
*acc_dev
= thr
->dev
;
500 gomp_mutex_lock (&acc_dev
->lock
);
502 n
= lookup_host (acc_dev
, h
, s
);
504 /* No need to call lazy open, as the data must already have been mapped. */
509 gomp_mutex_unlock (&acc_dev
->lock
);
510 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
513 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
515 host_size
= n
->host_end
- n
->host_start
;
517 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
519 gomp_mutex_unlock (&acc_dev
->lock
);
520 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
521 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
524 gomp_mutex_unlock (&acc_dev
->lock
);
526 if (f
& FLAG_COPYOUT
)
527 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
531 acc_dev
->free_func (acc_dev
->target_id
, d
);
/* Remove the mapping for host data [H,+S) without copying anything back:
   delete_copyout is called with no flags set.  */

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}
540 void acc_copyout (void *h
, size_t s
)
542 delete_copyout (FLAG_COPYOUT
, h
, s
);
546 update_dev_host (int is_dev
, void *h
, size_t s
)
550 struct goacc_thread
*thr
= goacc_thread ();
551 struct gomp_device_descr
*acc_dev
= thr
->dev
;
553 gomp_mutex_lock (&acc_dev
->lock
);
555 n
= lookup_host (acc_dev
, h
, s
);
557 /* No need to call lazy open, as the data must already have been
562 gomp_mutex_unlock (&acc_dev
->lock
);
563 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
566 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
568 gomp_mutex_unlock (&acc_dev
->lock
);
571 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
573 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
/* Update the device copy of host data [H,+S): calls update_dev_host with
   a nonzero IS_DEV argument.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}
/* Update the host copy of data [H,+S): calls update_dev_host with a zero
   IS_DEV argument.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
589 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
592 struct target_mem_desc
*tgt
;
593 struct goacc_thread
*thr
= goacc_thread ();
594 struct gomp_device_descr
*acc_dev
= thr
->dev
;
596 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
597 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
598 NULL
, sizes
, kinds
, true, GOMP_MAP_VARS_OPENACC
);
599 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
601 gomp_mutex_lock (&acc_dev
->lock
);
602 tgt
->prev
= acc_dev
->openacc
.data_environ
;
603 acc_dev
->openacc
.data_environ
= tgt
;
604 gomp_mutex_unlock (&acc_dev
->lock
);
608 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
610 struct goacc_thread
*thr
= goacc_thread ();
611 struct gomp_device_descr
*acc_dev
= thr
->dev
;
613 struct target_mem_desc
*t
;
614 int minrefs
= (mapnum
== 1) ? 2 : 3;
616 gomp_mutex_lock (&acc_dev
->lock
);
618 n
= lookup_host (acc_dev
, h
, 1);
622 gomp_mutex_unlock (&acc_dev
->lock
);
623 gomp_fatal ("%p is not a mapped block", (void *)h
);
626 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
630 struct target_mem_desc
*tp
;
632 if (t
->refcount
== minrefs
)
634 /* This is the last reference, so pull the descriptor off the
635 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
636 freeing the device memory. */
640 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
648 acc_dev
->openacc
.data_environ
= t
->prev
;
655 t
->list
[0].copy_from
= 1;
657 gomp_mutex_unlock (&acc_dev
->lock
);
659 /* If running synchronously, unmap immediately. */
660 if (async
< acc_async_noval
)
661 gomp_unmap_vars (t
, true);
664 gomp_copy_from_async (t
);
665 acc_dev
->openacc
.register_async_cleanup_func (t
);
668 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);