1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2015 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
34 #include "splay-tree.h"
38 /* Return the block of DEV's memory map containing the host range
   [H,+S), or NULL if not contained.  */
41 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
43 struct splay_tree_key_s node
;
/* Describe the queried range as a temporary key: host_start = H and
   host_end = H + S.  */
46 node
.host_start
= (uintptr_t) h
;
47 node
.host_end
= (uintptr_t) h
+ s
;
/* The lookup in DEV's mem_map is performed with DEV's lock held.  */
49 gomp_mutex_lock (&dev
->lock
);
50 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
51 gomp_mutex_unlock (&dev
->lock
);
56 /* Return block containing [D,+S), or NULL if not contained.
57 The list isn't ordered by device address, so we have to iterate
58 over the whole array. This is not expected to be a common operation. */
/* Search the chain of target memory descriptors starting at TGT for the
   device range [D,+S).  */
62 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
65 struct target_mem_desc
*t
;
/* The chain walk below runs with the device descriptor's lock held.  */
70 gomp_mutex_lock (&tgt
->device_descr
->lock
);
/* Follow the ->prev links, testing whether T's device range
   [tgt_start, tgt_end] covers [D, D + S).  */
72 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
74 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
78 gomp_mutex_unlock (&tgt
->device_descr
->lock
);
/* Examine each of T's list_count keys in turn.  */
83 for (i
= 0; i
< t
->list_count
; i
++)
87 splay_tree_key k
= &t
->array
[i
].key
;
/* OFFSET is D relative to T's device start, plus K's tgt_offset.  */
88 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
/* Test that K's host start advanced by OFFSET does not pass K's host
   end.  */
90 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
97 /* OpenACC is silent on how memory exhaustion is indicated. We return whatever the device's alloc_func returns (presumably NULL on failure). */
/* Request S bytes from the allocator of the current thread's device.  */
101 acc_malloc (size_t s
)
/* NOTE(review): presumably initializes the OpenACC runtime on first use --
   confirm against the definition of goacc_lazy_initialize.  */
106 goacc_lazy_initialize ();
108 struct goacc_thread
*thr
= goacc_thread ();
/* The request is delegated to base_dev->alloc_func, keyed by the current
   thread's device target_id; its result is returned unchanged.  */
110 return base_dev
->alloc_func (thr
->dev
->target_id
, s
);
113 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
114 the device address is mapped. We choose to check if it is mapped,
115 and if it is, to unmap it. */
/* NOTE(review): the function header is not present in this copy of the
   file; from the OpenACC section cited above this is presumably the body
   of acc_free -- confirm.  */
120 struct goacc_thread
*thr
= goacc_thread ();
125 /* We don't have to call lazy open here, as the ptr value must have
126 been returned by acc_malloc. It's not permitted to pass NULL in
127 (unless you got that null from acc_malloc). */
/* Look for a mapping covering the first byte at device address D in the
   current device's data environment.  */
128 if ((k
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1)))
/* Translate D into the corresponding host address and unmap that.  */
132 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
134 acc_unmap_data ((void *)(k
->host_start
+ offset
));
/* Hand the device address to base_dev->free_func.  */
137 base_dev
->free_func (thr
->dev
->target_id
, d
);
/* Pass D, H and S to base_dev->host2dev_func for the current thread's
   device (presumably a copy of S bytes from host H to device D --
   confirm against the plugin interface).  */
141 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
143 /* No need to call lazy open here, as the device pointer must have
144 been obtained from a routine that did that. */
145 struct goacc_thread
*thr
= goacc_thread ();
147 base_dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
/* Pass H, D and S to base_dev->dev2host_func for the current thread's
   device (presumably a copy of S bytes from device D to host H --
   confirm against the plugin interface).  */
151 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
153 /* No need to call lazy open here, as the device pointer must have
154 been obtained from a routine that did that. */
155 struct goacc_thread
*thr
= goacc_thread ();
157 base_dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
160 /* Return the device pointer that corresponds to host data H, or NULL if H is not mapped. */
/* Translate host address H into a device address using the current
   device's host mapping.  */
164 acc_deviceptr (void *h
)
170 goacc_lazy_initialize ();
172 struct goacc_thread
*thr
= goacc_thread ();
/* Look up a one-byte range starting at H.  */
174 n
= lookup_host (thr
->dev
, h
, 1);
/* Offset of H from the start of the mapped host block.  */
179 offset
= h
- n
->host_start
;
/* Device address: start of the target allocation, plus this block's
   offset within it, plus H's offset within the block.  */
181 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
186 /* Return the host pointer that corresponds to device data D, or NULL if D is not mapped. */
/* Translate device address D into a host address using the current
   device's data environment.  */
190 acc_hostptr (void *d
)
196 goacc_lazy_initialize ();
198 struct goacc_thread
*thr
= goacc_thread ();
/* Look up a one-byte range starting at D.  */
200 n
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1);
/* OFFSET is D relative to the target allocation's start, plus the key's
   tgt_offset.  */
205 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
/* Host address: start of the mapped host block plus OFFSET.  */
207 h
= n
->host_start
+ offset
;
212 /* Return 1 if host data [H,+S] is present on the device. */
215 acc_is_present (void *h
, size_t s
)
222 goacc_lazy_initialize ();
224 struct goacc_thread
*thr
= goacc_thread ();
225 struct gomp_device_descr
*acc_dev
= thr
->dev
;
227 n
= lookup_host (acc_dev
, h
, s
);
/* This condition holds when a key was found but it does not fully
   contain the request: H lies before the key's start, H + S lies past
   its end, or S exceeds the key's length.  */
229 if (n
&& ((uintptr_t)h
< n
->host_start
230 || (uintptr_t)h
+ s
> n
->host_end
231 || s
> n
->host_end
- n
->host_start
))
237 /* Create a mapping for host [H,+S] -> device [D,+S] */
240 acc_map_data (void *h
, void *d
, size_t s
)
242 struct target_mem_desc
*tgt
;
/* The mapping kind handed to gomp_map_vars is GOMP_MAP_ALLOC.  */
247 unsigned short kinds
= GOMP_MAP_ALLOC
;
249 goacc_lazy_initialize ();
251 struct goacc_thread
*thr
= goacc_thread ();
252 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* Test for a device advertising GOMP_OFFLOAD_CAP_SHARED_MEM; the
   gomp_fatal that follows reports that mapping is not possible there.
   NOTE(review): if gomp_fatal does not return, the gomp_map_vars call
   after it is unreachable -- confirm.  */
254 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
257 gomp_fatal ("cannot map data on shared-memory system");
259 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
263 struct goacc_thread
*thr
= goacc_thread ();
/* Fatal error for an unusable H/D/S combination; the condition guarding
   it is not present in this copy of the file.  */
266 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
267 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* Refuse a host range for which lookup_host finds an existing block.  */
269 if (lookup_host (acc_dev
, h
, s
))
270 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
/* Likewise refuse a device range found in the current data
   environment.  */
273 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
274 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
/* Create the mapping; the device addresses are passed to gomp_map_vars
   through &devaddrs.  */
277 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
278 &kinds
, true, false);
/* Push the new descriptor onto the front of the device's data_environ
   chain.  NOTE(review): no lock acquisition is visible around this
   update -- confirm whether acc_dev->lock should be held here.  */
281 tgt
->prev
= acc_dev
->openacc
.data_environ
;
282 acc_dev
->openacc
.data_environ
= tgt
;
/* Unmap the block that starts at host address H.  H must be the start of
   a mapped block (gomp_fatal otherwise); the block's descriptor is
   finally handed to gomp_unmap_vars.  */
286 acc_unmap_data (void *h
)
288 struct goacc_thread
*thr
= goacc_thread ();
289 struct gomp_device_descr
*acc_dev
= thr
->dev
;
291 /* No need to call lazy open, as the address must have been mapped. */
294 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
295 struct target_mem_desc
*t
;
/* Fatal error for an address that is not in a mapped block; the guarding
   condition is not present in this copy of the file.  */
298 gomp_fatal ("%p is not a mapped block", (void *)h
);
/* Length of the mapped host block, used in the diagnostic below.  */
300 host_size
= n
->host_end
- n
->host_start
;
/* H must be exactly the start of the block, not an interior address.  */
302 if (n
->host_start
!= (uintptr_t) h
)
303 gomp_fatal ("[%p,%d] surrounds1 %p",
304 (void *) n
->host_start
, (int) host_size
, (void *) h
);
/* A reference count of 2 is treated as the last reference.  */
308 if (t
->refcount
== 2)
310 struct target_mem_desc
*tp
;
312 /* This is the last reference, so pull the descriptor off the
313 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
314 freeing the device memory. */
/* The chain is walked and relinked with the device lock held.  */
318 gomp_mutex_lock (&acc_dev
->lock
);
320 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
327 acc_dev
->openacc
.data_environ
= t
->prev
;
332 gomp_mutex_unlock (&acc_dev
->lock
);
335 gomp_unmap_vars (t
, true);
/* Bit flags accepted in the F argument of present_create_copy.
   FLAG_PRESENT: when clear, an already-mapped range is a fatal error.
   FLAG_CREATE: when clear, a range that is not mapped is a fatal error.
   FLAG_COPY: passed by acc_copyin and acc_present_or_copyin.
   NOTE(review): where FLAG_COPY is tested is not visible in this copy.  */
338 #define FLAG_PRESENT (1 << 0)
339 #define FLAG_CREATE (1 << 1)
340 #define FLAG_COPY (1 << 2)
/* Shared implementation behind acc_create, acc_copyin,
   acc_present_or_create and acc_present_or_copyin.  F is a mask of the
   FLAG_PRESENT / FLAG_CREATE / FLAG_COPY bits; [H,+S) is the host
   range.  */
343 present_create_copy (unsigned f
, void *h
, size_t s
)
/* Fatal error for an unusable range; the guarding condition is not
   present in this copy of the file.  */
349 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
351 goacc_lazy_initialize ();
353 struct goacc_thread
*thr
= goacc_thread ();
354 struct gomp_device_descr
*acc_dev
= thr
->dev
;
356 n
= lookup_host (acc_dev
, h
, s
);
/* Device address computed from the target allocation's start plus the
   key's tgt_offset.  NOTE(review): H's offset from n->host_start is not
   added here -- confirm this is intended when H is not the block
   start.  */
360 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
/* Without FLAG_PRESENT, report that the range is already mapped.  */
362 if (!(f
& FLAG_PRESENT
))
363 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
364 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* The requested range must not extend past the end of the mapped
   block.  */
365 if ((h
+ s
) > (void *)n
->host_end
)
366 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
/* Without FLAG_CREATE, a range that is not mapped is a fatal error.  */
368 else if (!(f
& FLAG_CREATE
))
370 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
374 struct target_mem_desc
*tgt
;
376 unsigned short kinds
;
/* KINDS is set to GOMP_MAP_ALLOC here.  NOTE(review): any other
   assignment to KINDS (e.g. one depending on FLAG_COPY) is not visible
   in this copy.  */
382 kinds
= GOMP_MAP_ALLOC
;
/* Create the mapping; NULL is passed for the device addresses.  */
384 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
/* Push the new descriptor onto the front of the device's data_environ
   chain with the device lock held.  */
387 gomp_mutex_lock (&acc_dev
->lock
);
390 tgt
->prev
= acc_dev
->openacc
.data_environ
;
391 acc_dev
->openacc
.data_environ
= tgt
;
393 gomp_mutex_unlock (&acc_dev
->lock
);
/* Call present_create_copy on [H,+S) with FLAG_CREATE only and return
   its result.  */
400 acc_create (void *h
, size_t s
)
402 return present_create_copy (FLAG_CREATE
, h
, s
);
/* Call present_create_copy on [H,+S) with FLAG_CREATE | FLAG_COPY and
   return its result.  */
406 acc_copyin (void *h
, size_t s
)
408 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* Call present_create_copy on [H,+S) with FLAG_PRESENT | FLAG_CREATE and
   return its result.  */
412 acc_present_or_create (void *h
, size_t s
)
414 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
/* Call present_create_copy on [H,+S) with all three flags
   (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY) and return its result.  */
418 acc_present_or_copyin (void *h
, size_t s
)
420 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* Flag for the F argument of delete_copyout: when set, dev2host_func is
   called on the range before free_func.  It has the same value (1 << 0)
   as FLAG_PRESENT, which is used with present_create_copy instead.  */
423 #define FLAG_COPYOUT (1 << 0)
/* Shared implementation behind acc_delete (F = 0) and acc_copyout
   (F = FLAG_COPYOUT).  It looks up the host range [H,+S), reports a fatal
   error unless the range is mapped and matches a mapped block exactly
   (same start, same length), calls the device's dev2host_func on
   (H, D, S) when F has FLAG_COPYOUT, and calls the device's free_func on
   the device address D.
   NOTE(review): the comment that begins "No need to call lazy open" in
   this function is truncated in this copy and is never closed; restore
   its tail before compiling.  */
426 delete_copyout (unsigned f
, void *h
, size_t s
)
431 struct goacc_thread
*thr
= goacc_thread ();
432 struct gomp_device_descr
*acc_dev
= thr
->dev
;
434 n
= lookup_host (acc_dev
, h
, s
);
436 /* No need to call lazy open, as the data must already have been
440 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
442 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
444 host_size
= n
->host_end
- n
->host_start
;
446 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
447 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
448 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
450 if (f
& FLAG_COPYOUT
)
451 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
455 acc_dev
->free_func (acc_dev
->target_id
, d
);
/* Call delete_copyout on [H,+S) with no flags set, i.e. without
   FLAG_COPYOUT.  */
459 acc_delete (void *h
, size_t s
)
461 delete_copyout (0, h
, s
);
/* Call delete_copyout on [H,+S) with FLAG_COPYOUT set.  */
464 void acc_copyout (void *h
, size_t s
)
466 delete_copyout (FLAG_COPYOUT
, h
, s
);
/* Shared implementation behind acc_update_device (IS_DEV = 1) and
   acc_update_self (IS_DEV = 0).  It looks up the host range [H,+S),
   reports a fatal error for a range that is not mapped, computes the
   device address D, and transfers S bytes' worth of arguments through
   the device's host2dev_func or dev2host_func; the test on IS_DEV that
   selects between the two calls is not visible in this copy.
   NOTE(review): D is computed as tgt_start + tgt_offset without adding
   H's offset from n->host_start -- confirm this is intended when H is
   not the start of the mapped block.
   NOTE(review): the comment that begins "No need to call lazy open" in
   this function is truncated in this copy and is never closed; restore
   its tail before compiling.  */
470 update_dev_host (int is_dev
, void *h
, size_t s
)
474 struct goacc_thread
*thr
= goacc_thread ();
475 struct gomp_device_descr
*acc_dev
= thr
->dev
;
477 n
= lookup_host (acc_dev
, h
, s
);
479 /* No need to call lazy open, as the data must already have been
483 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
485 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
488 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
490 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
/* Call update_dev_host on [H,+S) with IS_DEV = 1.  */
494 acc_update_device (void *h
, size_t s
)
496 update_dev_host (1, h
, s
);
/* Call update_dev_host on [H,+S) with IS_DEV = 0.  */
500 acc_update_self (void *h
, size_t s
)
502 update_dev_host (0, h
, s
);
/* Map MAPNUM host addresses on the current thread's device through
   gomp_map_vars and link the resulting descriptor into the device's
   data_environ chain.  (The end of the parameter list is not present in
   this copy of the file.)  */
506 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
509 struct target_mem_desc
*tgt
;
510 struct goacc_thread
*thr
= goacc_thread ();
511 struct gomp_device_descr
*acc_dev
= thr
->dev
;
513 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
/* NULL is passed for the device addresses.  */
514 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
515 NULL
, sizes
, kinds
, true, false);
516 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
/* Push the new descriptor onto the front of the data_environ chain.
   NOTE(review): no lock acquisition is visible around this update --
   confirm whether acc_dev->lock should be held here.  */
517 tgt
->prev
= acc_dev
->openacc
.data_environ
;
518 acc_dev
->openacc
.data_environ
= tgt
;
/* Undo a mapping that contains host address H on the current thread's
   device, unmapping immediately or deferring to the device's asynchronous
   cleanup hook depending on ASYNC.  */
522 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
524 struct goacc_thread
*thr
= goacc_thread ();
525 struct gomp_device_descr
*acc_dev
= thr
->dev
;
527 struct target_mem_desc
*t
;
/* Reference count at which the descriptor is treated as holding its last
   reference: 2 when MAPNUM is 1, otherwise 3.  */
528 int minrefs
= (mapnum
== 1) ? 2 : 3;
/* Look up a one-byte range starting at H.  */
530 n
= lookup_host (acc_dev
, h
, 1);
/* Fatal error for an address that is not in a mapped block; the guarding
   condition is not present in this copy of the file.  */
533 gomp_fatal ("%p is not a mapped block", (void *)h
);
535 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
539 struct target_mem_desc
*tp
;
/* The refcount test and chain relinking below run with the device lock
   held.  */
541 gomp_mutex_lock (&acc_dev
->lock
);
543 if (t
->refcount
== minrefs
)
545 /* This is the last reference, so pull the descriptor off the
546 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
547 freeing the device memory. */
551 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
559 acc_dev
->openacc
.data_environ
= t
->prev
;
/* Mark the first list entry of T for copy-back.  NOTE(review): the
   condition guarding this (presumably FORCE_COPYFROM) is not visible in
   this copy -- confirm.  */
566 t
->list
[0]->copy_from
= 1;
568 gomp_mutex_unlock (&acc_dev
->lock
);
570 /* If running synchronously, unmap immediately. */
571 if (async
< acc_async_noval
)
572 gomp_unmap_vars (t
, true);
/* The remaining calls start a copy-back through gomp_copy_from_async and
   register T with the device's async cleanup hook; the branch structure
   selecting them is not visible in this copy.  */
575 gomp_copy_from_async (t
);
576 acc_dev
->openacc
.register_async_cleanup_func (t
);
579 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);