1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2015 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
34 #include "splay-tree.h"
38 /* Return block containing host range [H, H+S), or NULL if not contained.
   A temporary key NODE spanning host_start = H to host_end = H + S is
   looked up in the splay tree DEV->mem_map while DEV->lock is held, and
   the lookup result is stored in KEY.  */
41 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
43 struct splay_tree_key_s node
;
/* Describe the queried range [H, H+S) as a lookup key.  */
46 node
.host_start
= (uintptr_t) h
;
47 node
.host_end
= (uintptr_t) h
+ s
;
/* The tree is only searched while the device lock is held.  */
49 gomp_mutex_lock (&dev
->lock
);
50 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
51 gomp_mutex_unlock (&dev
->lock
);
56 /* Return block containing device range [D, D+S), or NULL if not contained.
57 The list isn't ordered by device address, so we have to iterate
58 over the whole array. This is not expected to be a common operation. */
/* Search the chain of target memory descriptors that starts at TGT and is
   linked through ->prev for one whose device range [tgt_start, tgt_end]
   covers [D, D+S).  The chain walk is done with TGT->device_descr->lock
   held.  The key array of the descriptor reached is then scanned.  */
62 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
65 struct target_mem_desc
*t
;
/* TGT is dereferenced here.  NOTE(review): any NULL check on TGT is not
   visible in this copy -- confirm against the complete file.  */
70 gomp_mutex_lock (&tgt
->device_descr
->lock
);
/* Walk the descriptor chain through ->prev until it ends.  */
72 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
/* True when this descriptor's device range covers [D, D+S).  */
74 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
/* NOTE(review): the lock is released here, but T->array is still read by
   the loop below -- confirm that T cannot be unlinked or freed
   concurrently.  */
78 gomp_mutex_unlock (&tgt
->device_descr
->lock
);
/* Visit every key recorded in descriptor T.  */
83 for (i
= 0; i
< t
->list_count
; i
++)
87 splay_tree_key k
= &t
->array
[i
].key
;
/* D is a void pointer, so this relies on GNU C void-pointer arithmetic.  */
88 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
/* Test whether host_start + OFFSET still lies within the key's host
   range.  */
90 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
97 /* OpenACC is silent on how memory exhaustion is indicated. We return the result of the device's alloc_func call unchanged. */
/* Allocate a block of size S on the current thread's device.  The runtime
   is lazily initialized first; the request is then forwarded to the
   device's alloc_func together with its target_id, and that call's result
   is returned.  */
101 acc_malloc (size_t s
)
106 goacc_lazy_initialize ();
108 struct goacc_thread
*thr
= goacc_thread ();
/* Hand the request to the device plugin and return its result as-is.  */
112 return thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
/* Release device memory D through the current device's free_func.
   NOTE(review): the function header is not present in this copy;
   presumably this is the body of acc_free (void *d) -- confirm.  */
115 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
116 the device address is mapped. We choose to check if it is mapped,
117 and if it is, to unmap it. */
122 struct goacc_thread
*thr
= goacc_thread ();
127 assert (thr
&& thr
->dev
);
129 /* We don't have to call lazy open here, as the ptr value must have
130 been returned by acc_malloc. It's not permitted to pass NULL in
131 (unless you got that null from acc_malloc). */
/* Look for a mapping that covers the single byte at D.  */
132 if ((k
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1)))
/* Derive the host address that is handed to acc_unmap_data.  D is a void
   pointer, so this relies on GNU C void-pointer arithmetic.  */
136 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
138 acc_unmap_data ((void *)(k
->host_start
+ offset
));
/* Release the device memory itself.  */
141 thr
->dev
->free_func (thr
->dev
->target_id
, d
);
/* Copy a block of size S from host address H to device address D by
   calling the current device's host2dev_func.  The current thread must
   already have a device; this is asserted.  */
145 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
147 /* No need to call lazy open here, as the device pointer must have
148 been obtained from a routine that did that. */
149 struct goacc_thread
*thr
= goacc_thread ();
151 assert (thr
&& thr
->dev
);
/* Argument order for the plugin hook is destination (D), source (H),
   size.  */
153 thr
->dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
/* Copy a block of size S from device address D to host address H by
   calling the current device's dev2host_func.  The current thread must
   already have a device; this is asserted.  */
157 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
159 /* No need to call lazy open here, as the device pointer must have
160 been obtained from a routine that did that. */
161 struct goacc_thread
*thr
= goacc_thread ();
163 assert (thr
&& thr
->dev
);
/* Argument order for the plugin hook is destination (H), source (D),
   size.  */
165 thr
->dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
168 /* Return the device pointer that corresponds to host data H, or NULL if H is not mapped. */
/* Translate host address H into a device address.  The mapping that
   contains the single byte at H is looked up on the current thread's
   device, and D is computed as tgt_start + tgt_offset + (H - host_start).  */
172 acc_deviceptr (void *h
)
178 goacc_lazy_initialize ();
180 struct goacc_thread
*thr
= goacc_thread ();
182 n
= lookup_host (thr
->dev
, h
, 1);
/* N is dereferenced from here on.  NOTE(review): the handling of a failed
   lookup is not visible in this copy -- confirm against the complete
   file.  */
/* Distance of H from the start of the mapped host block.  */
187 offset
= h
- n
->host_start
;
/* Apply the same distance to the start of the mapped device block.  */
189 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
194 /* Return the host pointer that corresponds to device data D, or NULL if D is not mapped. */
/* Translate device address D into a host address.  The mapping that
   covers the single byte at D is looked up in the current device's
   data_environ chain, and H is computed as host_start + OFFSET.  */
198 acc_hostptr (void *d
)
204 goacc_lazy_initialize ();
206 struct goacc_thread
*thr
= goacc_thread ();
208 n
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1);
/* N is dereferenced from here on.  NOTE(review): the handling of a failed
   lookup is not visible in this copy -- confirm against the complete
   file.  */
/* NOTE(review): acc_deviceptr computes D = tgt_start + tgt_offset +
   (H - host_start), so the exact inverse would subtract tgt_offset here,
   whereas this expression adds it.  The two agree only when tgt_offset
   is zero -- confirm which is intended.  */
213 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
215 h
= n
->host_start
+ offset
;
220 /* Return 1 if host data [H,+S] is present on the device.  The mapping
   found by lookup_host on the current thread's device is re-checked
   explicitly against the bounds of the requested range.  */
223 acc_is_present (void *h
, size_t s
)
230 goacc_lazy_initialize ();
232 struct goacc_thread
*thr
= goacc_thread ();
233 struct gomp_device_descr
*acc_dev
= thr
->dev
;
235 n
= lookup_host (acc_dev
, h
, s
);
/* This condition is true when a mapping was found but does NOT fully
   cover the request: H starts before it, H + S ends past it, or S is
   larger than the whole mapping.  */
237 if (n
&& ((uintptr_t)h
< n
->host_start
238 || (uintptr_t)h
+ s
> n
->host_end
239 || s
> n
->host_end
- n
->host_start
))
245 /* Create a mapping for host [H,+S] -> device [D,+S].  The new target
   descriptor returned by gomp_map_vars is pushed onto the front of the
   current device's data_environ chain.  Bad or already-mapped ranges are
   reported through gomp_fatal.  */
248 acc_map_data (void *h
, void *d
, size_t s
)
250 struct target_mem_desc
*tgt
;
/* The mapping is requested with kind GOMP_MAP_ALLOC.  */
255 unsigned short kinds
= GOMP_MAP_ALLOC
;
257 goacc_lazy_initialize ();
259 struct goacc_thread
*thr
= goacc_thread ();
260 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* Devices that report the shared-memory capability take a separate path
   that calls gomp_map_vars with no device and zero mappings.  */
262 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
265 gomp_fatal ("cannot map data on shared-memory system");
267 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
/* This inner declaration shadows the THR declared above; both hold the
   result of goacc_thread ().  */
271 struct goacc_thread
*thr
= goacc_thread ();
/* The sizes are printed with %d after a cast to int, so the message
   truncates values that do not fit in an int.  */
274 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
275 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* Refuse to map a host range that is already mapped.  */
277 if (lookup_host (acc_dev
, h
, s
))
278 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
281 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
282 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
285 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
286 &kinds
, true, false);
/* Push the new descriptor onto the front of the device's chain.
   NOTE(review): no acc_dev->lock is taken around these two assignments
   in the visible code, whereas present_create_copy performs the same
   update between gomp_mutex_lock and gomp_mutex_unlock -- confirm
   whether locking is needed here.  */
289 tgt
->prev
= acc_dev
->openacc
.data_environ
;
290 acc_dev
->openacc
.data_environ
= tgt
;
/* Remove the mapping whose host block starts exactly at H.  gomp_fatal
   is called when H is not mapped or only lies inside a larger block.
   When the descriptor's refcount is 2 it is unlinked from the device's
   data_environ chain under acc_dev->lock.  The descriptor is finally
   passed to gomp_unmap_vars.  */
294 acc_unmap_data (void *h
)
296 struct goacc_thread
*thr
= goacc_thread ();
297 struct gomp_device_descr
*acc_dev
= thr
->dev
;
299 /* No need to call lazy open, as the address must have been mapped. */
/* Find the mapping that contains the single byte at H.  */
302 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
303 struct target_mem_desc
*t
;
306 gomp_fatal ("%p is not a mapped block", (void *)h
);
/* Length of the mapped host block; only used in the diagnostic below.  */
308 host_size
= n
->host_end
- n
->host_start
;
/* H must be the start of the mapped block, not an interior address.  */
310 if (n
->host_start
!= (uintptr_t) h
)
311 gomp_fatal ("[%p,%d] surrounds1 %p",
312 (void *) n
->host_start
, (int) host_size
, (void *) h
);
/* A refcount of exactly 2 is treated as the last reference (see the
   comment below).  */
316 if (t
->refcount
== 2)
318 struct target_mem_desc
*tp
;
320 /* This is the last reference, so pull the descriptor off the
321 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
322 freeing the device memory. */
326 gomp_mutex_lock (&acc_dev
->lock
);
/* Walk the chain from its head; TP starts as NULL.  The rest of the
   loop header and the loop body are not visible in this copy.  */
328 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
/* Replace the chain head with the successor of T.  */
335 acc_dev
->openacc
.data_environ
= t
->prev
;
340 gomp_mutex_unlock (&acc_dev
->lock
);
343 gomp_unmap_vars (t
, true);
/* Behavior flags for present_create_copy; they are combined with bitwise
   OR by acc_create, acc_copyin, acc_present_or_create and
   acc_present_or_copyin.
   FLAG_PRESENT: when clear, an existing mapping is a fatal error.
   FLAG_CREATE: when clear, a missing mapping is a fatal error.
   FLAG_COPY: passed by the copyin variants; its use inside
   present_create_copy is not visible in this copy.  */
346 #define FLAG_PRESENT (1 << 0)
347 #define FLAG_CREATE (1 << 1)
348 #define FLAG_COPY (1 << 2)
/* Shared implementation behind the create and copyin entry points.  It
   looks up host range [H,+S] on the current thread's device.  If a
   mapping exists, D is set to the start of its device block.  Otherwise
   a new mapping is created with gomp_map_vars and pushed onto the
   device's data_environ chain under acc_dev->lock.  F is a bitwise OR of
   the FLAG_* values defined above.  */
351 present_create_copy (unsigned f
, void *h
, size_t s
)
357 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
359 goacc_lazy_initialize ();
361 struct goacc_thread
*thr
= goacc_thread ();
362 struct gomp_device_descr
*acc_dev
= thr
->dev
;
364 n
= lookup_host (acc_dev
, h
, s
);
/* Already-mapped case: D is the start of the existing device block.  */
368 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
/* Without FLAG_PRESENT an existing mapping is an error.  */
370 if (!(f
& FLAG_PRESENT
))
371 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
372 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* The request must not run past the end of the existing mapping.  H is
   a void pointer, so H + S relies on GNU C void-pointer arithmetic.  */
373 if ((h
+ s
) > (void *)n
->host_end
)
374 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
/* Without FLAG_CREATE a missing mapping is an error.  */
376 else if (!(f
& FLAG_CREATE
))
378 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
382 struct target_mem_desc
*tgt
;
384 unsigned short kinds
;
/* GOMP_MAP_ALLOC is the only kind assigned in the visible code;
   presumably a FLAG_COPY case selects a different kind -- confirm
   against the complete file.  */
390 kinds
= GOMP_MAP_ALLOC
;
/* Create the mapping; the tail of this call is not visible in this
   copy.  */
392 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
395 gomp_mutex_lock (&acc_dev
->lock
);
/* Push the new descriptor onto the front of the device's chain while
   the device lock is held.  */
398 tgt
->prev
= acc_dev
->openacc
.data_environ
;
399 acc_dev
->openacc
.data_environ
= tgt
;
401 gomp_mutex_unlock (&acc_dev
->lock
);
/* Forward host range [H,+S] to present_create_copy with FLAG_CREATE only
   and return its result.  FLAG_PRESENT is not passed, so an existing
   mapping is a fatal error there.  */
408 acc_create (void *h
, size_t s
)
410 return present_create_copy (FLAG_CREATE
, h
, s
);
/* Forward host range [H,+S] to present_create_copy with FLAG_CREATE and
   FLAG_COPY and return its result.  FLAG_PRESENT is not passed, so an
   existing mapping is a fatal error there.  */
414 acc_copyin (void *h
, size_t s
)
416 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* Forward host range [H,+S] to present_create_copy with FLAG_PRESENT and
   FLAG_CREATE and return its result.  Both an existing and a missing
   mapping are accepted.  */
420 acc_present_or_create (void *h
, size_t s
)
422 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
/* Forward host range [H,+S] to present_create_copy with FLAG_PRESENT,
   FLAG_CREATE and FLAG_COPY and return its result.  Both an existing and
   a missing mapping are accepted.  */
426 acc_present_or_copyin (void *h
, size_t s
)
428 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* Flag for delete_copyout: when set in F, the device data is copied back
   to the host with dev2host_func before the device memory is freed.
   Note that it has the same value, 1 << 0, as FLAG_PRESENT above.  */
431 #define FLAG_COPYOUT (1 << 0)
/* Shared implementation behind acc_delete and acc_copyout.  It looks up
   host range [H,+S] on the current thread's device and calls gomp_fatal
   when the range is not mapped or does not match the mapped block
   exactly (same start and same size).  D is the start of the mapped
   device block, tgt_start + tgt_offset.  If F has FLAG_COPYOUT set, the
   block is copied from D back to H.  The device memory at D is finally
   released with free_func.  NOTE(review): the step that removes the
   mapping itself is not visible in this copy -- confirm against the
   complete file.  */
434 delete_copyout (unsigned f
, void *h
, size_t s
)
439 struct goacc_thread
*thr
= goacc_thread ();
440 struct gomp_device_descr
*acc_dev
= thr
->dev
;
442 n
= lookup_host (acc_dev
, h
, s
);
444 /* No need to call lazy open, as the data must already have been
448 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
450 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
452 host_size
= n
->host_end
- n
->host_start
;
454 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
455 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
456 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
458 if (f
& FLAG_COPYOUT
)
459 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
463 acc_dev
->free_func (acc_dev
->target_id
, d
);
/* Forward host range [H,+S] to delete_copyout with no flags set, so no
   copy back to the host is requested.  */
467 acc_delete (void *h
, size_t s
)
469 delete_copyout (0, h
, s
);
/* Forward host range [H,+S] to delete_copyout with FLAG_COPYOUT, so the
   device data is copied back to the host before the memory is freed.  */
472 void acc_copyout (void *h
, size_t s
)
474 delete_copyout (FLAG_COPYOUT
, h
, s
);
/* Shared implementation behind acc_update_device, which passes IS_DEV
   as 1, and acc_update_self, which passes 0.  It looks up host range
   [H,+S] on the current thread's device and calls gomp_fatal when the
   range is not mapped.  Two transfers appear below: host2dev_func from H
   to D and dev2host_func from D to H.  The condition that selects
   between them, presumably IS_DEV, is not visible in this copy.
   NOTE(review): D is computed as tgt_start + tgt_offset, the start of
   the mapped device block, with no adjustment for where H lies inside
   the mapped host block -- confirm that H is required to equal
   host_start.  */
478 update_dev_host (int is_dev
, void *h
, size_t s
)
482 struct goacc_thread
*thr
= goacc_thread ();
483 struct gomp_device_descr
*acc_dev
= thr
->dev
;
485 n
= lookup_host (acc_dev
, h
, s
);
487 /* No need to call lazy open, as the data must already have been
491 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
493 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
496 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
498 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
/* Forward host range [H,+S] to update_dev_host with IS_DEV set to 1.  */
502 acc_update_device (void *h
, size_t s
)
504 update_dev_host (1, h
, s
);
/* Forward host range [H,+S] to update_dev_host with IS_DEV set to 0.  */
508 acc_update_self (void *h
, size_t s
)
510 update_dev_host (0, h
, s
);
/* Map MAPNUM host addresses on the current thread's device with
   gomp_map_vars and push the resulting descriptor onto the front of the
   device's data_environ chain.  Progress is reported through gomp_debug.
   The end of the parameter list is not visible in this copy; KINDS, used
   below, is presumably the remaining parameter -- confirm.  */
514 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
517 struct target_mem_desc
*tgt
;
518 struct goacc_thread
*thr
= goacc_thread ();
519 struct gomp_device_descr
*acc_dev
= thr
->dev
;
521 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
/* NULL is passed in the device-address position of gomp_map_vars.  */
522 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
523 NULL
, sizes
, kinds
, true, false);
524 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
/* Push the new descriptor onto the front of the device's chain.
   NOTE(review): no acc_dev->lock is taken around these two assignments
   in the visible code, whereas present_create_copy performs the same
   update between gomp_mutex_lock and gomp_mutex_unlock -- confirm
   whether locking is needed here.  */
525 tgt
->prev
= acc_dev
->openacc
.data_environ
;
526 acc_dev
->openacc
.data_environ
= tgt
;
/* Undo a mapping for host address H on the current thread's device.
   gomp_fatal is called when H is not mapped.  Under acc_dev->lock, when
   the descriptor's refcount equals MINREFS it is unlinked from the
   data_environ chain.  The descriptor is then either unmapped
   immediately, when ASYNC is less than acc_async_noval, or handed to
   gomp_copy_from_async and the device's register_async_cleanup_func.
   The use of FORCE_COPYFROM is not visible in this copy; presumably it
   guards the copy_from assignment below -- confirm.  */
530 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
532 struct goacc_thread
*thr
= goacc_thread ();
533 struct gomp_device_descr
*acc_dev
= thr
->dev
;
535 struct target_mem_desc
*t
;
/* Refcount value that is treated as the last reference: 2 when MAPNUM
   is 1, otherwise 3.  */
536 int minrefs
= (mapnum
== 1) ? 2 : 3;
/* Find the mapping that contains the single byte at H.  */
538 n
= lookup_host (acc_dev
, h
, 1);
541 gomp_fatal ("%p is not a mapped block", (void *)h
);
543 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
547 struct target_mem_desc
*tp
;
549 gomp_mutex_lock (&acc_dev
->lock
);
551 if (t
->refcount
== minrefs
)
553 /* This is the last reference, so pull the descriptor off the
554 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
555 freeing the device memory. */
/* Walk the chain from its head; TP starts as NULL.  The rest of the
   loop header and the loop body are not visible in this copy.  */
559 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
/* Replace the chain head with the successor of T.  */
567 acc_dev
->openacc
.data_environ
= t
->prev
;
/* Set copy_from on the first list entry of the descriptor.  */
574 t
->list
[0]->copy_from
= 1;
576 gomp_mutex_unlock (&acc_dev
->lock
);
578 /* If running synchronously, unmap immediately. */
579 if (async
< acc_async_noval
)
580 gomp_unmap_vars (t
, true);
/* Otherwise start the copy back and register T with the device's async
   cleanup hook.  */
583 gomp_copy_from_async (t
);
584 acc_dev
->openacc
.register_async_cleanup_func (t
);
587 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);