1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2014 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU OpenMP Library (libgomp).
9 Libgomp is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 Under Section 7 of GPL version 3, you are granted additional
20 permissions described in the GCC Runtime Library Exception, version
21 3.1, as published by the Free Software Foundation.
23 You should have received a copy of the GNU General Public License and
24 a copy of the GCC Runtime Library Exception along with this program;
25 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
26 <http://www.gnu.org/licenses/>. */
31 #include "gomp-constants.h"
38 #include "splay-tree.h"
40 /* Return block containing [H,+S), or NULL if not contained.  The query
   range is described by a temporary key spanning [H, H+S) and looked up
   in MEM_MAP's splay tree while MEM_MAP->lock is held.
   NOTE(review): the declaration of KEY and the return statement are not
   present in this listing.  */
42 attribute_hidden splay_tree_key
43 lookup_host (struct gomp_memory_mapping
*mem_map
, void *h
, size_t s
)
45 struct splay_tree_key_s node
;
/* Describe the queried host range as a splay-tree key.  */
48 node
.host_start
= (uintptr_t) h
;
49 node
.host_end
= (uintptr_t) h
+ s
;
/* The tree is consulted only with the mapping lock held.  */
51 gomp_mutex_lock (&mem_map
->lock
);
53 key
= splay_tree_lookup (&mem_map
->splay_tree
, &node
);
55 gomp_mutex_unlock (&mem_map
->lock
);
60 /* Return block containing [D,+S), or NULL if not contained.
61 The list isn't ordered by device address, so we have to iterate
62 over the whole array. This is not expected to be a common operation. */
/* Search the descriptor chain that starts at TGT for a mapping covering
   the device range [D, D+S).  The chain walk runs with TGT->mem_map->lock
   held; the per-entry scan that follows runs after the lock is released.
   NOTE(review): the return type, the declarations of I and OFFSET, and
   the loop/branch bodies are not present in this listing.  */
66 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
69 struct target_mem_desc
*t
;
70 struct gomp_memory_mapping
*mem_map
;
75 mem_map
= tgt
->mem_map
;
77 gomp_mutex_lock (&mem_map
->lock
);
/* Follow ->prev from TGT, testing whether each descriptor's device
   range [tgt_start, tgt_end] encloses [d, d + s).  */
79 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
81 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
85 gomp_mutex_unlock (&mem_map
->lock
);
/* Examine every entry in the descriptor's array.  */
90 for (i
= 0; i
< t
->list_count
; i
++)
94 splay_tree_key k
= &t
->array
[i
].key
;
/* NOTE(review): the offset is computed as d - tgt_start + tgt_offset.
   If the entry's device address is tgt_start + tgt_offset, a minus sign
   would be expected here -- confirm.  */
95 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
/* The entry qualifies when host_start + offset does not run past the
   entry's host_end.  */
97 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
104 /* OpenACC is silent on how memory exhaustion is indicated. We return NULL. */
/* Pass S to the base device's alloc_func hook and return its result.
   NOTE(review): the return type and any guard on S are not present in
   this listing.  */
108 acc_malloc (size_t s
)
/* Presumably brings the runtime up on first use -- confirm against the
   definition of ACC_lazy_initialize.  */
113 ACC_lazy_initialize ();
115 return base_dev
->alloc_func (s
);
118 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
119 the device address is mapped. We choose to check if it is mapped,
120 and if it is, to unmap it.
   NOTE(review): the signature line of this routine is not present in
   this listing; from the body it takes the device pointer D and is
   presumably acc_free.  */
125 struct goacc_thread
*thr
= goacc_thread ();
130 /* We don't have to call lazy open here, as the ptr value must have
131 been returned by acc_malloc. It's not permitted to pass NULL in
132 (unless you got that null from acc_malloc). */
/* Look for a mapping that covers the single byte at D.  */
133 if ((k
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1)))
/* Translate D into the matching host address so the mapping can be
   removed with acc_unmap_data.  NOTE(review): tgt_offset is added
   rather than subtracted -- confirm.  */
137 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
139 acc_unmap_data ((void *)(k
->host_start
+ offset
));
/* Hand D to the base device's free_func hook.  */
142 base_dev
->free_func (d
);
/* Forward (D, H, S) to the base device's host2dev_func hook; presumably
   this copies S bytes from host address H to device address D.  */
146 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
148 /* No need to call lazy open here, as the device pointer must have
149 been obtained from a routine that did that. */
150 base_dev
->host2dev_func (d
, h
, s
);
/* Forward (H, D, S) to the base device's dev2host_func hook; presumably
   this copies S bytes from device address D to host address H.  */
154 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
156 /* No need to call lazy open here, as the device pointer must have
157 been obtained from a routine that did that. */
158 base_dev
->dev2host_func (h
, d
, s
);
161 /* Return the device pointer that corresponds to host data H. Or NULL if no mapping. */
/* Translate host address H into a device address using the current
   thread's device mapping table.
   NOTE(review): the return type, local declarations, the handling of a
   failed lookup and the return statement are not present in this
   listing.  */
165 acc_deviceptr (void *h
)
171 ACC_lazy_initialize ();
173 struct goacc_thread
*thr
= goacc_thread ();
/* Find the mapping that contains the single byte at H.  */
175 n
= lookup_host (&thr
->dev
->mem_map
, h
, 1);
/* Distance of H from the start of the mapped host block.  */
180 offset
= h
- n
->host_start
;
/* Device address: start of the target block, plus this mapping's
   offset within it, plus the distance of H into the mapping.  */
182 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
187 /* Return the host pointer that corresponds to device data D. Or NULL if no mapping. */
/* Translate device address D into a host address by searching the
   current thread's device data environment.
   NOTE(review): the return type, local declarations, the handling of a
   failed lookup and the return statement are not present in this
   listing.  */
191 acc_hostptr (void *d
)
197 ACC_lazy_initialize ();
199 struct goacc_thread
*thr
= goacc_thread ();
/* Find the mapping that covers the single byte at D.  */
201 n
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1);
/* NOTE(review): tgt_offset is added here, whereas acc_deviceptr forms
   the device address as tgt_start + tgt_offset + offset; the inverse
   would subtract tgt_offset -- confirm.  */
206 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
208 h
= n
->host_start
+ offset
;
213 /* Return 1 if host data [H,+S] is present on the device.  Presence is
   decided by looking [H, H+S) up in the current thread's device mapping
   table and then checking that the range lies wholly inside the block
   found.
   NOTE(review): the return type, the declaration of N, the statement
   guarded by the range check and the return statement are not present
   in this listing.  */
216 acc_is_present (void *h
, size_t s
)
223 ACC_lazy_initialize ();
225 struct goacc_thread
*thr
= goacc_thread ();
226 struct gomp_device_descr
*acc_dev
= thr
->dev
;
228 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
/* True when a block was found but the request is not fully inside it:
   H starts before the block, H + S ends after it, or S is larger than
   the block itself.  */
230 if (n
&& ((uintptr_t)h
< n
->host_start
231 || (uintptr_t)h
+ s
> n
->host_end
232 || s
> n
->host_end
- n
->host_start
))
238 /* Create a mapping for host [H,+S] -> device [D,+S].  The new target
   descriptor obtained from gomp_map_vars is linked at the head of the
   device's openacc.data_environ chain.
   NOTE(review): the return type, the declarations of MAPNUM, HOSTADDRS,
   DEVADDRS and SIZES, the conditions guarding two of the gomp_fatal
   calls, and the trailing arguments of two gomp_fatal calls are not
   present in this listing.  */
241 acc_map_data (void *h
, void *d
, size_t s
)
243 struct target_mem_desc
*tgt
;
248 unsigned short kinds
= GOMP_MAP_ALLOC
;
250 ACC_lazy_initialize ();
252 struct goacc_thread
*thr
= goacc_thread ();
253 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* Devices advertising TARGET_CAP_SHARED_MEM take a separate path that
   calls gomp_map_vars with no device and zero mappings.  */
255 if (acc_dev
->capabilities
& TARGET_CAP_SHARED_MEM
)
258 gomp_fatal ("cannot map data on shared-memory system");
260 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
/* NOTE(review): THR is declared a second time here with the same
   initializer as above; presumably this sits in a nested scope and
   shadows the outer declaration -- the redeclaration looks redundant.  */
264 struct goacc_thread
*thr
= goacc_thread ();
267 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
268 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* Refuse to map a host range that already has a mapping.  */
270 if (lookup_host (&acc_dev
->mem_map
, h
, s
))
271 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
/* Refuse to map a device range that already has a mapping.  */
274 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
275 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
278 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
279 &kinds
, true, false);
/* Push the new descriptor on the front of the data_environ chain.
   NOTE(review): no mem_map.lock acquisition is visible around this
   update, whereas present_create_copy takes the lock for the same
   two assignments -- confirm.  */
282 tgt
->prev
= acc_dev
->openacc
.data_environ
;
283 acc_dev
->openacc
.data_environ
= tgt
;
/* Remove the mapping whose host block starts exactly at H.  A host
   address that falls inside a block but is not its start is rejected
   with gomp_fatal.  When the descriptor's refcount is 2 it is first
   unlinked from the device's data_environ chain under mem_map.lock;
   the descriptor is then passed to gomp_unmap_vars.
   NOTE(review): the return type, the declaration of HOST_SIZE, the
   assignment of T, the guard on the first gomp_fatal and the body of
   the chain walk are not present in this listing.  */
287 acc_unmap_data (void *h
)
289 struct goacc_thread
*thr
= goacc_thread ();
290 struct gomp_device_descr
*acc_dev
= thr
->dev
;
292 /* No need to call lazy open, as the address must have been mapped. */
/* Find the mapping that contains the single byte at H.  */
295 splay_tree_key n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
296 struct target_mem_desc
*t
;
299 gomp_fatal ("%p is not a mapped block", (void *)h
);
301 host_size
= n
->host_end
- n
->host_start
;
/* H must be the first address of the mapped block.  */
303 if (n
->host_start
!= (uintptr_t) h
)
304 gomp_fatal ("[%p,%d] surrounds1 %p",
305 (void *) n
->host_start
, (int) host_size
, (void *) h
);
309 if (t
->refcount
== 2)
311 struct target_mem_desc
*tp
;
313 /* This is the last reference, so pull the descriptor off the
314 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
315 freeing the device memory. */
319 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
/* Walk the chain from its head, with TP starting out as NULL.  */
321 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
/* Drop the head of the chain by advancing data_environ past T.  */
328 acc_dev
->openacc
.data_environ
= t
->prev
;
333 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
336 gomp_unmap_vars (t
, true);
/* Bit flags for the F argument of present_create_copy.  */
/* When clear and the range is already mapped, present_create_copy
   reports "already mapped" via gomp_fatal.  */
339 #define PCC_Present (1 << 0)
/* When clear and this branch is reached, present_create_copy reports
   "not mapped" via gomp_fatal.  */
340 #define PCC_Create (1 << 1)
/* Passed by acc_copyin and acc_present_or_copyin; the test of this bit
   is not visible in this listing.  */
341 #define PCC_Copy (1 << 2)
/* Common implementation behind acc_create, acc_copyin,
   acc_present_or_create and acc_present_or_copyin.  F is a mask of
   PCC_* bits, and [H, H+S) is the host range.  The range is looked up
   in the current thread's device mapping table; depending on the
   outcome and on F, the routine either reports an error via gomp_fatal
   or creates a new mapping with gomp_map_vars and links it at the head
   of the device's data_environ chain under mem_map.lock.
   NOTE(review): local declarations, several branch conditions, the
   final argument of the gomp_map_vars call and the return statement are
   not present in this listing.  */
343 attribute_hidden
void *
344 present_create_copy (unsigned f
, void *h
, size_t s
)
/* Reported for an unusable range; the guarding condition is not visible
   in this listing.  */
350 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
352 ACC_lazy_initialize ();
354 struct goacc_thread
*thr
= goacc_thread ();
355 struct gomp_device_descr
*acc_dev
= thr
->dev
;
357 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
/* Device address of the start of the mapping that was found.  */
361 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
/* An existing mapping is acceptable only when PCC_Present was
   passed.  */
363 if (!(f
& PCC_Present
))
364 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
365 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* The request must not extend past the end of the existing
   mapping.  */
366 if ((h
+ s
) > (void *)n
->host_end
)
367 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
/* Without PCC_Create the range is reported as not mapped.  */
369 else if (!(f
& PCC_Create
))
371 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
375 struct target_mem_desc
*tgt
;
377 unsigned short kinds
;
/* Two alternative map kinds follow; the test choosing between them is
   not visible in this listing (presumably PCC_Copy -- confirm).  */
381 kinds
= GOMP_MAP_ALLOC_TO
;
383 kinds
= GOMP_MAP_ALLOC
;
385 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
/* Link the new descriptor at the head of data_environ while holding
   the mapping lock.  */
388 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
391 tgt
->prev
= acc_dev
->openacc
.data_environ
;
392 acc_dev
->openacc
.data_environ
= tgt
;
394 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
/* Return present_create_copy (PCC_Create, H, S): only the create flag
   is set, so PCC_Present and PCC_Copy are both clear.  */
401 acc_create (void *h
, size_t s
)
403 return present_create_copy (PCC_Create
, h
, s
);
/* Return present_create_copy called with PCC_Create | PCC_Copy for the
   host range [H, H+S); PCC_Present is clear.  */
407 acc_copyin (void *h
, size_t s
)
409 return present_create_copy (PCC_Create
| PCC_Copy
, h
, s
);
/* Return present_create_copy called with PCC_Present | PCC_Create for
   the host range [H, H+S); PCC_Copy is clear.  */
413 acc_present_or_create (void *h
, size_t s
)
415 return present_create_copy (PCC_Present
| PCC_Create
, h
, s
);
/* Return present_create_copy called with all three flags,
   PCC_Present | PCC_Create | PCC_Copy, for the host range [H, H+S).  */
419 acc_present_or_copyin (void *h
, size_t s
)
421 return present_create_copy (PCC_Present
| PCC_Create
| PCC_Copy
, h
, s
);
/* Flag bit for the F argument of delete_copyout; acc_copyout passes it
   and acc_delete passes 0.  */
424 #define DC_Copyout (1 << 0)
/* Common implementation behind acc_delete and acc_copyout.  The host
   range [H, H+S) is looked up in the current thread's device mapping
   table.  D is the device address of the start of the mapping
   (tgt_start + tgt_offset).  The request must match the mapped block
   exactly: a differing start address or size is reported with
   gomp_fatal ("surrounds2").  The listing then shows a dev2host_func
   call on (H, D, S) followed by free_func (D).
   NOTE(review): the return type, local declarations, the guard on the
   "is not mapped" error, the condition on the dev2host_func call
   (presumably F & DC_Copyout -- confirm) and any unmapping step are not
   present in this listing.  The "No need to call lazy open" comment
   inside the body is also cut off before its terminator here.  */
427 delete_copyout (unsigned f
, void *h
, size_t s
)
432 struct goacc_thread
*thr
= goacc_thread ();
433 struct gomp_device_descr
*acc_dev
= thr
->dev
;
435 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
437 /* No need to call lazy open, as the data must already have been
441 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
443 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
445 host_size
= n
->host_end
- n
->host_start
;
447 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
448 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
449 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
452 acc_dev
->dev2host_func (h
, d
, s
);
456 acc_dev
->free_func (d
);
/* Call delete_copyout for the host range [H, H+S) with no flags set
   (F == 0), i.e. without DC_Copyout.  */
460 acc_delete (void *h
, size_t s
)
462 delete_copyout (0, h
, s
);
/* Call delete_copyout for the host range [H, H+S) with the DC_Copyout
   flag set.  */
465 void acc_copyout (void *h
, size_t s
)
467 delete_copyout (DC_Copyout
, h
, s
);
/* Common implementation behind acc_update_device (IS_DEV == 1) and
   acc_update_self (IS_DEV == 0).  The host range [H, H+S) is looked up
   in the current thread's device mapping table.  D is set to the device
   address of the start of the mapping (tgt_start + tgt_offset).  The
   listing then shows a host2dev_func call on (D, H, S) and a
   dev2host_func call on (H, D, S); presumably IS_DEV selects between
   them, but the selecting condition is not visible.
   NOTE(review): D does not include the offset of H within the mapped
   block, whereas acc_deviceptr adds it; confirm this is correct when H
   is not the first address of the mapping.
   NOTE(review): the return type, local declarations and the guard on
   the gomp_fatal call are not present in this listing.  The "No need
   to call lazy open" comment inside the body is also cut off before its
   terminator here.  */
471 update_dev_host (int is_dev
, void *h
, size_t s
)
475 struct goacc_thread
*thr
= goacc_thread ();
476 struct gomp_device_descr
*acc_dev
= thr
->dev
;
478 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
480 /* No need to call lazy open, as the data must already have been
484 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
486 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
489 acc_dev
->host2dev_func (d
, h
, s
);
491 acc_dev
->dev2host_func (h
, d
, s
);
/* Call update_dev_host for the host range [H, H+S) with IS_DEV set
   to 1.  */
495 acc_update_device (void *h
, size_t s
)
497 update_dev_host (1, h
, s
);
/* Call update_dev_host for the host range [H, H+S) with IS_DEV set
   to 0.  */
501 acc_update_self (void *h
, size_t s
)
503 update_dev_host (0, h
, s
);
/* Map MAPNUM host addresses (HOSTADDRS, SIZES, KINDS) on the current
   thread's device with gomp_map_vars and link the resulting descriptor
   at the head of the device's data_environ chain.  Progress is reported
   through gomp_notify before and after the mapping call.
   NOTE(review): the return type and the remainder of the parameter list
   (the declaration of KINDS) are not present in this listing.  */
507 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
510 struct target_mem_desc
*tgt
;
511 struct goacc_thread
*thr
= goacc_thread ();
512 struct gomp_device_descr
*acc_dev
= thr
->dev
;
514 gomp_notify (" %s: prepare mappings\n", __FUNCTION__
);
515 tgt
= gomp_map_vars ((struct gomp_device_descr
*) acc_dev
, mapnum
, hostaddrs
,
516 NULL
, sizes
, kinds
, true, false);
517 gomp_notify (" %s: mappings prepared\n", __FUNCTION__
);
/* Push the new descriptor on the front of the data_environ chain.
   NOTE(review): no mem_map.lock acquisition is visible around this
   update, whereas present_create_copy takes the lock for the same
   two assignments -- confirm.  */
518 tgt
->prev
= acc_dev
->openacc
.data_environ
;
519 acc_dev
->openacc
.data_environ
= tgt
;
/* Tear down the mapping that contains host address H on the current
   thread's device.  Under mem_map.lock, the descriptor is unlinked from
   the data_environ chain when its refcount equals MINREFS.  Afterwards
   the descriptor is either unmapped at once or handed to the device's
   asynchronous cleanup hook, depending on ASYNC.
   NOTE(review): the return type, the declaration of N, the assignment
   of T, the guard on the gomp_fatal call, the body of the chain walk
   and the condition on the copy_from assignment (presumably
   FORCE_COPYFROM -- confirm) are not present in this listing.  */
523 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
525 struct goacc_thread
*thr
= goacc_thread ();
526 struct gomp_device_descr
*acc_dev
= thr
->dev
;
528 struct target_mem_desc
*t
;
/* Refcount value treated as "last reference": 2 when exactly one
   mapping was requested, 3 otherwise.  */
529 int minrefs
= (mapnum
== 1) ? 2 : 3;
/* Find the mapping that contains the single byte at H.  */
531 n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
534 gomp_fatal ("%p is not a mapped block", (void *)h
);
536 gomp_notify (" %s: restore mappings\n", __FUNCTION__
);
540 struct target_mem_desc
*tp
;
542 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
544 if (t
->refcount
== minrefs
)
546 /* This is the last reference, so pull the descriptor off the
547 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
548 freeing the device memory. */
/* Walk the chain from its head, with TP starting out as NULL.  */
552 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
/* Drop the head of the chain by advancing data_environ past T.  */
560 acc_dev
->openacc
.data_environ
= t
->prev
;
/* Mark the first list entry of the descriptor as copy-from.  */
567 t
->list
[0]->copy_from
= 1;
569 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
571 /* If running synchronously, unmap immediately. */
572 if (async
< acc_async_noval
)
573 gomp_unmap_vars (t
, true);
/* Otherwise start the copy with gomp_copy_from_async and register T
   with the device's register_async_cleanup_func hook.  */
576 gomp_copy_from_async (t
);
577 acc_dev
->openacc
.register_async_cleanup_func (t
);
580 gomp_notify (" %s: mappings restored\n", __FUNCTION__
);