/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2014 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
31 #include "gomp-constants.h"
32 #include "libgomp_target.h"
38 #include "splay-tree.h"
40 /* Return block containing [H->S), or NULL if not contained. */
42 attribute_hidden splay_tree_key
43 lookup_host (struct gomp_memory_mapping
*mem_map
, void *h
, size_t s
)
45 struct splay_tree_key_s node
;
48 node
.host_start
= (uintptr_t) h
;
49 node
.host_end
= (uintptr_t) h
+ s
;
51 gomp_mutex_lock (&mem_map
->lock
);
53 key
= splay_tree_lookup (&mem_map
->splay_tree
, &node
);
55 gomp_mutex_unlock (&mem_map
->lock
);
60 /* Return block containing [D->S), or NULL if not contained.
61 The list isn't ordered by device address, so we have to iterate
62 over the whole array. This is not expected to be a common
66 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
69 struct target_mem_desc
*t
;
70 struct gomp_memory_mapping
*mem_map
;
75 mem_map
= tgt
->mem_map
;
77 gomp_mutex_lock (&mem_map
->lock
);
79 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
81 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
85 gomp_mutex_unlock (&mem_map
->lock
);
90 for (i
= 0; i
< t
->list_count
; i
++)
94 splay_tree_key k
= &t
->array
[i
].key
;
95 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
97 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
104 /* OpenACC is silent on how memory exhaustion is indicated. We return
108 acc_malloc (size_t s
)
113 ACC_lazy_initialize ();
115 struct goacc_thread
*thr
= goacc_thread ();
117 return base_dev
->alloc_func (thr
->dev
->target_id
, s
);
120 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
121 the device address is mapped. We choose to check if it mapped,
122 and if it is, to unmap it. */
127 struct goacc_thread
*thr
= goacc_thread ();
132 /* We don't have to call lazy open here, as the ptr value must have
133 been returned by acc_malloc. It's not permitted to pass NULL in
134 (unless you got that null from acc_malloc). */
135 if ((k
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1)))
139 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
141 acc_unmap_data ((void *)(k
->host_start
+ offset
));
144 base_dev
->free_func (thr
->dev
->target_id
, d
);
148 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
150 /* No need to call lazy open here, as the device pointer must have
151 been obtained from a routine that did that. */
152 struct goacc_thread
*thr
= goacc_thread ();
154 base_dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
158 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
160 /* No need to call lazy open here, as the device pointer must have
161 been obtained from a routine that did that. */
162 struct goacc_thread
*thr
= goacc_thread ();
164 base_dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
167 /* Return the device pointer that corresponds to host data H. Or NULL
171 acc_deviceptr (void *h
)
177 ACC_lazy_initialize ();
179 struct goacc_thread
*thr
= goacc_thread ();
181 n
= lookup_host (&thr
->dev
->mem_map
, h
, 1);
186 offset
= h
- n
->host_start
;
188 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
193 /* Return the host pointer that corresponds to device data D. Or NULL
197 acc_hostptr (void *d
)
203 ACC_lazy_initialize ();
205 struct goacc_thread
*thr
= goacc_thread ();
207 n
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1);
212 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
214 h
= n
->host_start
+ offset
;
219 /* Return 1 if host data [H,+S] is present on the device. */
222 acc_is_present (void *h
, size_t s
)
229 ACC_lazy_initialize ();
231 struct goacc_thread
*thr
= goacc_thread ();
232 struct gomp_device_descr
*acc_dev
= thr
->dev
;
234 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
236 if (n
&& ((uintptr_t)h
< n
->host_start
237 || (uintptr_t)h
+ s
> n
->host_end
238 || s
> n
->host_end
- n
->host_start
))
244 /* Create a mapping for host [H,+S] -> device [D,+S] */
247 acc_map_data (void *h
, void *d
, size_t s
)
249 struct target_mem_desc
*tgt
;
254 unsigned short kinds
= GOMP_MAP_ALLOC
;
256 ACC_lazy_initialize ();
258 struct goacc_thread
*thr
= goacc_thread ();
259 struct gomp_device_descr
*acc_dev
= thr
->dev
;
261 if (acc_dev
->capabilities
& TARGET_CAP_SHARED_MEM
)
264 gomp_fatal ("cannot map data on shared-memory system");
266 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
270 struct goacc_thread
*thr
= goacc_thread ();
273 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
274 (void *)h
, (int)s
, (void *)d
, (int)s
);
276 if (lookup_host (&acc_dev
->mem_map
, h
, s
))
277 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
280 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
281 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
284 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
285 &kinds
, true, false);
288 tgt
->prev
= acc_dev
->openacc
.data_environ
;
289 acc_dev
->openacc
.data_environ
= tgt
;
293 acc_unmap_data (void *h
)
295 struct goacc_thread
*thr
= goacc_thread ();
296 struct gomp_device_descr
*acc_dev
= thr
->dev
;
298 /* No need to call lazy open, as the address must have been mapped. */
301 splay_tree_key n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
302 struct target_mem_desc
*t
;
305 gomp_fatal ("%p is not a mapped block", (void *)h
);
307 host_size
= n
->host_end
- n
->host_start
;
309 if (n
->host_start
!= (uintptr_t) h
)
310 gomp_fatal ("[%p,%d] surrounds1 %p",
311 (void *) n
->host_start
, (int) host_size
, (void *) h
);
315 if (t
->refcount
== 2)
317 struct target_mem_desc
*tp
;
319 /* This is the last reference, so pull the descriptor off the
320 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
321 freeing the device memory. */
325 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
327 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
334 acc_dev
->openacc
.data_environ
= t
->prev
;
339 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
342 gomp_unmap_vars (t
, true);
345 #define PCC_Present (1 << 0)
346 #define PCC_Create (1 << 1)
347 #define PCC_Copy (1 << 2)
349 attribute_hidden
void *
350 present_create_copy (unsigned f
, void *h
, size_t s
)
356 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
358 ACC_lazy_initialize ();
360 struct goacc_thread
*thr
= goacc_thread ();
361 struct gomp_device_descr
*acc_dev
= thr
->dev
;
363 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
367 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
369 if (!(f
& PCC_Present
))
370 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
371 (void *)h
, (int)s
, (void *)d
, (int)s
);
372 if ((h
+ s
) > (void *)n
->host_end
)
373 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
375 else if (!(f
& PCC_Create
))
377 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
381 struct target_mem_desc
*tgt
;
383 unsigned short kinds
;
387 kinds
= GOMP_MAP_ALLOC_TO
;
389 kinds
= GOMP_MAP_ALLOC
;
391 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
394 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
397 tgt
->prev
= acc_dev
->openacc
.data_environ
;
398 acc_dev
->openacc
.data_environ
= tgt
;
400 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
407 acc_create (void *h
, size_t s
)
409 return present_create_copy (PCC_Create
, h
, s
);
413 acc_copyin (void *h
, size_t s
)
415 return present_create_copy (PCC_Create
| PCC_Copy
, h
, s
);
419 acc_present_or_create (void *h
, size_t s
)
421 return present_create_copy (PCC_Present
| PCC_Create
, h
, s
);
425 acc_present_or_copyin (void *h
, size_t s
)
427 return present_create_copy (PCC_Present
| PCC_Create
| PCC_Copy
, h
, s
);
430 #define DC_Copyout (1 << 0)
433 delete_copyout (unsigned f
, void *h
, size_t s
)
438 struct goacc_thread
*thr
= goacc_thread ();
439 struct gomp_device_descr
*acc_dev
= thr
->dev
;
441 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
443 /* No need to call lazy open, as the data must already have been
447 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
449 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
451 host_size
= n
->host_end
- n
->host_start
;
453 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
454 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
455 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
458 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
462 acc_dev
->free_func (acc_dev
->target_id
, d
);
/* Remove the mapping for host range [H,+S) without copying the device
   data back (no flags passed to delete_copyout).  */

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}
471 void acc_copyout (void *h
, size_t s
)
473 delete_copyout (DC_Copyout
, h
, s
);
477 update_dev_host (int is_dev
, void *h
, size_t s
)
481 struct goacc_thread
*thr
= goacc_thread ();
482 struct gomp_device_descr
*acc_dev
= thr
->dev
;
484 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
486 /* No need to call lazy open, as the data must already have been
490 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
492 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
495 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
497 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
/* Refresh the device copy of host range [H,+S) from the host
   (update_dev_host with IS_DEV set).  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}
/* Refresh host range [H,+S) from its device copy (update_dev_host with
   IS_DEV clear).  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
513 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
516 struct target_mem_desc
*tgt
;
517 struct goacc_thread
*thr
= goacc_thread ();
518 struct gomp_device_descr
*acc_dev
= thr
->dev
;
520 gomp_notify (" %s: prepare mappings\n", __FUNCTION__
);
521 tgt
= gomp_map_vars ((struct gomp_device_descr
*) acc_dev
, mapnum
, hostaddrs
,
522 NULL
, sizes
, kinds
, true, false);
523 gomp_notify (" %s: mappings prepared\n", __FUNCTION__
);
524 tgt
->prev
= acc_dev
->openacc
.data_environ
;
525 acc_dev
->openacc
.data_environ
= tgt
;
529 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
531 struct goacc_thread
*thr
= goacc_thread ();
532 struct gomp_device_descr
*acc_dev
= thr
->dev
;
534 struct target_mem_desc
*t
;
535 int minrefs
= (mapnum
== 1) ? 2 : 3;
537 n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
540 gomp_fatal ("%p is not a mapped block", (void *)h
);
542 gomp_notify (" %s: restore mappings\n", __FUNCTION__
);
546 struct target_mem_desc
*tp
;
548 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
550 if (t
->refcount
== minrefs
)
552 /* This is the last reference, so pull the descriptor off the
553 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
554 freeing the device memory. */
558 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
566 acc_dev
->openacc
.data_environ
= t
->prev
;
573 t
->list
[0]->copy_from
= 1;
575 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
577 /* If running synchronously, unmap immediately. */
578 if (async
< acc_async_noval
)
579 gomp_unmap_vars (t
, true);
582 gomp_copy_from_async (t
);
583 acc_dev
->openacc
.register_async_cleanup_func (t
);
586 gomp_notify (" %s: mappings restored\n", __FUNCTION__
);