1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2015 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
34 #include "splay-tree.h"
/* lookup_host: query MEM_MAP's splay tree for the host address range that
   starts at H and spans S bytes.  A local key NODE is filled in with
   host_start = H and host_end = H + S, and splay_tree_lookup is called
   while MEM_MAP->lock is held; the lock is dropped right after the lookup.
   NOTE(review): the numbering embedded in this listing skips lines, so the
   return type, the declaration of `key' and the return statement are not
   visible here.  */
38 /* Return block containing [H->S), or NULL if not contained. */
41 lookup_host (struct gomp_memory_mapping
*mem_map
, void *h
, size_t s
)
43 struct splay_tree_key_s node
;
46 node
.host_start
= (uintptr_t) h
;
47 node
.host_end
= (uintptr_t) h
+ s
;
49 gomp_mutex_lock (&mem_map
->lock
);
51 key
= splay_tree_lookup (&mem_map
->splay_tree
, &node
);
53 gomp_mutex_unlock (&mem_map
->lock
);
58 /* Return block containing [D->S), or NULL if not contained.
59 The list isn't ordered by device address, so we have to iterate
60 over the whole array. This is not expected to be a common operation. */
/* lookup_dev: search by device address.  Starting at TGT and following the
   ->prev links, the first loop looks for a target memory descriptor whose
   range [tgt_start, tgt_end] covers the S bytes at device address D; that
   walk runs with TGT->mem_map->lock held.  The second loop scans the
   list_count entries of the chosen descriptor's array: for each key K it
   computes offset = D - tgt_start + K->tgt_offset and compares
   K->host_start + offset against K->host_end.
   NOTE(review): the loop bodies, the early exits and every return
   statement are missing from this listing (the embedded numbering skips
   lines), so what is returned on a hit or a miss cannot be confirmed here.
   `d - t->tgt_start' is arithmetic on a `void *' parameter, which relies on
   the GNU C extension for void pointer arithmetic.  */
64 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
67 struct target_mem_desc
*t
;
68 struct gomp_memory_mapping
*mem_map
;
73 mem_map
= tgt
->mem_map
;
75 gomp_mutex_lock (&mem_map
->lock
);
77 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
79 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
83 gomp_mutex_unlock (&mem_map
->lock
);
88 for (i
= 0; i
< t
->list_count
; i
++)
92 splay_tree_key k
= &t
->array
[i
].key
;
93 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
95 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
102 /* OpenACC is silent on how memory exhaustion is indicated. We return NULL. */
/* acc_malloc: allocate S bytes of device memory.  Runs
   goacc_lazy_initialize, fetches the calling thread's goacc_thread record
   and returns whatever base_dev->alloc_func yields for that thread's device
   target_id and S.
   NOTE(review): original lines 107-110 are absent from this listing, so
   anything done before the initialization call is not visible here.  */
106 acc_malloc (size_t s
)
111 goacc_lazy_initialize ();
113 struct goacc_thread
*thr
= goacc_thread ();
115 return base_dev
->alloc_func (thr
->dev
->target_id
, s
);
/* Device memory release routine.  The signature line is missing from this
   listing; presumably this is acc_free (void *d) -- TODO confirm.
   lookup_dev is asked whether the byte at device address D belongs to a
   mapping in the current device's openacc.data_environ.  If it does, the
   matching host address is computed as
   K->host_start + (D - K->tgt->tgt_start + K->tgt_offset) and handed to
   acc_unmap_data.  base_dev->free_func is then called with the device's
   target_id and D.  */
118 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
119 the device address is mapped. We choose to check if it mapped,
120 and if it is, to unmap it. */
125 struct goacc_thread
*thr
= goacc_thread ();
130 /* We don't have to call lazy open here, as the ptr value must have
131 been returned by acc_malloc. It's not permitted to pass NULL in
132 (unless you got that null from acc_malloc). */
133 if ((k
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1)))
137 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
139 acc_unmap_data ((void *)(k
->host_start
+ offset
));
142 base_dev
->free_func (thr
->dev
->target_id
, d
);
/* acc_memcpy_to_device: copy S bytes from host address H to device address
   D by calling base_dev->host2dev_func with the current thread's device
   target_id.  Note the argument order passed on: destination D first, then
   source H.  */
146 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
148 /* No need to call lazy open here, as the device pointer must have
149 been obtained from a routine that did that. */
150 struct goacc_thread
*thr
= goacc_thread ();
152 base_dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
/* acc_memcpy_from_device: copy S bytes from device address D to host
   address H by calling base_dev->dev2host_func with the current thread's
   device target_id.  Note the argument order passed on: destination H
   first, then source D.  */
156 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
158 /* No need to call lazy open here, as the device pointer must have
159 been obtained from a routine that did that. */
160 struct goacc_thread
*thr
= goacc_thread ();
162 base_dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
165 /* Return the device pointer that corresponds to host data H. Or NULL if no mapping exists. */
/* acc_deviceptr: translate host address H into the matching device
   address.  After goacc_lazy_initialize, lookup_host is asked for the
   one-byte range at H in the current device's mem_map.  From the result N,
   offset = H - N->host_start and the device address is
   N->tgt->tgt_start + N->tgt_offset + offset.
   NOTE(review): the handling of a failed lookup and the return statement
   are not visible in this listing.  */
169 acc_deviceptr (void *h
)
175 goacc_lazy_initialize ();
177 struct goacc_thread
*thr
= goacc_thread ();
179 n
= lookup_host (&thr
->dev
->mem_map
, h
, 1);
184 offset
= h
- n
->host_start
;
186 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
191 /* Return the host pointer that corresponds to device data D. Or NULL if no mapping exists. */
/* acc_hostptr: translate device address D into the matching host address.
   After goacc_lazy_initialize, lookup_dev is asked for the one-byte range
   at D in the current device's openacc.data_environ.  From the result N,
   offset = D - N->tgt->tgt_start + N->tgt_offset and the host address is
   N->host_start + offset.
   NOTE(review): the handling of a failed lookup and the return statement
   are not visible in this listing.  */
195 acc_hostptr (void *d
)
201 goacc_lazy_initialize ();
203 struct goacc_thread
*thr
= goacc_thread ();
205 n
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1);
210 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
212 h
= n
->host_start
+ offset
;
/* acc_is_present: after goacc_lazy_initialize, look up the S bytes at host
   address H in the current device's mem_map.  A hit N is then checked
   again: the condition below holds when H lies before N->host_start, when
   H + S runs past N->host_end, or when S is larger than the mapped block
   (N->host_end - N->host_start).
   NOTE(review): the statement guarded by that condition and the return
   statement are not visible in this listing.  */
217 /* Return 1 if host data [H,+S] is present on the device. */
220 acc_is_present (void *h
, size_t s
)
227 goacc_lazy_initialize ();
229 struct goacc_thread
*thr
= goacc_thread ();
230 struct gomp_device_descr
*acc_dev
= thr
->dev
;
232 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
234 if (n
&& ((uintptr_t)h
< n
->host_start
235 || (uintptr_t)h
+ s
> n
->host_end
236 || s
> n
->host_end
- n
->host_start
))
/* acc_map_data: register device range [D,+S] as the mapping of host range
   [H,+S] on the current thread's device.
   Visible steps, in order: lazy initialization; a test of the device's
   GOMP_OFFLOAD_CAP_SHARED_MEM capability, followed by the "cannot map data
   on shared-memory system" fatal error and a gomp_map_vars call with an
   empty variable list; then the "is a bad map" fatal error, a fatal error
   if lookup_host already knows [H,+S], a fatal error if lookup_dev already
   knows [D,+S], and a gomp_map_vars call that passes the addresses of
   hostaddrs, devaddrs, sizes and kinds (kinds is GOMP_MAP_ALLOC).  Finally
   the new descriptor is pushed on the head of acc_dev->openacc.data_environ.
   NOTE(review): the conditions guarding the first two fatal errors, the
   branch structure and several declarations are missing from this listing.
   Two declarations of `thr' are visible (original lines 256 and 268), so
   the second one presumably lives in a nested scope and shadows the first.
   The "%d" conversions print (int) S, which truncates sizes above INT_MAX
   in the diagnostic text.  */
242 /* Create a mapping for host [H,+S] -> device [D,+S] */
245 acc_map_data (void *h
, void *d
, size_t s
)
247 struct target_mem_desc
*tgt
;
252 unsigned short kinds
= GOMP_MAP_ALLOC
;
254 goacc_lazy_initialize ();
256 struct goacc_thread
*thr
= goacc_thread ();
257 struct gomp_device_descr
*acc_dev
= thr
->dev
;
259 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
262 gomp_fatal ("cannot map data on shared-memory system");
264 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
268 struct goacc_thread
*thr
= goacc_thread ();
271 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
272 (void *)h
, (int)s
, (void *)d
, (int)s
);
274 if (lookup_host (&acc_dev
->mem_map
, h
, s
))
275 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
278 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
279 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
282 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
283 &kinds
, true, false);
286 tgt
->prev
= acc_dev
->openacc
.data_environ
;
287 acc_dev
->openacc
.data_environ
= tgt
;
/* acc_unmap_data: remove the mapping whose host block starts at H.
   lookup_host is queried for the byte at H in the current device's mem_map.
   Two fatal errors are visible: "%p is not a mapped block", and
   "[%p,%d] surrounds1 %p" when the block found does not start exactly at H.
   When the descriptor's refcount equals 2, the chain at
   acc_dev->openacc.data_environ is walked under mem_map.lock to unlink the
   descriptor (the in-body comment explains why).  gomp_unmap_vars (t, true)
   is called at the end.
   NOTE(review): the guard of the first fatal error, the assignment of `t'
   and most of the unlink loop are missing from this listing.  */
291 acc_unmap_data (void *h
)
293 struct goacc_thread
*thr
= goacc_thread ();
294 struct gomp_device_descr
*acc_dev
= thr
->dev
;
296 /* No need to call lazy open, as the address must have been mapped. */
299 splay_tree_key n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
300 struct target_mem_desc
*t
;
303 gomp_fatal ("%p is not a mapped block", (void *)h
);
305 host_size
= n
->host_end
- n
->host_start
;
307 if (n
->host_start
!= (uintptr_t) h
)
308 gomp_fatal ("[%p,%d] surrounds1 %p",
309 (void *) n
->host_start
, (int) host_size
, (void *) h
);
313 if (t
->refcount
== 2)
315 struct target_mem_desc
*tp
;
317 /* This is the last reference, so pull the descriptor off the
318 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
319 freeing the device memory. */
323 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
325 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
332 acc_dev
->openacc
.data_environ
= t
->prev
;
337 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
340 gomp_unmap_vars (t
, true);
/* Bit flags for the F argument of present_create_copy.  */
/* FLAG_PRESENT: when this bit is clear, present_create_copy raises the
   "already mapped" fatal error.  */
343 #define FLAG_PRESENT (1 << 0)
/* FLAG_CREATE: when this bit is clear, present_create_copy raises the
   "not mapped" fatal error on the `else if' path.  */
344 #define FLAG_CREATE (1 << 1)
/* FLAG_COPY: passed by acc_copyin and acc_present_or_copyin.  The place
   where present_create_copy tests it is not visible in this listing;
   presumably it selects a copy-to-device map kind -- TODO confirm.  */
345 #define FLAG_COPY (1 << 2)
/* present_create_copy: common worker behind acc_create, acc_copyin,
   acc_present_or_create and acc_present_or_copyin.  F is a combination of
   FLAG_PRESENT, FLAG_CREATE and FLAG_COPY; H and S give the host range.
   Visible steps, in order: the "is a bad range" fatal error; lazy
   initialization; lookup_host for [H,+S] in the current device's mem_map;
   the device address D = N->tgt->tgt_start + N->tgt_offset; the "already
   mapped" fatal error when FLAG_PRESENT is clear; the "not mapped" fatal
   error when H + S lies beyond N->host_end; on the `else if' path, the
   "not mapped" fatal error when FLAG_CREATE is clear; otherwise a
   gomp_map_vars call for the single host address, after which the new
   descriptor is pushed on the head of acc_dev->openacc.data_environ with
   mem_map.lock held.
   NOTE(review): the guard of the first fatal error, the test on N, the use
   of FLAG_COPY, the tail of the gomp_map_vars call and the return statement
   are missing from this listing.  `h + s' is arithmetic on a `void *'
   parameter (a GNU C extension), and the "%d" conversions print (int) S.  */
348 present_create_copy (unsigned f
, void *h
, size_t s
)
354 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
356 goacc_lazy_initialize ();
358 struct goacc_thread
*thr
= goacc_thread ();
359 struct gomp_device_descr
*acc_dev
= thr
->dev
;
361 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
365 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
367 if (!(f
& FLAG_PRESENT
))
368 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
369 (void *)h
, (int)s
, (void *)d
, (int)s
);
370 if ((h
+ s
) > (void *)n
->host_end
)
371 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
373 else if (!(f
& FLAG_CREATE
))
375 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
379 struct target_mem_desc
*tgt
;
381 unsigned short kinds
;
387 kinds
= GOMP_MAP_ALLOC
;
389 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
392 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
395 tgt
->prev
= acc_dev
->openacc
.data_environ
;
396 acc_dev
->openacc
.data_environ
= tgt
;
398 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
/* acc_create: returns present_create_copy (FLAG_CREATE, H, S).
   FLAG_PRESENT is not passed, so a range that is already mapped ends in
   present_create_copy's "already mapped" fatal error.  */
405 acc_create (void *h
, size_t s
)
407 return present_create_copy (FLAG_CREATE
, h
, s
);
/* acc_copyin: returns present_create_copy (FLAG_CREATE | FLAG_COPY, H, S).
   FLAG_PRESENT is not passed, so a range that is already mapped ends in
   present_create_copy's "already mapped" fatal error.  */
411 acc_copyin (void *h
, size_t s
)
413 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* acc_present_or_create: returns
   present_create_copy (FLAG_PRESENT | FLAG_CREATE, H, S), i.e. both an
   existing mapping and a missing one are accepted by the flag checks.  */
417 acc_present_or_create (void *h
, size_t s
)
419 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
/* acc_present_or_copyin: returns
   present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, H, S), i.e.
   all three behaviour flags are set.  */
423 acc_present_or_copyin (void *h
, size_t s
)
425 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
/* FLAG_COPYOUT: bit flag for delete_copyout's F argument.  When set,
   delete_copyout calls dev2host_func to copy the data back to the host.
   It has the same value as FLAG_PRESENT, but the two are used with
   different functions.  */
428 #define FLAG_COPYOUT (1 << 0)
/* delete_copyout: common worker behind acc_delete and acc_copyout.  F is
   either 0 or FLAG_COPYOUT; H and S give the host range.
   Visible steps, in order: lookup_host for [H,+S] in the current device's
   mem_map; the "[%p,%d] is not mapped" fatal error; the device address
   D = N->tgt->tgt_start + N->tgt_offset; the "surrounds2" fatal error
   unless the request matches the mapped block exactly (same start address
   and same size); a dev2host_func copy of S bytes from D to H when
   FLAG_COPYOUT is set; and a free_func call on D.
   NOTE(review): the guard of the first fatal error and the statements
   between the copy and the free_func call are missing from this listing.
   The comment opened inside the body is cut off before its end.  */
431 delete_copyout (unsigned f
, void *h
, size_t s
)
436 struct goacc_thread
*thr
= goacc_thread ();
437 struct gomp_device_descr
*acc_dev
= thr
->dev
;
439 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
441 /* No need to call lazy open, as the data must already have been
445 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
447 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
449 host_size
= n
->host_end
- n
->host_start
;
451 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
452 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
453 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
455 if (f
& FLAG_COPYOUT
)
456 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
460 acc_dev
->free_func (acc_dev
->target_id
, d
);
/* acc_delete: forwards to delete_copyout with F = 0, so FLAG_COPYOUT is
   clear and no device-to-host copy is requested.  */
464 acc_delete (void *h
, size_t s
)
466 delete_copyout (0, h
, s
);
/* acc_copyout: forwards to delete_copyout with FLAG_COPYOUT, so the data
   is copied from the device back to H as part of the removal.  */
469 void acc_copyout (void *h
, size_t s
)
471 delete_copyout (FLAG_COPYOUT
, h
, s
);
/* update_dev_host: common worker behind acc_update_device (IS_DEV = 1) and
   acc_update_self (IS_DEV = 0).
   Visible steps, in order: lookup_host for [H,+S] in the current device's
   mem_map; the "[%p,%d] is not mapped" fatal error; the device address
   D = N->tgt->tgt_start + N->tgt_offset; a host2dev_func call (D, H, S) and
   a dev2host_func call (H, D, S).
   NOTE(review): the test that chooses between the two transfer calls is
   missing from this listing; presumably a non-zero IS_DEV selects the
   host-to-device copy -- TODO confirm.  In the visible code D is the start
   of the mapped block, and no offset of H within the block (H -
   N->host_start) is added.  Verify that callers only pass block-aligned H,
   or that the missing lines adjust D.  The comment opened inside the body
   is cut off before its end.  */
475 update_dev_host (int is_dev
, void *h
, size_t s
)
479 struct goacc_thread
*thr
= goacc_thread ();
480 struct gomp_device_descr
*acc_dev
= thr
->dev
;
482 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
484 /* No need to call lazy open, as the data must already have been
488 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
490 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
493 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
495 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
/* acc_update_device: forwards to update_dev_host with IS_DEV = 1 for the
   host range [H,+S].  */
499 acc_update_device (void *h
, size_t s
)
501 update_dev_host (1, h
, s
);
/* acc_update_self: forwards to update_dev_host with IS_DEV = 0 for the
   host range [H,+S].  */
505 acc_update_self (void *h
, size_t s
)
507 update_dev_host (0, h
, s
);
/* gomp_acc_insert_pointer: map MAPNUM host addresses on the current
   thread's device.  HOSTADDRS, SIZES and `kinds' are passed straight to
   gomp_map_vars (with NULL for the device addresses), and the descriptor it
   returns is pushed on the head of acc_dev->openacc.data_environ.
   gomp_debug trace messages are emitted before and after the mapping call.
   NOTE(review): the end of the parameter list (original line 512,
   presumably the declaration of `kinds') is missing from this listing.  */
511 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
514 struct target_mem_desc
*tgt
;
515 struct goacc_thread
*thr
= goacc_thread ();
516 struct gomp_device_descr
*acc_dev
= thr
->dev
;
518 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
519 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
520 NULL
, sizes
, kinds
, true, false);
521 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
522 tgt
->prev
= acc_dev
->openacc
.data_environ
;
523 acc_dev
->openacc
.data_environ
= tgt
;
/* gomp_acc_remove_pointer: undo the mapping that contains host address H.
   MINREFS is 2 when MAPNUM is 1 and 3 otherwise.
   Visible steps, in order: lookup_host for the byte at H in the current
   device's mem_map; the "%p is not a mapped block" fatal error; then, with
   mem_map.lock held, a walk of acc_dev->openacc.data_environ to unlink the
   descriptor when its refcount equals MINREFS, and the assignment
   t->list[0]->copy_from = 1.  After the lock is released,
   gomp_unmap_vars (t, true) is called when ASYNC is below acc_async_noval;
   gomp_copy_from_async (t) and
   acc_dev->openacc.register_async_cleanup_func (t) are also visible.
   gomp_debug trace messages bracket the work.
   NOTE(review): the guard of the fatal error, the assignment of `t', the
   body of the unlink loop, the condition on the copy_from assignment
   (presumably FORCE_COPYFROM) and the `else' that presumably separates the
   synchronous path from the asynchronous one are missing from this
   listing -- TODO confirm.  */
527 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
529 struct goacc_thread
*thr
= goacc_thread ();
530 struct gomp_device_descr
*acc_dev
= thr
->dev
;
532 struct target_mem_desc
*t
;
533 int minrefs
= (mapnum
== 1) ? 2 : 3;
535 n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
538 gomp_fatal ("%p is not a mapped block", (void *)h
);
540 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
544 struct target_mem_desc
*tp
;
546 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
548 if (t
->refcount
== minrefs
)
550 /* This is the last reference, so pull the descriptor off the
551 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
552 freeing the device memory. */
556 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
564 acc_dev
->openacc
.data_environ
= t
->prev
;
571 t
->list
[0]->copy_from
= 1;
573 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
575 /* If running synchronously, unmap immediately. */
576 if (async
< acc_async_noval
)
577 gomp_unmap_vars (t
, true);
580 gomp_copy_from_async (t
);
581 acc_dev
->openacc
.register_async_cleanup_func (t
);
584 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);