1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2014 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
33 #include "libgomp_target.h"
39 #include "splay-tree.h"
41 /* Return the block containing the host range [H,+S), or NULL if not contained. */
/* Search the splay tree of MEM_MAP for a mapping that matches the host
   range starting at H and spanning S bytes.  The tree lookup is done
   with the lock of MEM_MAP held.
   NOTE(review): this excerpt does not show the braces, the declaration
   of KEY or the return statement; presumably KEY is returned -- confirm
   against the full file.  */
43 attribute_hidden splay_tree_key
44 lookup_host (struct gomp_memory_mapping
*mem_map
, void *h
, size_t s
)
46 struct splay_tree_key_s node
;
/* Fill in a temporary search key whose bounds are H and H + S.  */
49 node
.host_start
= (uintptr_t) h
;
50 node
.host_end
= (uintptr_t) h
+ s
;
/* Take the mapping lock around the tree lookup and drop it right after.  */
52 gomp_mutex_lock (&mem_map
->lock
);
54 key
= splay_tree_lookup (&mem_map
->splay_tree
, &node
);
56 gomp_mutex_unlock (&mem_map
->lock
);
61 /* Return block containing [D,+S), or NULL if not contained.
62 The list isn't ordered by device address, so we have to iterate
63 over the whole array. This is not expected to be a common operation. */
// Find the mapping entry for the device range that starts at D and spans
// S bytes, walking the chain of target descriptors that begins at TGT.
// NOTE(review): this excerpt omits the return type, the braces, the
// declarations of i and offset, any check of TGT or T against NULL, and
// every return statement. Confirm those against the full file.
67 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
70 struct target_mem_desc
*t
;
71 struct gomp_memory_mapping
*mem_map
;
// The lock used below belongs to the mem_map of the first descriptor.
76 mem_map
= tgt
->mem_map
;
78 gomp_mutex_lock (&mem_map
->lock
);
// Follow the prev links from TGT, testing each descriptor.
80 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
// Match when d is at or after tgt_start and d + s is at or before tgt_end.
// NOTE(review): the statement guarded by this test is not visible;
// presumably it leaves the loop with T pointing at the match.
82 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
86 gomp_mutex_unlock (&mem_map
->lock
);
// Scan all list_count entries of the array of the selected descriptor.
// This second loop appears after the unlock call above.
91 for (i
= 0; i
< t
->list_count
; i
++)
95 splay_tree_key k
= &t
->array
[i
].key
;
// offset is D relative to the device start of the descriptor, plus the
// tgt_offset of this entry.
96 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
// The entry qualifies when host_start + offset does not exceed host_end.
// NOTE(review): the guarded statement is not visible; presumably it
// returns K.
98 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
105 /* OpenACC is silent on how memory exhaustion is indicated. We return the device allocator's result (presumably NULL on failure). */
// Request S bytes from the allocation hook of base_dev for the device of
// the calling thread and return the hook result.
// NOTE(review): the return type, the braces and any other statements of
// this definition (for example a check on S) are not visible here.
109 acc_malloc (size_t s
)
// Presumably performs one-time runtime setup before the thread state is
// read -- confirm against the definition of goacc_lazy_initialize.
114 goacc_lazy_initialize ();
116 struct goacc_thread
*thr
= goacc_thread ();
// The device is identified to the hook by the target_id of thr->dev.
118 return base_dev
->alloc_func (thr
->dev
->target_id
, s
);
121 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
122 the device address is mapped. We choose to check if it is mapped,
123 and if it is, to unmap it. */
/* Body of the device-memory release routine.  The signature line is not
   part of this excerpt; presumably this is acc_free taking a device
   pointer D -- confirm against the full file.  If D lies inside an
   existing mapping, the matching host block is unmapped; D is then
   passed to the free_func hook of base_dev.
   NOTE(review): the declarations of K and OFFSET and the braces are not
   visible here.  */
128 struct goacc_thread
*thr
= goacc_thread ();
133 /* We don't have to call lazy open here, as the ptr value must have
134 been returned by acc_malloc. It's not permitted to pass NULL in
135 (unless you got that null from acc_malloc). */
/* Ask whether the single byte at D belongs to a mapped device block.  */
136 if ((k
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1)))
/* Distance of D from the device start of the descriptor, plus the
   tgt_offset of the entry.  */
140 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
/* Unmap using the host address obtained by adding that offset to the
   host_start of the entry.  */
142 acc_unmap_data ((void *)(k
->host_start
+ offset
));
/* Hand D to the device free hook, identified by the target_id of the
   device of this thread.  */
145 base_dev
->free_func (thr
->dev
->target_id
, d
);
/* Transfer S bytes between host address H and device address D by calling
   the host2dev_func hook of base_dev with (target_id, D, H, S).
   NOTE(review): the return type and braces are not visible in this
   excerpt.  */
149 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
151 /* No need to call lazy open here, as the device pointer must have
152 been obtained from a routine that did that. */
153 struct goacc_thread
*thr
= goacc_thread ();
/* The target_id comes from the device of the calling thread.  */
155 base_dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
/* Transfer S bytes between device address D and host address H by calling
   the dev2host_func hook of base_dev with (target_id, H, D, S).
   NOTE(review): the return type and braces are not visible in this
   excerpt.  */
159 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
161 /* No need to call lazy open here, as the device pointer must have
162 been obtained from a routine that did that. */
163 struct goacc_thread
*thr
= goacc_thread ();
/* The target_id comes from the device of the calling thread.  */
165 base_dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
168 /* Return the device pointer that corresponds to host data H. Or NULL if there is no mapping. */
// Translate host address H into the matching device address.
// NOTE(review): the return type, braces, declarations of n, offset and d,
// any check of the lookup result against NULL, and the return statement
// are not visible in this excerpt.
172 acc_deviceptr (void *h
)
178 goacc_lazy_initialize ();
180 struct goacc_thread
*thr
= goacc_thread ();
// Look up the mapping covering the single byte at H.
182 n
= lookup_host (&thr
->dev
->mem_map
, h
, 1);
// Distance of H from the start of the mapped host block.
187 offset
= h
- n
->host_start
;
// Device address: device start of the descriptor, plus the tgt_offset of
// the entry, plus that distance.
189 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
194 /* Return the host pointer that corresponds to device data D. Or NULL if there is no mapping. */
// Translate device address D into the matching host address.
// NOTE(review): the return type, braces, declarations of n, offset and h,
// any check of the lookup result against NULL, and the return statement
// are not visible in this excerpt.
198 acc_hostptr (void *d
)
204 goacc_lazy_initialize ();
206 struct goacc_thread
*thr
= goacc_thread ();
// Search the data_environ chain of this device for the single byte at D.
208 n
= lookup_dev (thr
->dev
->openacc
.data_environ
, d
, 1);
// Distance of D from the device start of the descriptor, plus the
// tgt_offset of the entry.
213 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
// Host address: host_start of the entry plus that offset.
215 h
= n
->host_start
+ offset
;
220 /* Return 1 if host data [H,+S] is present on the device. */
/* Look up host range [H,+S) in the mem_map of the current device and
   validate the hit against the bounds of the block that was found.
   NOTE(review): the return type, braces, declaration of N, the statement
   guarded by the test below and the return statement are not visible in
   this excerpt.  */
223 acc_is_present (void *h
, size_t s
)
230 goacc_lazy_initialize ();
232 struct goacc_thread
*thr
= goacc_thread ();
233 struct gomp_device_descr
*acc_dev
= thr
->dev
;
235 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
/* This test is true for a hit that does not fully contain the request:
   the range starts before the block, ends after it, or is longer than
   the block.  Presumably the guarded statement then discards N --
   confirm.  */
237 if (n
&& ((uintptr_t)h
< n
->host_start
238 || (uintptr_t)h
+ s
> n
->host_end
239 || s
> n
->host_end
- n
->host_start
))
245 /* Create a mapping for host [H,+S] -> device [D,+S] */
/* Map host block H to device block D, both S bytes long, through
   gomp_map_vars, and link the resulting descriptor at the head of the
   data_environ chain of the device.
   NOTE(review): this excerpt omits the return type, the braces, the
   declarations of mapnum, hostaddrs, devaddrs and sizes, the conditions
   guarding two of the gomp_fatal calls, and the tail of two gomp_fatal
   argument lists.  */
248 acc_map_data (void *h
, void *d
, size_t s
)
250 struct target_mem_desc
*tgt
;
255 unsigned short kinds
= GOMP_MAP_ALLOC
;
257 goacc_lazy_initialize ();
259 struct goacc_thread
*thr
= goacc_thread ();
260 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* Devices that advertise TARGET_CAP_SHARED_MEM are handled separately.  */
262 if (acc_dev
->capabilities
& TARGET_CAP_SHARED_MEM
)
/* NOTE(review): the condition guarding this diagnostic is not visible.  */
265 gomp_fatal ("cannot map data on shared-memory system");
/* Call gomp_map_vars with no device and zero mappings, keeping the
   result in TGT.  */
267 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
/* NOTE(review): THR is declared a second time here, so this line is
   presumably inside a nested scope that shadows the earlier THR --
   confirm.  */
271 struct goacc_thread
*thr
= goacc_thread ();
/* NOTE(review): the condition guarding this diagnostic is not visible.
   S is printed through a cast to int.  */
274 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
275 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* Reject a host range for which lookup_host already finds a mapping.  */
277 if (lookup_host (&acc_dev
->mem_map
, h
, s
))
278 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
/* Likewise reject a device range that lookup_dev already finds.  */
281 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
282 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
/* Create the mapping, passing the addresses of the local copies of the
   host address, device address, size and kind.  */
285 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
286 &kinds
, true, false);
/* Push TGT onto the front of the data_environ chain.
   NOTE(review): unlike the equivalent update in present_create_copy, no
   mem_map lock is visible around this update in this excerpt.  */
289 tgt
->prev
= acc_dev
->openacc
.data_environ
;
290 acc_dev
->openacc
.data_environ
= tgt
;
/* Remove the mapping whose host block starts exactly at H.  When the
   descriptor is down to its last reference it is first unlinked from the
   data_environ chain, then gomp_unmap_vars is called on it.
   NOTE(review): this excerpt omits the return type, the braces, the
   declaration of host_size, the assignment that gives T its value before
   the refcount test, the NULL test guarding the first gomp_fatal, and the
   body of the chain-walking loop.  */
294 acc_unmap_data (void *h
)
296 struct goacc_thread
*thr
= goacc_thread ();
297 struct gomp_device_descr
*acc_dev
= thr
->dev
;
299 /* No need to call lazy open, as the address must have been mapped. */
/* Find the mapping that covers the single byte at H.  */
302 splay_tree_key n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
303 struct target_mem_desc
*t
;
/* NOTE(review): the condition guarding this diagnostic is not visible;
   presumably it fires when N is NULL.  */
306 gomp_fatal ("%p is not a mapped block", (void *)h
);
308 host_size
= n
->host_end
- n
->host_start
;
/* H must be the very start of the mapped block, not an interior
   address.  */
310 if (n
->host_start
!= (uintptr_t) h
)
311 gomp_fatal ("[%p,%d] surrounds1 %p",
312 (void *) n
->host_start
, (int) host_size
, (void *) h
);
/* A refcount of exactly 2 is treated as the last reference (see the
   existing comment below).  */
316 if (t
->refcount
== 2)
318 struct target_mem_desc
*tp
;
320 /* This is the last reference, so pull the descriptor off the
321 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
322 freeing the device memory. */
/* The chain is edited with the mem_map lock of the device held.  */
326 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
/* Walk the chain from its head, with TP starting at NULL.
   NOTE(review): the rest of the loop header and its body are not
   visible; presumably TP trails T as the previous element.  */
328 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
/* Unlinking at the head: the chain now starts at the successor of T.  */
335 acc_dev
->openacc
.data_environ
= t
->prev
;
340 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
343 gomp_unmap_vars (t
, true);
/* Flag bits for the F argument of present_create_copy.  */
/* PCC_Present: when clear, an already-mapped range is reported with
   gomp_fatal.  */
346 #define PCC_Present (1 << 0)
/* PCC_Create: when clear, a range that is not mapped is reported with
   gomp_fatal.  */
347 #define PCC_Create (1 << 1)
/* PCC_Copy: NOTE(review): its test is not visible in this excerpt;
   presumably it selects GOMP_MAP_ALLOC_TO over GOMP_MAP_ALLOC.  */
348 #define PCC_Copy (1 << 2)
/* Shared worker for the create / copyin / present_or_* entry points.
   F is a combination of the PCC_* bits, H and S describe the host range.
   The range is looked up first; depending on F an existing or a missing
   mapping is a fatal error, and a new mapping is created through
   gomp_map_vars and linked onto the data_environ chain.
   NOTE(review): this excerpt omits the braces, the declarations of n, d,
   mapnum and hostaddrs, the conditions that select between the visible
   branches (including the test on the lookup result and the test that
   picks the map kind), the end of the gomp_map_vars argument list, and
   the return statement.  */
350 attribute_hidden
void *
351 present_create_copy (unsigned f
, void *h
, size_t s
)
/* NOTE(review): the condition guarding this diagnostic is not visible.
   S is printed through a cast to int.  */
357 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
359 goacc_lazy_initialize ();
361 struct goacc_thread
*thr
= goacc_thread ();
362 struct gomp_device_descr
*acc_dev
= thr
->dev
;
364 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
/* Device address of the found entry: device start of the descriptor plus
   the tgt_offset of the entry.
   NOTE(review): unlike acc_deviceptr, the distance of H from host_start
   is not added here -- confirm that this is intended.  */
368 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
/* Without PCC_Present an existing mapping is an error.  */
370 if (!(f
& PCC_Present
))
371 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
372 (void *)h
, (int)s
, (void *)d
, (int)s
);
/* The request must not run past the end of the mapped host block.  */
373 if ((h
+ s
) > (void *)n
->host_end
)
374 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
/* Without PCC_Create a missing mapping is an error.  */
376 else if (!(f
& PCC_Create
))
378 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
382 struct target_mem_desc
*tgt
;
384 unsigned short kinds
;
/* NOTE(review): the test choosing between these two assignments is not
   visible; presumably PCC_Copy selects GOMP_MAP_ALLOC_TO.  */
388 kinds
= GOMP_MAP_ALLOC_TO
;
390 kinds
= GOMP_MAP_ALLOC
;
/* Create the mapping; no device address list is supplied (NULL).  */
392 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
/* Push TGT onto the front of the data_environ chain with the mem_map
   lock held.  */
395 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
398 tgt
->prev
= acc_dev
->openacc
.data_environ
;
399 acc_dev
->openacc
.data_environ
= tgt
;
401 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
/* Forward to present_create_copy with only PCC_Create set and return its
   result.  Because PCC_Present is not set, an already-mapped range is a
   fatal error there.
   NOTE(review): the return type and braces are not visible in this
   excerpt.  */
408 acc_create (void *h
, size_t s
)
410 return present_create_copy (PCC_Create
, h
, s
);
/* Forward to present_create_copy with PCC_Create and PCC_Copy set and
   return its result.  Because PCC_Present is not set, an already-mapped
   range is a fatal error there.
   NOTE(review): the return type and braces are not visible in this
   excerpt.  */
414 acc_copyin (void *h
, size_t s
)
416 return present_create_copy (PCC_Create
| PCC_Copy
, h
, s
);
/* Forward to present_create_copy with PCC_Present and PCC_Create set and
   return its result, so both an existing and a missing mapping are
   accepted.
   NOTE(review): the return type and braces are not visible in this
   excerpt.  */
420 acc_present_or_create (void *h
, size_t s
)
422 return present_create_copy (PCC_Present
| PCC_Create
, h
, s
);
/* Forward to present_create_copy with PCC_Present, PCC_Create and
   PCC_Copy all set and return its result, so both an existing and a
   missing mapping are accepted.
   NOTE(review): the return type and braces are not visible in this
   excerpt.  */
426 acc_present_or_copyin (void *h
, size_t s
)
428 return present_create_copy (PCC_Present
| PCC_Create
| PCC_Copy
, h
, s
);
/* Flag bit for the F argument of delete_copyout; acc_copyout passes it
   and acc_delete passes 0.  NOTE(review): its test is not visible in this
   excerpt; presumably it requests the dev2host_func transfer before the
   device memory is freed.  */
431 #define DC_Copyout (1 << 0)
// Shared worker for acc_delete and acc_copyout. F is 0 or DC_Copyout,
// H and S describe a host range that must match a mapped block exactly.
// NOTE(review): this excerpt omits the return type, the braces, the
// declarations of n, d and host_size, the test guarding the first
// gomp_fatal, the test on F, and any statements between the visible ones.
434 delete_copyout (unsigned f
, void *h
, size_t s
)
439 struct goacc_thread
*thr
= goacc_thread ();
440 struct gomp_device_descr
*acc_dev
= thr
->dev
;
442 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
444 /* No need to call lazy open, as the data must already have been
// NOTE(review): the block comment opened on the previous line has no
// visible terminator in this excerpt.
// NOTE(review): the condition guarding this diagnostic is not visible;
// presumably it fires when N is NULL.
448 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
// Device address of the entry: device start of the descriptor plus the
// tgt_offset of the entry.
450 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
452 host_size
= n
->host_end
- n
->host_start
;
// The request must start at the start of the block and have its size.
454 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
455 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
456 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
// Transfer through the dev2host_func hook with (target_id, H, D, S).
// NOTE(review): presumably guarded by a test of DC_Copyout in F.
459 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
// Release the device block through the free_func hook.
463 acc_dev
->free_func (acc_dev
->target_id
, d
);
// Forward to delete_copyout with no flag bits set.
// NOTE(review): the return type and braces are not visible in this
// excerpt.
467 acc_delete (void *h
, size_t s
)
469 delete_copyout (0, h
, s
);
// Forward to delete_copyout with the DC_Copyout flag set.
// NOTE(review): the braces are not visible in this excerpt.
472 void acc_copyout (void *h
, size_t s
)
474 delete_copyout (DC_Copyout
, h
, s
);
// Shared worker for acc_update_device (IS_DEV = 1) and acc_update_self
// (IS_DEV = 0). It looks up the mapping for host range H, S and calls a
// transfer hook of the device.
// NOTE(review): this excerpt omits the return type, the braces, the
// declarations of n and d, the test guarding gomp_fatal, and the test
// that chooses between the two transfer calls.
478 update_dev_host (int is_dev
, void *h
, size_t s
)
482 struct goacc_thread
*thr
= goacc_thread ();
483 struct gomp_device_descr
*acc_dev
= thr
->dev
;
485 n
= lookup_host (&acc_dev
->mem_map
, h
, s
);
487 /* No need to call lazy open, as the data must already have been
// NOTE(review): the block comment opened on the previous line has no
// visible terminator in this excerpt.
// NOTE(review): the condition guarding this diagnostic is not visible;
// presumably it fires when N is NULL.
491 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
// Device address of the entry: device start of the descriptor plus the
// tgt_offset of the entry.
493 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
// Both transfer directions appear below. Presumably a nonzero IS_DEV
// selects host2dev_func and zero selects dev2host_func -- confirm.
496 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
498 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
// Forward to update_dev_host with IS_DEV set to 1.
// NOTE(review): the return type and braces are not visible in this
// excerpt.
502 acc_update_device (void *h
, size_t s
)
504 update_dev_host (1, h
, s
);
// Forward to update_dev_host with IS_DEV set to 0.
// NOTE(review): the return type and braces are not visible in this
// excerpt.
508 acc_update_self (void *h
, size_t s
)
510 update_dev_host (0, h
, s
);
// Map MAPNUM host addresses through gomp_map_vars and link the resulting
// descriptor at the head of the data_environ chain of the device.
// NOTE(review): this excerpt omits the return type, the braces and the
// end of the parameter list; KINDS is used below and is presumably the
// remaining parameter -- confirm.
514 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
517 struct target_mem_desc
*tgt
;
518 struct goacc_thread
*thr
= goacc_thread ();
519 struct gomp_device_descr
*acc_dev
= thr
->dev
;
521 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
// No device address list is supplied (NULL).
522 tgt
= gomp_map_vars ((struct gomp_device_descr
*) acc_dev
, mapnum
, hostaddrs
,
523 NULL
, sizes
, kinds
, true, false);
524 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
// Push TGT onto the front of the data_environ chain.
// NOTE(review): no mem_map lock is visible around this update in this
// excerpt, unlike the equivalent update in present_create_copy.
525 tgt
->prev
= acc_dev
->openacc
.data_environ
;
526 acc_dev
->openacc
.data_environ
= tgt
;
// Undo a mapping for host address H: optionally unlink its descriptor
// from the data_environ chain, then unmap it either immediately or
// through the asynchronous cleanup path, depending on ASYNC.
// NOTE(review): this excerpt omits the return type, the braces, the
// declaration of n, the assignment that gives T its value before the
// refcount test, the test guarding gomp_fatal, the body of the
// chain-walking loop, the test on FORCE_COPYFROM, and the else keyword
// that presumably separates the two unmapping paths.
530 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
532 struct goacc_thread
*thr
= goacc_thread ();
533 struct gomp_device_descr
*acc_dev
= thr
->dev
;
535 struct target_mem_desc
*t
;
// Reference count treated as the last reference: 2 when MAPNUM is 1,
// otherwise 3.
536 int minrefs
= (mapnum
== 1) ? 2 : 3;
// Find the mapping that covers the single byte at H.
538 n
= lookup_host (&acc_dev
->mem_map
, h
, 1);
// NOTE(review): the condition guarding this diagnostic is not visible;
// presumably it fires when N is NULL.
541 gomp_fatal ("%p is not a mapped block", (void *)h
);
543 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
547 struct target_mem_desc
*tp
;
// The refcount test and chain edit happen with the mem_map lock held.
549 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
551 if (t
->refcount
== minrefs
)
553 /* This is the last reference, so pull the descriptor off the
554 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
555 freeing the device memory. */
// Walk the chain from its head, with TP starting at NULL.
// NOTE(review): the rest of the loop header and its body are not
// visible; presumably TP trails T as the previous element.
559 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
// Unlinking at the head: the chain now starts at the successor of T.
567 acc_dev
->openacc
.data_environ
= t
->prev
;
// Mark the first list entry of T with copy_from = 1.
// NOTE(review): presumably guarded by FORCE_COPYFROM -- confirm.
574 t
->list
[0]->copy_from
= 1;
576 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
578 /* If running synchronously, unmap immediately. */
579 if (async
< acc_async_noval
)
580 gomp_unmap_vars (t
, true);
// Otherwise start the copy back and register T with the async cleanup
// hook of the device.
583 gomp_copy_from_async (t
);
584 acc_dev
->openacc
.register_async_cleanup_func (t
);
587 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);