libgomp/oacc-mem.c
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2015 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include "splay-tree.h"
#include <stdint.h>
#include <assert.h>
/* Return block containing [H->S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}
/* Return block containing [D->S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on
   entry, and remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  for (i = 0; i < t->list_count; i++)
    {
      void *offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}
/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}
/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check whether it is
   mapped, and if it is, to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the pointer value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that NULL from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  acc_dev->free_func (acc_dev->target_id, d);
}
void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
}
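
/* A minimal usage sketch for the four routines above, assuming a discrete
   (non-shared-memory) device; the buffer name and size are made up for the
   example, and the block is guarded out so it does not affect the build.  */
#if 0
static void
example_explicit_device_buffer (void)
{
  static float host_buf[256];
  size_t bytes = sizeof host_buf;

  void *dev_buf = acc_malloc (bytes);  /* may return NULL on exhaustion */
  if (!dev_buf)
    return;

  acc_memcpy_to_device (dev_buf, host_buf, bytes);
  /* ... device kernels operate on dev_buf here ... */
  acc_memcpy_from_device (host_buf, dev_buf, bytes);

  acc_free (dev_buf);
}
#endif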
/* Return the device pointer that corresponds to host data H.  Or NULL
   if no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}
/* This function is used as a helper in generated code to implement pointer
   lookup in host_data regions.  Unlike acc_deviceptr, it returns its argument
   unchanged on a shared-memory system (e.g. the host).  */

void *
GOACC_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0)
    {
      n = lookup_host (thr->dev, h, 1);

      if (!n)
        return NULL;

      offset = h - n->host_start;

      d = n->tgt->tgt_start + n->tgt_offset + offset;

      return d;
    }
  else
    return h;
}
/* Return the host pointer that corresponds to device data D.  Or NULL
   if no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t) h < n->host_start
            || (uintptr_t) h + s > n->host_end
            || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
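
/* A minimal sketch of how the pointer-translation and presence queries above
   fit together, assuming DATA was mapped earlier (for instance by acc_copyin);
   the parameter names are made up for the example.  */
#if 0
static void
example_pointer_queries (float *data, size_t bytes)
{
  if (acc_is_present (data, bytes))
    {
      void *dev = acc_deviceptr (data);   /* host address -> device address */
      void *host = acc_hostptr (dev);     /* device address -> host address */

      assert (host == (void *) data);
    }
}
#endif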
/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");

      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
        gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                    (void *)h, (int)s, (void *)d, (int)s);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
                      (int)s);
        }

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
                      (int)s);
        }

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
                           &kinds, true, false);
    }

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
                  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain.  This prevents gomp_unmap_vars, via gomp_unmap_tgt, from
         freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
           tp = t, t = t->prev)
        if (n->tgt == t)
          {
            if (tp)
              tp->prev = t->prev;
            else
              acc_dev->openacc.data_environ = t->prev;

            break;
          }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
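
/* A minimal sketch pairing acc_map_data with acc_unmap_data: device memory
   from acc_malloc is associated with existing host storage so that later
   lookups treat the host block as present.  The names and size are made up,
   and a discrete (non-shared-memory) device is assumed.  */
#if 0
static void
example_map_unmap (void)
{
  static float host_buf[256];
  size_t bytes = sizeof host_buf;
  void *dev_buf = acc_malloc (bytes);

  acc_map_data (host_buf, dev_buf, bytes);
  assert (acc_is_present (host_buf, bytes));

  /* Remove the association without freeing the device memory, then
     release the device buffer explicitly.  */
  acc_unmap_data (host_buf);
  acc_free (dev_buf);
}
#endif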
#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
                      (void *)h, (int)s, (void *)d, (int)s);
        }
      if ((h + s) > (void *) n->host_end)
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
        }

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
        kinds = GOMP_MAP_TO;
      else
        kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
                           false);

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}
void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE, h, s);
}

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
}

void *
acc_present_or_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
}

void *
acc_present_or_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
}
#define FLAG_COPYOUT (1 << 0)

static void
delete_copyout (unsigned f, void *h, size_t s)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
                  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  acc_dev->free_func (acc_dev->target_id, d);
}
void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s);
}
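
/* A minimal sketch of the mapping lifecycle built from the entry points
   above: acc_copyin maps and uploads, acc_copyout downloads and unmaps, while
   acc_create/acc_delete do the same without any data transfer.  The array is
   made up for the example.  */
#if 0
static void
example_copyin_copyout (void)
{
  static double field[512];
  size_t bytes = sizeof field;

  acc_copyin (field, bytes);    /* map and copy host -> device */
  /* ... device kernels read and update the mapped copy of field ... */
  acc_copyout (field, bytes);   /* copy device -> host and unmap */

  acc_create (field, bytes);    /* map without copying */
  /* ... device initializes its copy of field from scratch ... */
  acc_delete (field, bytes);    /* unmap without copying back */
}
#endif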
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset);

  gomp_mutex_unlock (&acc_dev->lock);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
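
/* A minimal sketch of the update entry points above, assuming FIELD is
   already present (for instance mapped by acc_copyin); the parameter names
   are made up for the example.  */
#if 0
static void
example_update (double *field, size_t bytes)
{
  /* The host has modified field: push the new contents to the device copy.  */
  acc_update_device (field, bytes);

  /* The device has modified its copy: pull it back into host memory.  */
  acc_update_self (field, bytes);
}
#endif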
void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
                         void *kinds)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
                       NULL, sizes, kinds, true, false);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
         chain.  This prevents gomp_unmap_vars, via gomp_unmap_tgt, from
         freeing the device memory.  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
           tp = t, t = t->prev)
        if (n->tgt == t)
          {
            if (tp)
              tp->prev = t->prev;
            else
              acc_dev->openacc.data_environ = t->prev;

            break;
          }
    }

  t->list[0]->copy_from = force_copyfrom ? 1 : 0;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    {
      gomp_copy_from_async (t);
      acc_dev->openacc.register_async_cleanup_func (t);
    }

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}