2015-01-18 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / libgomp / oacc-mem.c
blob0096d5142977269a8e68f73de85a5fff16e47b94
1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2015 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
29 #include "openacc.h"
30 #include "config.h"
31 #include "libgomp.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include "splay-tree.h"
35 #include <stdint.h>
36 #include <assert.h>
38 /* Return block containing [H->S), or NULL if not contained. */
40 static splay_tree_key
41 lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s)
43 struct splay_tree_key_s node;
44 splay_tree_key key;
46 node.host_start = (uintptr_t) h;
47 node.host_end = (uintptr_t) h + s;
49 gomp_mutex_lock (&mem_map->lock);
51 key = splay_tree_lookup (&mem_map->splay_tree, &node);
53 gomp_mutex_unlock (&mem_map->lock);
55 return key;
58 /* Return block containing [D->S), or NULL if not contained.
59 The list isn't ordered by device address, so we have to iterate
60 over the whole array. This is not expected to be a common
61 operation. */
63 static splay_tree_key
64 lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
66 int i;
67 struct target_mem_desc *t;
68 struct gomp_memory_mapping *mem_map;
70 if (!tgt)
71 return NULL;
73 mem_map = tgt->mem_map;
75 gomp_mutex_lock (&mem_map->lock);
77 for (t = tgt; t != NULL; t = t->prev)
79 if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
80 break;
83 gomp_mutex_unlock (&mem_map->lock);
85 if (!t)
86 return NULL;
88 for (i = 0; i < t->list_count; i++)
90 void * offset;
92 splay_tree_key k = &t->array[i].key;
93 offset = d - t->tgt_start + k->tgt_offset;
95 if (k->host_start + offset <= (void *) k->host_end)
96 return k;
99 return NULL;
102 /* OpenACC is silent on how memory exhaustion is indicated. We return
103 NULL. */
105 void *
106 acc_malloc (size_t s)
108 if (!s)
109 return NULL;
111 goacc_lazy_initialize ();
113 struct goacc_thread *thr = goacc_thread ();
115 return base_dev->alloc_func (thr->dev->target_id, s);
118 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
119 the device address is mapped. We choose to check if it mapped,
120 and if it is, to unmap it. */
121 void
122 acc_free (void *d)
124 splay_tree_key k;
125 struct goacc_thread *thr = goacc_thread ();
127 if (!d)
128 return;
130 /* We don't have to call lazy open here, as the ptr value must have
131 been returned by acc_malloc. It's not permitted to pass NULL in
132 (unless you got that null from acc_malloc). */
133 if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
135 void *offset;
137 offset = d - k->tgt->tgt_start + k->tgt_offset;
139 acc_unmap_data ((void *)(k->host_start + offset));
142 base_dev->free_func (thr->dev->target_id, d);
145 void
146 acc_memcpy_to_device (void *d, void *h, size_t s)
148 /* No need to call lazy open here, as the device pointer must have
149 been obtained from a routine that did that. */
150 struct goacc_thread *thr = goacc_thread ();
152 base_dev->host2dev_func (thr->dev->target_id, d, h, s);
155 void
156 acc_memcpy_from_device (void *h, void *d, size_t s)
158 /* No need to call lazy open here, as the device pointer must have
159 been obtained from a routine that did that. */
160 struct goacc_thread *thr = goacc_thread ();
162 base_dev->dev2host_func (thr->dev->target_id, h, d, s);
165 /* Return the device pointer that corresponds to host data H. Or NULL
166 if no mapping. */
168 void *
169 acc_deviceptr (void *h)
171 splay_tree_key n;
172 void *d;
173 void *offset;
175 goacc_lazy_initialize ();
177 struct goacc_thread *thr = goacc_thread ();
179 n = lookup_host (&thr->dev->mem_map, h, 1);
181 if (!n)
182 return NULL;
184 offset = h - n->host_start;
186 d = n->tgt->tgt_start + n->tgt_offset + offset;
188 return d;
191 /* Return the host pointer that corresponds to device data D. Or NULL
192 if no mapping. */
194 void *
195 acc_hostptr (void *d)
197 splay_tree_key n;
198 void *h;
199 void *offset;
201 goacc_lazy_initialize ();
203 struct goacc_thread *thr = goacc_thread ();
205 n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
207 if (!n)
208 return NULL;
210 offset = d - n->tgt->tgt_start + n->tgt_offset;
212 h = n->host_start + offset;
214 return h;
217 /* Return 1 if host data [H,+S] is present on the device. */
220 acc_is_present (void *h, size_t s)
222 splay_tree_key n;
224 if (!s || !h)
225 return 0;
227 goacc_lazy_initialize ();
229 struct goacc_thread *thr = goacc_thread ();
230 struct gomp_device_descr *acc_dev = thr->dev;
232 n = lookup_host (&acc_dev->mem_map, h, s);
234 if (n && ((uintptr_t)h < n->host_start
235 || (uintptr_t)h + s > n->host_end
236 || s > n->host_end - n->host_start))
237 n = NULL;
239 return n != NULL;
242 /* Create a mapping for host [H,+S] -> device [D,+S] */
244 void
245 acc_map_data (void *h, void *d, size_t s)
247 struct target_mem_desc *tgt;
248 size_t mapnum = 1;
249 void *hostaddrs = h;
250 void *devaddrs = d;
251 size_t sizes = s;
252 unsigned short kinds = GOMP_MAP_ALLOC;
254 goacc_lazy_initialize ();
256 struct goacc_thread *thr = goacc_thread ();
257 struct gomp_device_descr *acc_dev = thr->dev;
259 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
261 if (d != h)
262 gomp_fatal ("cannot map data on shared-memory system");
264 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
266 else
268 struct goacc_thread *thr = goacc_thread ();
270 if (!d || !h || !s)
271 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
272 (void *)h, (int)s, (void *)d, (int)s);
274 if (lookup_host (&acc_dev->mem_map, h, s))
275 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
276 (int)s);
278 if (lookup_dev (thr->dev->openacc.data_environ, d, s))
279 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
280 (int)s);
282 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
283 &kinds, true, false);
286 tgt->prev = acc_dev->openacc.data_environ;
287 acc_dev->openacc.data_environ = tgt;
290 void
291 acc_unmap_data (void *h)
293 struct goacc_thread *thr = goacc_thread ();
294 struct gomp_device_descr *acc_dev = thr->dev;
296 /* No need to call lazy open, as the address must have been mapped. */
298 size_t host_size;
299 splay_tree_key n = lookup_host (&acc_dev->mem_map, h, 1);
300 struct target_mem_desc *t;
302 if (!n)
303 gomp_fatal ("%p is not a mapped block", (void *)h);
305 host_size = n->host_end - n->host_start;
307 if (n->host_start != (uintptr_t) h)
308 gomp_fatal ("[%p,%d] surrounds1 %p",
309 (void *) n->host_start, (int) host_size, (void *) h);
311 t = n->tgt;
313 if (t->refcount == 2)
315 struct target_mem_desc *tp;
317 /* This is the last reference, so pull the descriptor off the
318 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
319 freeing the device memory. */
320 t->tgt_end = 0;
321 t->to_free = 0;
323 gomp_mutex_lock (&acc_dev->mem_map.lock);
325 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
326 tp = t, t = t->prev)
327 if (n->tgt == t)
329 if (tp)
330 tp->prev = t->prev;
331 else
332 acc_dev->openacc.data_environ = t->prev;
334 break;
337 gomp_mutex_unlock (&acc_dev->mem_map.lock);
340 gomp_unmap_vars (t, true);
343 #define FLAG_PRESENT (1 << 0)
344 #define FLAG_CREATE (1 << 1)
345 #define FLAG_COPY (1 << 2)
347 static void *
348 present_create_copy (unsigned f, void *h, size_t s)
350 void *d;
351 splay_tree_key n;
353 if (!h || !s)
354 gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
356 goacc_lazy_initialize ();
358 struct goacc_thread *thr = goacc_thread ();
359 struct gomp_device_descr *acc_dev = thr->dev;
361 n = lookup_host (&acc_dev->mem_map, h, s);
362 if (n)
364 /* Present. */
365 d = (void *) (n->tgt->tgt_start + n->tgt_offset);
367 if (!(f & FLAG_PRESENT))
368 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
369 (void *)h, (int)s, (void *)d, (int)s);
370 if ((h + s) > (void *)n->host_end)
371 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
373 else if (!(f & FLAG_CREATE))
375 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
377 else
379 struct target_mem_desc *tgt;
380 size_t mapnum = 1;
381 unsigned short kinds;
382 void *hostaddrs = h;
384 if (f & FLAG_COPY)
385 kinds = GOMP_MAP_TO;
386 else
387 kinds = GOMP_MAP_ALLOC;
389 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
390 false);
392 gomp_mutex_lock (&acc_dev->mem_map.lock);
394 d = tgt->to_free;
395 tgt->prev = acc_dev->openacc.data_environ;
396 acc_dev->openacc.data_environ = tgt;
398 gomp_mutex_unlock (&acc_dev->mem_map.lock);
401 return d;
404 void *
405 acc_create (void *h, size_t s)
407 return present_create_copy (FLAG_CREATE, h, s);
410 void *
411 acc_copyin (void *h, size_t s)
413 return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
416 void *
417 acc_present_or_create (void *h, size_t s)
419 return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
422 void *
423 acc_present_or_copyin (void *h, size_t s)
425 return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
428 #define FLAG_COPYOUT (1 << 0)
430 static void
431 delete_copyout (unsigned f, void *h, size_t s)
433 size_t host_size;
434 splay_tree_key n;
435 void *d;
436 struct goacc_thread *thr = goacc_thread ();
437 struct gomp_device_descr *acc_dev = thr->dev;
439 n = lookup_host (&acc_dev->mem_map, h, s);
441 /* No need to call lazy open, as the data must already have been
442 mapped. */
444 if (!n)
445 gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
447 d = (void *) (n->tgt->tgt_start + n->tgt_offset);
449 host_size = n->host_end - n->host_start;
451 if (n->host_start != (uintptr_t) h || host_size != s)
452 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
453 (void *) n->host_start, (int) host_size, (void *) h, (int) s);
455 if (f & FLAG_COPYOUT)
456 acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
458 acc_unmap_data (h);
460 acc_dev->free_func (acc_dev->target_id, d);
/* Unmap host range [H, H+S) and free its device memory without copying
   anything back to the host.  */

void
acc_delete (void *h , size_t s)
{
  const unsigned mode = 0;

  delete_copyout (mode, h, s);
}
469 void acc_copyout (void *h, size_t s)
471 delete_copyout (FLAG_COPYOUT, h, s);
474 static void
475 update_dev_host (int is_dev, void *h, size_t s)
477 splay_tree_key n;
478 void *d;
479 struct goacc_thread *thr = goacc_thread ();
480 struct gomp_device_descr *acc_dev = thr->dev;
482 n = lookup_host (&acc_dev->mem_map, h, s);
484 /* No need to call lazy open, as the data must already have been
485 mapped. */
487 if (!n)
488 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
490 d = (void *) (n->tgt->tgt_start + n->tgt_offset);
492 if (is_dev)
493 acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
494 else
495 acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
/* Refresh the device copy of host range [H, H+S) from host memory.  */

void
acc_update_device (void *h, size_t s)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s);
}
/* Refresh host range [H, H+S) from its device copy.  */

void
acc_update_self (void *h, size_t s)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s);
}
510 void
511 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
512 void *kinds)
514 struct target_mem_desc *tgt;
515 struct goacc_thread *thr = goacc_thread ();
516 struct gomp_device_descr *acc_dev = thr->dev;
518 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
519 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
520 NULL, sizes, kinds, true, false);
521 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
522 tgt->prev = acc_dev->openacc.data_environ;
523 acc_dev->openacc.data_environ = tgt;
526 void
527 gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
529 struct goacc_thread *thr = goacc_thread ();
530 struct gomp_device_descr *acc_dev = thr->dev;
531 splay_tree_key n;
532 struct target_mem_desc *t;
533 int minrefs = (mapnum == 1) ? 2 : 3;
535 n = lookup_host (&acc_dev->mem_map, h, 1);
537 if (!n)
538 gomp_fatal ("%p is not a mapped block", (void *)h);
540 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
542 t = n->tgt;
544 struct target_mem_desc *tp;
546 gomp_mutex_lock (&acc_dev->mem_map.lock);
548 if (t->refcount == minrefs)
550 /* This is the last reference, so pull the descriptor off the
551 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
552 freeing the device memory. */
553 t->tgt_end = 0;
554 t->to_free = 0;
556 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
557 tp = t, t = t->prev)
559 if (n->tgt == t)
561 if (tp)
562 tp->prev = t->prev;
563 else
564 acc_dev->openacc.data_environ = t->prev;
565 break;
570 if (force_copyfrom)
571 t->list[0]->copy_from = 1;
573 gomp_mutex_unlock (&acc_dev->mem_map.lock);
575 /* If running synchronously, unmap immediately. */
576 if (async < acc_async_noval)
577 gomp_unmap_vars (t, true);
578 else
580 gomp_copy_from_async (t);
581 acc_dev->openacc.register_async_cleanup_func (t);
584 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);