/* Copyright (C) 2013-2015 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles OpenACC constructs.  */
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
42 find_pset (int pos
, size_t mapnum
, unsigned short *kinds
)
44 if (pos
+ 1 >= mapnum
)
47 unsigned char kind
= kinds
[pos
+1] & 0xff;
49 return kind
== GOMP_MAP_TO_PSET
;
/* Forward declaration: goacc_wait is defined further down in this file but
   is called by the entry points that precede its definition.  */
static void goacc_wait (int async, int num_waits, va_list ap);
/* Entry point that launches an OpenACC parallel region.

   NOTE(review): this listing is incomplete.  The return type, the braces,
   several declarations (for example of ap, i, devaddrs and tgt_fn), some
   conditions and the tails of several calls are not visible, so the summary
   below covers only what the remaining lines show -- confirm against the
   complete source.

   DEVICE is compared against GOMP_DEVICE_HOST_FALLBACK to compute
   HOST_FALLBACK.  FN is the host address of the region body.  Unless the
   device advertises GOMP_OFFLOAD_CAP_NATIVE_EXEC, FN is looked up in the
   device MEM_MAP splay tree while holding the device lock, and a failed
   lookup is fatal; the target function is then taken from the TGT_OFFSET of
   the key that was found.  A second assignment uses FN itself as the target
   function, presumably on the native-exec path.

   MAPNUM, HOSTADDRS, SIZES and KINDS describe the data mappings handed to
   gomp_map_vars.  One device address per mapping (tgt_start + tgt_offset of
   the corresponding list entry) is stored in an array obtained from
   gomp_alloca, and the device exec_func hook is called with the target
   function, the mapping arrays, NUM_GANGS, NUM_WORKERS, VECTOR_LENGTH and
   ASYNC.  The two gomp_fatal messages indicate that NUM_GANGS and
   NUM_WORKERS values other than one are rejected (the tests themselves are
   not visible here).

   NUM_WAITS and the trailing variadic arguments are forwarded to goacc_wait.
   ASYNC is installed with the async_set_async_func hook before the launch
   and the hook is called again with acc_async_sync at the end.  When
   ASYNC < acc_async_noval the mappings are unmapped immediately; the calls
   to gomp_copy_from_async and register_async_cleanup_func presumably form
   the alternative branch for asynchronous launches.  */
55 GOACC_parallel (int device
, void (*fn
) (void *),
56 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
57 unsigned short *kinds
,
58 int num_gangs
, int num_workers
, int vector_length
,
59 int async
, int num_waits
, ...)
61 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
63 struct goacc_thread
*thr
;
64 struct gomp_device_descr
*acc_dev
;
65 struct target_mem_desc
*tgt
;
68 struct splay_tree_key_s k
;
69 splay_tree_key tgt_fn_key
;
73 gomp_fatal ("num_gangs (%d) different from one is not yet supported",
76 gomp_fatal ("num_workers (%d) different from one is not yet supported",
79 #ifdef HAVE_INTTYPES_H
80 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p, "
82 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
, async
);
84 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
85 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
,
88 goacc_lazy_initialize ();
90 thr
= goacc_thread ();
93 /* Host fallback if "if" clause is false or if the current device is set to
97 goacc_save_and_set_bind (acc_device_host
);
99 goacc_restore_bind ();
102 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
108 va_start (ap
, num_waits
);
111 goacc_wait (async
, num_waits
, ap
);
115 acc_dev
->openacc
.async_set_async_func (async
);
117 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
119 k
.host_start
= (uintptr_t) fn
;
120 k
.host_end
= k
.host_start
+ 1;
121 gomp_mutex_lock (&acc_dev
->lock
);
122 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
, &k
);
123 gomp_mutex_unlock (&acc_dev
->lock
);
125 if (tgt_fn_key
== NULL
)
126 gomp_fatal ("target function wasn't mapped");
128 tgt_fn
= (void (*)) tgt_fn_key
->tgt_offset
;
131 tgt_fn
= (void (*)) fn
;
133 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
136 devaddrs
= gomp_alloca (sizeof (void *) * mapnum
);
137 for (i
= 0; i
< mapnum
; i
++)
138 devaddrs
[i
] = (void *) (tgt
->list
[i
]->tgt
->tgt_start
139 + tgt
->list
[i
]->tgt_offset
);
141 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
, sizes
, kinds
,
142 num_gangs
, num_workers
, vector_length
, async
,
145 /* If running synchronously, unmap immediately. */
146 if (async
< acc_async_noval
)
147 gomp_unmap_vars (tgt
, true);
150 gomp_copy_from_async (tgt
);
151 acc_dev
->openacc
.register_async_cleanup_func (tgt
);
154 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Entry point that opens an OpenACC data region for the calling thread.

   NOTE(review): this listing is incomplete.  The return type, the braces,
   the rest of the capability condition, the else/return lines and the tail
   of the second gomp_map_vars call are not visible -- confirm against the
   complete source.

   DEVICE is compared against GOMP_DEVICE_HOST_FALLBACK to compute
   HOST_FALLBACK.  After goacc_lazy_initialize, the current thread and its
   device are fetched.  Two mapping paths are visible: one, next to the
   GOMP_OFFLOAD_CAP_SHARED_MEM capability test, creates an empty mapping
   (gomp_map_vars with a NULL device and zero entries); the other maps the
   MAPNUM entries described by HOSTADDRS, SIZES and KINDS on the device of
   the thread.  On both paths the resulting descriptor is pushed onto the
   head of the mapped_data list of the thread, linked through PREV.  */
158 GOACC_data_start (int device
, size_t mapnum
,
159 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
161 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
162 struct target_mem_desc
*tgt
;
164 #ifdef HAVE_INTTYPES_H
165 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
166 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
168 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
169 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
172 goacc_lazy_initialize ();
174 struct goacc_thread
*thr
= goacc_thread ();
175 struct gomp_device_descr
*acc_dev
= thr
->dev
;
177 /* Host fallback or 'do nothing'. */
178 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
/* Empty mapping: no device and no entries are passed.  */
181 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
182 tgt
->prev
= thr
->mapped_data
;
183 thr
->mapped_data
= tgt
;
188 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
189 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
191 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
192 tgt
->prev
= thr
->mapped_data
;
193 thr
->mapped_data
= tgt
;
197 GOACC_data_end (void)
199 struct goacc_thread
*thr
= goacc_thread ();
200 struct target_mem_desc
*tgt
= thr
->mapped_data
;
202 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
203 thr
->mapped_data
= tgt
->prev
;
204 gomp_unmap_vars (tgt
, true);
205 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
/* Entry point that processes an OpenACC enter-data or exit-data request
   described by the MAPNUM entries of HOSTADDRS, SIZES and KINDS.

   NOTE(review): this listing is incomplete.  The return type, the braces,
   several declarations and most control-flow lines (switch heads, breaks,
   else branches, returns) are not visible, so how the statements below pair
   up with conditions is partly inferred -- confirm against the complete
   source.

   What the visible lines show:
   - DEVICE is compared against GOMP_DEVICE_HOST_FALLBACK, and the device
     capabilities are tested for GOMP_OFFLOAD_CAP_SHARED_MEM.
   - NUM_WAITS and the variadic arguments are forwarded to goacc_wait.  ASYNC
     is installed through the async_set_async_func hook, and the hook is
     called again with acc_async_sync at the end.
   - A first scan over KINDS (low eight bits of each entry) tests for the
     FORCE_ALLOC / FORCE_PRESENT / FORCE_TO group and for the FORCE_DEALLOC /
     FORCE_FROM group, presumably to decide DATA_ENTER, which starts out
     false.  POINTER and TO_PSET entries are tested separately, and a
     gomp_fatal call reports unhandled kinds.
   - One loop issues acc_create, acc_present_or_copyin and
     gomp_acc_insert_pointer calls; another issues acc_delete, acc_copyout
     and gomp_acc_remove_pointer calls.  Both loops call find_pset for every
     entry, and both contain a pointer-helper call with a count of 3,
     presumably for entries that are followed by a TO_PSET.  */
209 GOACC_enter_exit_data (int device
, size_t mapnum
,
210 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
211 int async
, int num_waits
, ...)
213 struct goacc_thread
*thr
;
214 struct gomp_device_descr
*acc_dev
;
215 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
216 bool data_enter
= false;
219 goacc_lazy_initialize ();
221 thr
= goacc_thread ();
224 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
232 va_start (ap
, num_waits
);
234 goacc_wait (async
, num_waits
, ap
);
239 acc_dev
->openacc
.async_set_async_func (async
);
241 /* Determine if this is an "acc enter data". */
242 for (i
= 0; i
< mapnum
; ++i
)
244 unsigned char kind
= kinds
[i
] & 0xff;
246 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
249 if (kind
== GOMP_MAP_FORCE_ALLOC
250 || kind
== GOMP_MAP_FORCE_PRESENT
251 || kind
== GOMP_MAP_FORCE_TO
)
257 if (kind
== GOMP_MAP_FORCE_DEALLOC
258 || kind
== GOMP_MAP_FORCE_FROM
)
261 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
267 for (i
= 0; i
< mapnum
; i
++)
269 unsigned char kind
= kinds
[i
] & 0xff;
271 /* Scan for PSETs. */
272 int psets
= find_pset (i
, mapnum
, kinds
);
278 case GOMP_MAP_POINTER
:
279 gomp_acc_insert_pointer (1, &hostaddrs
[i
], &sizes
[i
],
282 case GOMP_MAP_FORCE_ALLOC
:
283 acc_create (hostaddrs
[i
], sizes
[i
]);
285 case GOMP_MAP_FORCE_PRESENT
:
286 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
288 case GOMP_MAP_FORCE_TO
:
289 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
292 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
299 gomp_acc_insert_pointer (3, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
]);
300 /* Increment 'i' by two because OpenACC requires fortran
301 arrays to be contiguous, so each PSET is associated with
302 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
309 for (i
= 0; i
< mapnum
; ++i
)
311 unsigned char kind
= kinds
[i
] & 0xff;
313 int psets
= find_pset (i
, mapnum
, kinds
);
319 case GOMP_MAP_POINTER
:
320 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
321 == GOMP_MAP_FORCE_FROM
,
324 case GOMP_MAP_FORCE_DEALLOC
:
325 acc_delete (hostaddrs
[i
], sizes
[i
]);
327 case GOMP_MAP_FORCE_FROM
:
328 acc_copyout (hostaddrs
[i
], sizes
[i
]);
331 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
338 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
339 == GOMP_MAP_FORCE_FROM
, async
, 3);
340 /* See the above comment. */
345 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* File-local worker that handles the wait arguments of the entry points.
   ASYNC is the async value passed by the caller, and AP supplies NUM_WAITS
   queue ids, each read as an int with va_arg.

   NOTE(review): this listing is incomplete.  The return type, the braces,
   the declaration of i, the bodies of the first two tests and of both
   acc_async_test checks, and the tail of the final comment are not
   visible -- confirm against the complete source.

   Visible behaviour: NUM_WAITS is asserted to be non-negative.  Three
   combinations are tested in turn: ASYNC == acc_async_sync with no waits;
   ASYNC == acc_async_sync with waits, where each id is read and passed to
   acc_async_test; and ASYNC == acc_async_noval with no waits, where the
   device hook async_wait_all_async_func is called with acc_async_noval.  A
   final loop reads each id, passes it to acc_async_test and calls the device
   hook async_wait_async_func with the id and ASYNC.  */
349 goacc_wait (int async
, int num_waits
, va_list ap
)
351 struct goacc_thread
*thr
= goacc_thread ();
352 struct gomp_device_descr
*acc_dev
= thr
->dev
;
355 assert (num_waits
>= 0);
357 if (async
== acc_async_sync
&& num_waits
== 0)
363 if (async
== acc_async_sync
&& num_waits
)
365 for (i
= 0; i
< num_waits
; i
++)
367 int qid
= va_arg (ap
, int);
369 if (acc_async_test (qid
))
377 if (async
== acc_async_noval
&& num_waits
== 0)
379 acc_dev
->openacc
.async_wait_all_async_func (acc_async_noval
);
383 for (i
= 0; i
< num_waits
; i
++)
385 int qid
= va_arg (ap
, int);
387 if (acc_async_test (qid
))
390 /* If we're waiting on the same asynchronous queue as we're launching on,
391 the queue itself will order work as required, so there's no need to
394 acc_dev
->openacc
.async_wait_async_func (qid
, async
);
/* Entry point that processes an OpenACC update request over the MAPNUM
   entries of HOSTADDRS, SIZES and KINDS.

   NOTE(review): this listing is incomplete.  The return type, the braces,
   the declarations of ap and i, the rest of the capability condition, the
   switch head and the break/return lines are not visible -- confirm against
   the complete source.

   What the visible lines show: DEVICE is compared against
   GOMP_DEVICE_HOST_FALLBACK, and the device capabilities are tested for
   GOMP_OFFLOAD_CAP_SHARED_MEM.  NUM_WAITS and the variadic arguments are
   forwarded to goacc_wait.  ASYNC is installed through the
   async_set_async_func hook, and the hook is called again with
   acc_async_sync at the end.  For each entry the low eight bits of its kind
   are examined: GOMP_MAP_POINTER and GOMP_MAP_TO_PSET share a case whose
   body is not visible here, GOMP_MAP_FORCE_TO calls acc_update_device,
   GOMP_MAP_FORCE_FROM calls acc_update_self, and a gomp_fatal call reports
   unhandled kinds.  */
399 GOACC_update (int device
, size_t mapnum
,
400 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
401 int async
, int num_waits
, ...)
403 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
406 goacc_lazy_initialize ();
408 struct goacc_thread
*thr
= goacc_thread ();
409 struct gomp_device_descr
*acc_dev
= thr
->dev
;
411 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
419 va_start (ap
, num_waits
);
421 goacc_wait (async
, num_waits
, ap
);
426 acc_dev
->openacc
.async_set_async_func (async
);
428 for (i
= 0; i
< mapnum
; ++i
)
430 unsigned char kind
= kinds
[i
] & 0xff;
434 case GOMP_MAP_POINTER
:
435 case GOMP_MAP_TO_PSET
:
438 case GOMP_MAP_FORCE_TO
:
439 acc_update_device (hostaddrs
[i
], sizes
[i
]);
442 case GOMP_MAP_FORCE_FROM
:
443 acc_update_self (hostaddrs
[i
], sizes
[i
]);
447 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
452 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Variadic front end to goacc_wait.

   ASYNC and NUM_WAITS are passed through unchanged; the NUM_WAITS trailing
   arguments (read as int queue ids by goacc_wait) are packaged into a
   va_list for it.  Returns nothing.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list ap;

  va_start (ap, num_waits);

  goacc_wait (async, num_waits, ap);

  /* ISO C requires each va_start to be matched by a va_end in the same
     function before it returns.  */
  va_end (ap);
}
/* Takes no arguments.  NOTE(review): the return type and body of this
   function are not visible in this listing -- confirm what value it reports
   against the complete source before relying on it.  */
468 GOACC_get_num_threads (void)
474 GOACC_get_thread_num (void)