2015-01-18 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / libgomp / oacc-parallel.c
blobb5e80603004a50c8ed6263d57066159d54c85e31
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include <string.h>
35 #include <stdarg.h>
36 #include <assert.h>
38 static int
39 find_pset (int pos, size_t mapnum, unsigned short *kinds)
41 if (pos + 1 >= mapnum)
42 return 0;
44 unsigned char kind = kinds[pos+1] & 0xff;
46 return kind == GOMP_MAP_TO_PSET;
50 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
51 plugins have been loaded if appropriate). The ACC_dev variable for the
52 current thread will be set appropriately for the given device type on
53 return. */
55 attribute_hidden void
56 select_acc_device (int device_type)
58 goacc_lazy_initialize ();
60 if (device_type == GOMP_DEVICE_HOST_FALLBACK)
61 return;
63 if (device_type == acc_device_none)
64 device_type = acc_device_host;
66 if (device_type >= 0)
68 /* NOTE: this will go badly if the surrounding data environment is set up
69 to use a different device type. We'll just have to trust that users
70 know what they're doing... */
71 acc_set_device_type (device_type);
75 static void goacc_wait (int async, int num_waits, va_list ap);
77 void
78 GOACC_parallel (int device, void (*fn) (void *), const void *offload_table,
79 size_t mapnum, void **hostaddrs, size_t *sizes,
80 unsigned short *kinds,
81 int num_gangs, int num_workers, int vector_length,
82 int async, int num_waits, ...)
84 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
85 va_list ap;
86 struct goacc_thread *thr;
87 struct gomp_device_descr *acc_dev;
88 struct target_mem_desc *tgt;
89 void **devaddrs;
90 unsigned int i;
91 struct splay_tree_key_s k;
92 splay_tree_key tgt_fn_key;
93 void (*tgt_fn);
95 if (num_gangs != 1)
96 gomp_fatal ("num_gangs (%d) different from one is not yet supported",
97 num_gangs);
98 if (num_workers != 1)
99 gomp_fatal ("num_workers (%d) different from one is not yet supported",
100 num_workers);
102 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
103 __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);
105 select_acc_device (device);
107 thr = goacc_thread ();
108 acc_dev = thr->dev;
110 /* Host fallback if "if" clause is false or if the current device is set to
111 the host. */
112 if (host_fallback)
114 goacc_save_and_set_bind (acc_device_host);
115 fn (hostaddrs);
116 goacc_restore_bind ();
117 return;
119 else if (acc_device_type (acc_dev->type) == acc_device_host)
121 fn (hostaddrs);
122 return;
125 va_start (ap, num_waits);
127 if (num_waits > 0)
128 goacc_wait (async, num_waits, ap);
130 va_end (ap);
132 acc_dev->openacc.async_set_async_func (async);
134 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
136 k.host_start = (uintptr_t) fn;
137 k.host_end = k.host_start + 1;
138 gomp_mutex_lock (&acc_dev->mem_map.lock);
139 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
140 gomp_mutex_unlock (&acc_dev->mem_map.lock);
142 if (tgt_fn_key == NULL)
143 gomp_fatal ("target function wasn't mapped");
145 tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
147 else
148 tgt_fn = (void (*)) fn;
150 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
151 false);
153 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
154 for (i = 0; i < mapnum; i++)
155 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
156 + tgt->list[i]->tgt_offset);
158 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
159 num_gangs, num_workers, vector_length, async,
160 tgt);
162 /* If running synchronously, unmap immediately. */
163 if (async < acc_async_noval)
164 gomp_unmap_vars (tgt, true);
165 else
167 gomp_copy_from_async (tgt);
168 acc_dev->openacc.register_async_cleanup_func (tgt);
171 acc_dev->openacc.async_set_async_func (acc_async_sync);
174 void
175 GOACC_data_start (int device, const void *offload_table, size_t mapnum,
176 void **hostaddrs, size_t *sizes, unsigned short *kinds)
178 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
179 struct target_mem_desc *tgt;
181 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
182 __FUNCTION__, mapnum, hostaddrs, sizes, kinds);
184 select_acc_device (device);
186 struct goacc_thread *thr = goacc_thread ();
187 struct gomp_device_descr *acc_dev = thr->dev;
189 /* Host fallback or 'do nothing'. */
190 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
191 || host_fallback)
193 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
194 tgt->prev = thr->mapped_data;
195 thr->mapped_data = tgt;
197 return;
200 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
201 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
202 false);
203 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
204 tgt->prev = thr->mapped_data;
205 thr->mapped_data = tgt;
208 void
209 GOACC_data_end (void)
211 struct goacc_thread *thr = goacc_thread ();
212 struct target_mem_desc *tgt = thr->mapped_data;
214 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
215 thr->mapped_data = tgt->prev;
216 gomp_unmap_vars (tgt, true);
217 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
220 void
221 GOACC_enter_exit_data (int device, const void *offload_table, size_t mapnum,
222 void **hostaddrs, size_t *sizes, unsigned short *kinds,
223 int async, int num_waits, ...)
225 struct goacc_thread *thr;
226 struct gomp_device_descr *acc_dev;
227 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
228 bool data_enter = false;
229 size_t i;
231 select_acc_device (device);
233 thr = goacc_thread ();
234 acc_dev = thr->dev;
236 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
237 || host_fallback)
238 return;
240 if (num_waits > 0)
242 va_list ap;
244 va_start (ap, num_waits);
246 goacc_wait (async, num_waits, ap);
248 va_end (ap);
251 acc_dev->openacc.async_set_async_func (async);
253 /* Determine if this is an "acc enter data". */
254 for (i = 0; i < mapnum; ++i)
256 unsigned char kind = kinds[i] & 0xff;
258 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
259 continue;
261 if (kind == GOMP_MAP_FORCE_ALLOC
262 || kind == GOMP_MAP_FORCE_PRESENT
263 || kind == GOMP_MAP_FORCE_TO)
265 data_enter = true;
266 break;
269 if (kind == GOMP_MAP_FORCE_DEALLOC
270 || kind == GOMP_MAP_FORCE_FROM)
271 break;
273 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
274 kind);
277 if (data_enter)
279 for (i = 0; i < mapnum; i++)
281 unsigned char kind = kinds[i] & 0xff;
283 /* Scan for PSETs. */
284 int psets = find_pset (i, mapnum, kinds);
286 if (!psets)
288 switch (kind)
290 case GOMP_MAP_POINTER:
291 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
292 &kinds[i]);
293 break;
294 case GOMP_MAP_FORCE_ALLOC:
295 acc_create (hostaddrs[i], sizes[i]);
296 break;
297 case GOMP_MAP_FORCE_PRESENT:
298 acc_present_or_copyin (hostaddrs[i], sizes[i]);
299 break;
300 case GOMP_MAP_FORCE_TO:
301 acc_present_or_copyin (hostaddrs[i], sizes[i]);
302 break;
303 default:
304 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
305 kind);
306 break;
309 else
311 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
312 /* Increment 'i' by two because OpenACC requires fortran
313 arrays to be contiguous, so each PSET is associated with
314 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
315 one MAP_POINTER. */
316 i += 2;
320 else
321 for (i = 0; i < mapnum; ++i)
323 unsigned char kind = kinds[i] & 0xff;
325 int psets = find_pset (i, mapnum, kinds);
327 if (!psets)
329 switch (kind)
331 case GOMP_MAP_POINTER:
332 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
333 == GOMP_MAP_FORCE_FROM,
334 async, 1);
335 break;
336 case GOMP_MAP_FORCE_DEALLOC:
337 acc_delete (hostaddrs[i], sizes[i]);
338 break;
339 case GOMP_MAP_FORCE_FROM:
340 acc_copyout (hostaddrs[i], sizes[i]);
341 break;
342 default:
343 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
344 kind);
345 break;
348 else
350 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
351 == GOMP_MAP_FORCE_FROM, async, 3);
352 /* See the above comment. */
353 i += 2;
357 acc_dev->openacc.async_set_async_func (acc_async_sync);
360 static void
361 goacc_wait (int async, int num_waits, va_list ap)
363 struct goacc_thread *thr = goacc_thread ();
364 struct gomp_device_descr *acc_dev = thr->dev;
365 int i;
367 assert (num_waits >= 0);
369 if (async == acc_async_sync && num_waits == 0)
371 acc_wait_all ();
372 return;
375 if (async == acc_async_sync && num_waits)
377 for (i = 0; i < num_waits; i++)
379 int qid = va_arg (ap, int);
381 if (acc_async_test (qid))
382 continue;
384 acc_wait (qid);
386 return;
389 if (async == acc_async_noval && num_waits == 0)
391 acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
392 return;
395 for (i = 0; i < num_waits; i++)
397 int qid = va_arg (ap, int);
399 if (acc_async_test (qid))
400 continue;
402 /* If we're waiting on the same asynchronous queue as we're launching on,
403 the queue itself will order work as required, so there's no need to
404 wait explicitly. */
405 if (qid != async)
406 acc_dev->openacc.async_wait_async_func (qid, async);
410 void
411 GOACC_update (int device, const void *offload_table, size_t mapnum,
412 void **hostaddrs, size_t *sizes, unsigned short *kinds,
413 int async, int num_waits, ...)
415 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
416 size_t i;
418 select_acc_device (device);
420 struct goacc_thread *thr = goacc_thread ();
421 struct gomp_device_descr *acc_dev = thr->dev;
423 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
424 || host_fallback)
425 return;
427 if (num_waits > 0)
429 va_list ap;
431 va_start (ap, num_waits);
433 goacc_wait (async, num_waits, ap);
435 va_end (ap);
438 acc_dev->openacc.async_set_async_func (async);
440 for (i = 0; i < mapnum; ++i)
442 unsigned char kind = kinds[i] & 0xff;
444 switch (kind)
446 case GOMP_MAP_POINTER:
447 case GOMP_MAP_TO_PSET:
448 break;
450 case GOMP_MAP_FORCE_TO:
451 acc_update_device (hostaddrs[i], sizes[i]);
452 break;
454 case GOMP_MAP_FORCE_FROM:
455 acc_update_self (hostaddrs[i], sizes[i]);
456 break;
458 default:
459 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
460 break;
464 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Implement the OpenACC "wait" directive: gather the NUM_WAITS trailing
   int queue ids into a va_list and hand them, together with ASYNC, to
   goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queues;

  va_start (queues, num_waits);
  goacc_wait (async, num_waits, queues);
  va_end (queues);
}
/* Return the number of threads.  This implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
/* Return the calling thread's number.  This implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}