svn merge -r 217500:218679 svn+ssh://gcc.gnu.org/svn/gcc/trunk
[official-gcc.git] / libgomp / oacc-parallel.c
blobff51808ed2d8ca51fbd0c246cb1453cb7b7cf940
1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
3 Contributed by Thomas Schwinge <thomas@codesourcery.com>.
5 This file is part of the GNU OpenMP Library (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles OpenACC constructs. */
28 #include "openacc.h"
29 #include "libgomp.h"
30 #include "libgomp_g.h"
31 #include "gomp-constants.h"
32 #include "libgomp_target.h"
33 #include "oacc-int.h"
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdarg.h>
37 #include <assert.h>
38 #include <alloca.h>
/* Emit a debug dump of one data-mapping entry through gomp_notify.

   S is a short tag naming the caller, IDX the position of the entry in the
   mapping arrays, and HOSTADDR/SIZE the host address and size of the object.
   KIND is the complete 16-bit mapping kind: its low byte selects the mapping
   type, and its high byte is printed as a power of two (presumably the log2
   of the required alignment -- confirm against gomp-constants.h).

   KIND must not be narrowed to 'unsigned char': doing so discards the high
   byte and makes the power-of-two field always print as 1.  */

static void
dump_var (const char *s, size_t idx, void *hostaddr, size_t size,
          unsigned short kind)
{
  unsigned int map_kind = kind & 0xff;
  unsigned int shift = kind >> 8;
  /* Shifting by the width of 'int' or more is undefined; print 0 for values
     that cannot be represented instead.  */
  int pow2 = shift < sizeof (int) * 8 - 1 ? 1 << shift : 0;

  gomp_notify(" %2zu: %3s 0x%.2x -", idx, s, map_kind);

  switch (map_kind)
    {
    case 0x00: gomp_notify(" ALLOC "); break;
    case 0x01: gomp_notify(" ALLOC TO "); break;
    case 0x02: gomp_notify(" ALLOC FROM "); break;
    case 0x03: gomp_notify(" ALLOC TOFROM "); break;
    case 0x04: gomp_notify(" POINTER "); break;
    case 0x05: gomp_notify(" TO_PSET "); break;

    case 0x08: gomp_notify(" FORCE_ALLOC "); break;
    case 0x09: gomp_notify(" FORCE_TO "); break;
    case 0x0a: gomp_notify(" FORCE_FROM "); break;
    case 0x0b: gomp_notify(" FORCE_TOFROM "); break;
    case 0x0c: gomp_notify(" FORCE_PRESENT "); break;
    case 0x0d: gomp_notify(" FORCE_DEALLOC "); break;
    case 0x0e: gomp_notify(" FORCE_DEVICEPTR "); break;

    case 0x18: gomp_notify(" FORCE_PRIVATE "); break;
    case 0x19: gomp_notify(" FORCE_FIRSTPRIVATE "); break;

    case (unsigned char) -1: gomp_notify(" DUMMY "); break;
    default: gomp_notify("UGH! 0x%x\n", map_kind); break;
    }

  gomp_notify("- %d - %4d/0x%04x ", pow2, (int)size, (int)size);
  gomp_notify("- %p\n", hostaddr);
}
75 static int
76 find_pset (int pos, size_t mapnum, unsigned short *kinds)
78 if (pos + 1 >= mapnum)
79 return 0;
81 unsigned char kind = kinds[pos+1] & 0xff;
83 return kind == GOMP_MAP_TO_PSET;
87 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
88 plugins have been loaded if appropriate). The ACC_dev variable for the
89 current thread will be set appropriately for the given device type on
90 return. */
92 attribute_hidden void
93 select_acc_device (int device_type)
95 ACC_lazy_initialize ();
97 if (device_type == GOMP_IF_CLAUSE_FALSE)
98 return;
100 if (device_type == acc_device_none)
101 device_type = acc_device_host;
103 if (device_type >= 0)
105 /* NOTE: this will go badly if the surrounding data environment is set up
106 to use a different device type. We'll just have to trust that users
107 know what they're doing... */
108 acc_set_device_type (device_type);
112 void goacc_wait (int async, int num_waits, va_list ap);
114 void
115 GOACC_parallel (int device, void (*fn) (void *), const void *offload_table,
116 size_t mapnum, void **hostaddrs, size_t *sizes,
117 unsigned short *kinds,
118 int num_gangs, int num_workers, int vector_length,
119 int async, int num_waits, ...)
121 bool if_clause_condition_value = device != GOMP_IF_CLAUSE_FALSE;
122 va_list ap;
123 struct goacc_thread *thr;
124 struct gomp_device_descr *acc_dev;
125 struct target_mem_desc *tgt;
126 void **devaddrs;
127 unsigned int i;
128 struct splay_tree_key_s k;
129 splay_tree_key tgt_fn_key;
130 void (*tgt_fn);
132 if (num_workers != 1)
133 gomp_fatal ("num_workers (%d) different from one is not yet supported",
134 num_workers);
136 gomp_notify ("%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
137 __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);
139 select_acc_device (device);
141 thr = goacc_thread ();
142 acc_dev = thr->dev;
144 /* Host fallback if "if" clause is false or if the current device is set to
145 the host. */
146 if (!if_clause_condition_value)
148 ACC_save_and_set_bind (acc_device_host);
149 fn (hostaddrs);
150 ACC_restore_bind ();
151 return;
153 else if (acc_device_type (acc_dev->type) == acc_device_host)
155 fn (hostaddrs);
156 return;
159 va_start (ap, num_waits);
161 if (num_waits > 0)
162 goacc_wait (async, num_waits, ap);
164 va_end (ap);
166 acc_dev->openacc.async_set_async_func (async);
168 if (!(acc_dev->capabilities & TARGET_CAP_NATIVE_EXEC))
170 k.host_start = (uintptr_t) fn;
171 k.host_end = k.host_start + 1;
172 gomp_mutex_lock (&acc_dev->mem_map.lock);
173 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
174 gomp_mutex_unlock (&acc_dev->mem_map.lock);
176 if (tgt_fn_key == NULL)
177 gomp_fatal ("target function wasn't mapped: perhaps -fopenacc was "
178 "used without -flto?");
180 tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
182 else
183 tgt_fn = (void (*)) fn;
185 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
186 false);
188 devaddrs = alloca (sizeof (void *) * mapnum);
189 for (i = 0; i < mapnum; i++)
190 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
191 + tgt->list[i]->tgt_offset);
193 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
194 num_gangs, num_workers, vector_length, async,
195 tgt);
197 /* If running synchronously, unmap immediately. */
198 if (async < acc_async_noval)
199 gomp_unmap_vars (tgt, true);
200 else
202 gomp_copy_from_async (tgt);
203 acc_dev->openacc.register_async_cleanup_func (tgt);
206 acc_dev->openacc.async_set_async_func (acc_async_sync);
209 void
210 GOACC_data_start (int device, const void *offload_table, size_t mapnum,
211 void **hostaddrs, size_t *sizes, unsigned short *kinds)
213 bool if_clause_condition_value = device != GOMP_IF_CLAUSE_FALSE;
214 struct target_mem_desc *tgt;
216 gomp_notify ("%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
217 __FUNCTION__, mapnum, hostaddrs, sizes, kinds);
219 select_acc_device (device);
221 struct goacc_thread *thr = goacc_thread ();
222 struct gomp_device_descr *acc_dev = thr->dev;
224 /* Host fallback or 'do nothing'. */
225 if ((acc_dev->capabilities & TARGET_CAP_SHARED_MEM)
226 || !if_clause_condition_value)
228 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
229 tgt->prev = thr->mapped_data;
230 thr->mapped_data = tgt;
232 return;
235 gomp_notify (" %s: prepare mappings\n", __FUNCTION__);
236 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
237 false);
238 gomp_notify (" %s: mappings prepared\n", __FUNCTION__);
239 tgt->prev = thr->mapped_data;
240 thr->mapped_data = tgt;
243 void
244 GOACC_data_end (void)
246 struct goacc_thread *thr = goacc_thread ();
247 struct target_mem_desc *tgt = thr->mapped_data;
249 gomp_notify (" %s: restore mappings\n", __FUNCTION__);
250 thr->mapped_data = tgt->prev;
251 gomp_unmap_vars (tgt, true);
252 gomp_notify (" %s: mappings restored\n", __FUNCTION__);
255 void
256 GOACC_enter_exit_data (int device, const void *offload_table, size_t mapnum,
257 void **hostaddrs, size_t *sizes, unsigned short *kinds,
258 int async, int num_waits, ...)
260 struct goacc_thread *thr;
261 struct gomp_device_descr *acc_dev;
262 bool if_clause_condition_value = device != GOMP_IF_CLAUSE_FALSE;
263 bool data_enter = false;
264 size_t i;
266 select_acc_device (device);
268 thr = goacc_thread ();
269 acc_dev = thr->dev;
271 if ((acc_dev->capabilities & TARGET_CAP_SHARED_MEM)
272 || !if_clause_condition_value)
273 return;
275 if (num_waits > 0)
277 va_list ap;
279 va_start (ap, num_waits);
281 goacc_wait (async, num_waits, ap);
283 va_end (ap);
286 acc_dev->openacc.async_set_async_func (async);
288 /* Determine if this is an "acc enter data". */
289 for (i = 0; i < mapnum; ++i)
291 unsigned char kind = kinds[i] & 0xff;
293 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
294 continue;
296 if (kind == GOMP_MAP_FORCE_ALLOC || kind == GOMP_MAP_FORCE_PRESENT
297 || kind == GOMP_MAP_FORCE_TO)
299 data_enter = true;
300 break;
303 if (kind == GOMP_MAP_FORCE_DEALLOC || kind == GOMP_MAP_FORCE_FROM)
304 break;
306 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
307 kind);
310 if (data_enter)
312 for (i = 0; i < mapnum; i++)
314 unsigned char kind = kinds[i] & 0xff;
316 /* Scan for PSETs. */
317 int psets = find_pset (i, mapnum, kinds);
319 if (!psets)
321 switch (kind)
323 case GOMP_MAP_POINTER:
324 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
325 &kinds[i]);
326 break;
327 case GOMP_MAP_FORCE_ALLOC:
328 acc_create (hostaddrs[i], sizes[i]);
329 break;
330 case GOMP_MAP_FORCE_PRESENT:
331 acc_present_or_copyin (hostaddrs[i], sizes[i]);
332 break;
333 case GOMP_MAP_FORCE_TO:
334 acc_present_or_copyin (hostaddrs[i], sizes[i]);
335 break;
336 default:
337 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
338 kind);
339 break;
342 else
344 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
345 /* Increment 'i' by two because OpenACC requires fortran
346 arrays to be contiguous, so each PSET is associated with
347 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
348 one MAP_POINTER. */
349 i += 2;
353 else
354 for (i = 0; i < mapnum; ++i)
356 unsigned char kind = kinds[i] & 0xff;
358 int psets = find_pset (i, mapnum, kinds);
360 if (!psets)
362 switch (kind)
364 case GOMP_MAP_POINTER:
365 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
366 == GOMP_MAP_FORCE_FROM,
367 async, 1);
368 break;
369 case GOMP_MAP_FORCE_DEALLOC:
370 acc_delete (hostaddrs[i], sizes[i]);
371 break;
372 case GOMP_MAP_FORCE_FROM:
373 acc_copyout (hostaddrs[i], sizes[i]);
374 break;
375 default:
376 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
377 kind);
378 break;
381 else
383 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
384 == GOMP_MAP_FORCE_FROM, async, 3);
385 /* See the above comment. */
386 i += 2;
390 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Entry point for an OpenACC kernels construct.

   The construct is currently executed exactly like a parallel construct:
   after selecting the device and performing the requested waits, all
   arguments are handed to GOACC_parallel.  NUM_WAITS is the number of 'int'
   queue ids passed as variadic arguments; see GOACC_parallel for the other
   parameters.  */

void
GOACC_kernels (int device, void (*fn) (void *), const void *offload_table,
               size_t mapnum, void **hostaddrs, size_t *sizes,
               unsigned short *kinds,
               int num_gangs, int num_workers, int vector_length,
               int async, int num_waits, ...)
{
  va_list ap;

  gomp_notify ("%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n", __FUNCTION__,
               mapnum, hostaddrs, sizes, kinds);

  select_acc_device (device);

  va_start (ap, num_waits);

  if (num_waits > 0)
    goacc_wait (async, num_waits, ap);

  va_end (ap);

  /* TODO.  The waits have been performed above and the variadic queue ids
     are not forwarded, so GOACC_parallel must be told that there is nothing
     to wait for.  Passing NUM_WAITS through would make it read variadic
     arguments that were never supplied, which is undefined behaviour.  */
  GOACC_parallel (device, fn, offload_table, mapnum, hostaddrs, sizes, kinds,
                  num_gangs, num_workers, vector_length, async, 0);
}
419 void
420 goacc_wait (int async, int num_waits, va_list ap)
422 struct goacc_thread *thr = goacc_thread ();
423 struct gomp_device_descr *acc_dev = thr->dev;
424 int i;
426 assert (num_waits >= 0);
428 if (async == acc_async_sync && num_waits == 0)
430 acc_wait_all ();
431 return;
434 if (async == acc_async_sync && num_waits)
436 for (i = 0; i < num_waits; i++)
438 int qid = va_arg (ap, int);
440 if (acc_async_test (qid))
441 continue;
443 acc_wait (qid);
445 return;
448 if (async == acc_async_noval && num_waits == 0)
450 acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
451 return;
454 for (i = 0; i < num_waits; i++)
456 int qid = va_arg (ap, int);
458 if (acc_async_test (qid))
459 continue;
461 /* If we're waiting on the same asynchronous queue as we're launching on,
462 the queue itself will order work as required, so there's no need to
463 wait explicitly. */
464 if (qid != async)
465 acc_dev->openacc.async_wait_async_func (qid, async);
469 void
470 GOACC_update (int device, const void *offload_table, size_t mapnum,
471 void **hostaddrs, size_t *sizes, unsigned short *kinds,
472 int async, int num_waits, ...)
474 bool if_clause_condition_value = device != GOMP_IF_CLAUSE_FALSE;
475 size_t i;
477 select_acc_device (device);
479 struct goacc_thread *thr = goacc_thread ();
480 struct gomp_device_descr *acc_dev = thr->dev;
482 if ((acc_dev->capabilities & TARGET_CAP_SHARED_MEM)
483 || !if_clause_condition_value)
484 return;
486 if (num_waits > 0)
488 va_list ap;
490 va_start (ap, num_waits);
492 goacc_wait (async, num_waits, ap);
494 va_end (ap);
497 acc_dev->openacc.async_set_async_func (async);
499 for (i = 0; i < mapnum; ++i)
501 unsigned char kind = kinds[i] & 0xff;
503 dump_var ("UPD", i, hostaddrs[i], sizes[i], kinds[i]);
505 switch (kind)
507 case GOMP_MAP_POINTER:
508 case GOMP_MAP_TO_PSET:
509 break;
511 case GOMP_MAP_FORCE_TO:
512 acc_update_device (hostaddrs[i], sizes[i]);
513 break;
515 case GOMP_MAP_FORCE_FROM:
516 acc_update_self (hostaddrs[i], sizes[i]);
517 break;
519 default:
520 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
521 break;
525 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Implement the OpenACC "wait" directive: collect the NUM_WAITS variadic
   'int' queue ids and let goacc_wait process them for queue ASYNC.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queue_ids;

  va_start (queue_ids, num_waits);
  goacc_wait (async, num_waits, queue_ids);
  va_end (queue_ids);
}
/* Return the number of threads; this implementation always reports one.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
/* Return the number of the calling thread; this implementation always
   reports thread zero.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}