/* Copyright (C) 2013-2015 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles OpenACC constructs.  */
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
39 find_pset (int pos
, size_t mapnum
, unsigned short *kinds
)
41 if (pos
+ 1 >= mapnum
)
44 unsigned char kind
= kinds
[pos
+1] & 0xff;
46 return kind
== GOMP_MAP_TO_PSET
;
/* NOTE(review): this extract has lost lines of the original file.  The
   function's return type, its braces and at least one short statement are
   not visible, and the descriptive comment that follows is cut off before
   its closing delimiter.  What is visible: the function calls
   goacc_lazy_initialize, tests DEVICE_TYPE against
   GOMP_DEVICE_HOST_FALLBACK, replaces acc_device_none by acc_device_host,
   and passes the result to acc_set_device_type.  The statement controlled
   by the host-fallback test, and anything guarding the final call, cannot
   be seen here; confirm against the complete file before relying on this
   summary.  */
50 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
51 plugins have been loaded if appropriate). The ACC_dev variable for the
52 current thread will be set appropriately for the given device type on
56 select_acc_device (int device_type
)
58 goacc_lazy_initialize ();
60 if (device_type
== GOMP_DEVICE_HOST_FALLBACK
)
63 if (device_type
== acc_device_none
)
64 device_type
= acc_device_host
;
68 /* NOTE: this will go badly if the surrounding data environment is set up
69 to use a different device type. We'll just have to trust that users
70 know what they're doing... */
71 acc_set_device_type (device_type
);
/* Forward declaration; the definition appears later in this file.  */
static void goacc_wait (int async, int num_waits, va_list ap);
/* GOACC_parallel: run FN for an OpenACC parallel construct on the device
   selected by DEVICE.

   NOTE(review): this extract has lost lines of the original file (the
   return type, braces, several declarations and short statements are not
   visible), and the "Host fallback" comment further down is cut off before
   its closing delimiter.  The summary below covers only what is visible.

   Visible behaviour, in order:
   - HOST_FALLBACK is true when DEVICE equals GOMP_DEVICE_HOST_FALLBACK.
   - A gomp_fatal call reports a NUM_WORKERS value other than one as
     unsupported (the guarding condition is not visible).
   - select_acc_device (DEVICE) runs, then goacc_thread () supplies THR.
   - goacc_save_and_set_bind (acc_device_host) and goacc_restore_bind ()
     are called (what runs between them is not visible), and a further
     branch tests whether ACC_DEV's type is acc_device_host.
   - va_start and goacc_wait consume the variadic wait arguments, after
     which the device's async_set_async_func hook is given ASYNC.
   - When the device lacks GOMP_OFFLOAD_CAP_NATIVE_EXEC, the address of FN
     is looked up in acc_dev->mem_map.splay_tree while mem_map.lock is
     held; a failed lookup is fatal, a hit supplies TGT_FN from the
     entry's tgt_start.  A further assignment sets TGT_FN to FN itself
     (presumably the else arm; the keyword is not visible -- confirm).
   - gomp_map_vars maps the MAPNUM host addresses.  DEVADDRS, obtained from
     gomp_alloca, receives tgt_start + tgt_offset for each list entry.
   - The device's exec_func hook is called with TGT_FN and the arrays.
   - When ASYNC is below acc_async_noval the mappings are released at once
     with gomp_unmap_vars.
   - Last, async_set_async_func is reset to acc_async_sync.

   NOTE(review): MAPNUM is a size_t but the debug format prints it with
   %zd; %zu is the matching conversion.  */
78 GOACC_parallel (int device
, void (*fn
) (void *),
79 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
80 unsigned short *kinds
,
81 int num_gangs
, int num_workers
, int vector_length
,
82 int async
, int num_waits
, ...)
84 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
86 struct goacc_thread
*thr
;
87 struct gomp_device_descr
*acc_dev
;
88 struct target_mem_desc
*tgt
;
91 struct splay_tree_key_s k
;
92 splay_tree_key tgt_fn_key
;
96 gomp_fatal ("num_workers (%d) different from one is not yet supported",
99 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
100 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
, async
);
102 select_acc_device (device
);
104 thr
= goacc_thread ();
107 /* Host fallback if "if" clause is false or if the current device is set to
111 goacc_save_and_set_bind (acc_device_host
);
113 goacc_restore_bind ();
116 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
122 va_start (ap
, num_waits
);
125 goacc_wait (async
, num_waits
, ap
);
129 acc_dev
->openacc
.async_set_async_func (async
);
131 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
133 k
.host_start
= (uintptr_t) fn
;
134 k
.host_end
= k
.host_start
+ 1;
135 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
136 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
.splay_tree
, &k
);
137 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
139 if (tgt_fn_key
== NULL
)
140 gomp_fatal ("target function wasn't mapped");
142 tgt_fn
= (void (*)) tgt_fn_key
->tgt
->tgt_start
;
145 tgt_fn
= (void (*)) fn
;
147 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
150 devaddrs
= gomp_alloca (sizeof (void *) * mapnum
);
151 for (i
= 0; i
< mapnum
; i
++)
152 devaddrs
[i
] = (void *) (tgt
->list
[i
]->tgt
->tgt_start
153 + tgt
->list
[i
]->tgt_offset
);
155 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
, sizes
, kinds
,
156 num_gangs
, num_workers
, vector_length
, async
,
159 /* If running synchronously, unmap immediately. */
160 if (async
< acc_async_noval
)
161 gomp_unmap_vars (tgt
, true);
/* NOTE(review): the next two calls are presumably the asynchronous arm of
   the test above (the line carrying the else is not visible): the copy-back
   is started with gomp_copy_from_async and TGT is handed to the device's
   register_async_cleanup_func hook.  */
164 gomp_copy_from_async (tgt
);
165 acc_dev
->openacc
.register_async_cleanup_func (tgt
);
168 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* GOACC_data_start: open a data region for the calling thread by creating
   a mapping descriptor and pushing it on the thread's mapped_data chain.

   NOTE(review): this extract has lost lines of the original file (return
   type, braces, the remainder of the capability test, the final argument
   of the second gomp_map_vars call and any early return are not visible),
   so the branch structure below is a reading of the visible statements
   only.

   DEVICE selects the device through select_acc_device; MAPNUM, HOSTADDRS,
   SIZES and KINDS are forwarded to gomp_map_vars.

   NOTE(review): MAPNUM is a size_t but the debug format prints it with
   %zd; %zu is the matching conversion.  */
172 GOACC_data_start (int device
, size_t mapnum
,
173 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
175 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
176 struct target_mem_desc
*tgt
;
178 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
179 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
);
181 select_acc_device (device
);
183 struct goacc_thread
*thr
= goacc_thread ();
184 struct gomp_device_descr
*acc_dev
= thr
->dev
;
186 /* Host fallback or 'do nothing'. */
187 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
/* Nothing is mapped here (no device, zero entries), but a descriptor is
   still created and pushed at the head of the thread's mapped_data
   chain.  */
190 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
191 tgt
->prev
= thr
->mapped_data
;
192 thr
->mapped_data
= tgt
;
/* Map the caller's entries on ACC_DEV and push the resulting descriptor at
   the head of the same chain.  */
197 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
198 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
200 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
201 tgt
->prev
= thr
->mapped_data
;
202 thr
->mapped_data
= tgt
;
206 GOACC_data_end (void)
208 struct goacc_thread
*thr
= goacc_thread ();
209 struct target_mem_desc
*tgt
= thr
->mapped_data
;
211 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
212 thr
->mapped_data
= tgt
->prev
;
213 gomp_unmap_vars (tgt
, true);
214 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
/* GOACC_enter_exit_data: process the mappings of an OpenACC "enter data"
   or "exit data" directive.

   NOTE(review): this extract has lost lines of the original file (return
   type, braces, switch headers, break statements, the bodies of several
   tests and some call arguments are not visible), and the "Increment 'i'
   by two" comment is cut off before its closing delimiter.  The summary
   below covers only what is visible.

   Visible behaviour, in order:
   - select_acc_device (DEVICE) runs and goacc_thread () supplies THR.
   - The device's GOMP_OFFLOAD_CAP_SHARED_MEM capability is tested (the
     rest of the condition and its body are not visible).
   - va_start and goacc_wait consume the variadic wait arguments, after
     which the device's async_set_async_func hook is given ASYNC.
   - A first pass classifies the low byte of each KINDS entry: POINTER and
     TO_PSET are tested on their own; FORCE_ALLOC, FORCE_PRESENT and
     FORCE_TO form one group; FORCE_DEALLOC and FORCE_FROM form another;
     a gomp_fatal call reports an unhandled kind.  DATA_ENTER starts as
     false and is presumably set by the first group -- confirm.
   - Two loops follow, each asking find_pset whether the next entry is a
     pointer set.  One loop calls gomp_acc_insert_pointer (1, ...) for
     POINTER, acc_create for FORCE_ALLOC and acc_present_or_copyin for
     FORCE_PRESENT and FORCE_TO, and gomp_acc_insert_pointer (3, ...)
     for the pointer-set form.  The other loop calls
     gomp_acc_remove_pointer for POINTER, acc_delete for FORCE_DEALLOC and
     acc_copyout for FORCE_FROM, and gomp_acc_remove_pointer (..., async,
     3) for the pointer-set form.
   - Last, async_set_async_func is reset to acc_async_sync.

   NOTE(review): GOMP_MAP_FORCE_PRESENT is handled with
   acc_present_or_copyin, exactly as GOMP_MAP_FORCE_TO is; confirm that a
   copy-in is intended for "present".

   NOTE(review): under the GOMP_MAP_POINTER case of the removal loop the
   second argument compares the low byte of kinds[i] with
   GOMP_MAP_FORCE_FROM.  If the switch is on that same low byte, the
   comparison can only be false there (assuming the two constants differ)
   -- confirm the intent.  */
218 GOACC_enter_exit_data (int device
, size_t mapnum
,
219 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
220 int async
, int num_waits
, ...)
222 struct goacc_thread
*thr
;
223 struct gomp_device_descr
*acc_dev
;
224 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
225 bool data_enter
= false;
228 select_acc_device (device
);
230 thr
= goacc_thread ();
233 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
241 va_start (ap
, num_waits
);
243 goacc_wait (async
, num_waits
, ap
);
248 acc_dev
->openacc
.async_set_async_func (async
);
250 /* Determine if this is an "acc enter data". */
251 for (i
= 0; i
< mapnum
; ++i
)
253 unsigned char kind
= kinds
[i
] & 0xff;
255 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
258 if (kind
== GOMP_MAP_FORCE_ALLOC
259 || kind
== GOMP_MAP_FORCE_PRESENT
260 || kind
== GOMP_MAP_FORCE_TO
)
266 if (kind
== GOMP_MAP_FORCE_DEALLOC
267 || kind
== GOMP_MAP_FORCE_FROM
)
270 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
276 for (i
= 0; i
< mapnum
; i
++)
278 unsigned char kind
= kinds
[i
] & 0xff;
280 /* Scan for PSETs. */
281 int psets
= find_pset (i
, mapnum
, kinds
);
287 case GOMP_MAP_POINTER
:
288 gomp_acc_insert_pointer (1, &hostaddrs
[i
], &sizes
[i
],
291 case GOMP_MAP_FORCE_ALLOC
:
292 acc_create (hostaddrs
[i
], sizes
[i
]);
294 case GOMP_MAP_FORCE_PRESENT
:
295 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
297 case GOMP_MAP_FORCE_TO
:
298 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
301 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
308 gomp_acc_insert_pointer (3, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
]);
309 /* Increment 'i' by two because OpenACC requires fortran
310 arrays to be contiguous, so each PSET is associated with
311 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
318 for (i
= 0; i
< mapnum
; ++i
)
320 unsigned char kind
= kinds
[i
] & 0xff;
322 int psets
= find_pset (i
, mapnum
, kinds
);
328 case GOMP_MAP_POINTER
:
329 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
330 == GOMP_MAP_FORCE_FROM
,
333 case GOMP_MAP_FORCE_DEALLOC
:
334 acc_delete (hostaddrs
[i
], sizes
[i
]);
336 case GOMP_MAP_FORCE_FROM
:
337 acc_copyout (hostaddrs
[i
], sizes
[i
]);
340 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
347 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
348 == GOMP_MAP_FORCE_FROM
, async
, 3);
349 /* See the above comment. */
354 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* GOACC_kernels: entry point for an OpenACC kernels construct.  The visible
   code handles the wait arguments itself and then delegates to
   GOACC_parallel with the same DEVICE, FN, mapping arrays, launch
   dimensions and ASYNC.

   NOTE(review): this extract has lost lines of the original file (return
   type, braces, the declaration of AP, and whatever sat between the
   visible statements are not shown), so any guard around the goacc_wait
   call cannot be confirmed from here.

   NOTE(review): MAPNUM is a size_t but the debug format prints it with
   %zd; %zu is the matching conversion.  */
358 GOACC_kernels (int device
, void (*fn
) (void *),
359 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
360 unsigned short *kinds
,
361 int num_gangs
, int num_workers
, int vector_length
,
362 int async
, int num_waits
, ...)
364 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
365 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
);
/* GOACC_parallel, called below, also calls select_acc_device with the same
   DEVICE.  */
369 select_acc_device (device
);
371 va_start (ap
, num_waits
);
374 goacc_wait (async
, num_waits
, ap
);
/* The wait arguments have been consumed above, so the delegate is called
   with a wait count of zero and no variadic arguments.  */
378 GOACC_parallel (device
, fn
, mapnum
, hostaddrs
, sizes
, kinds
,
379 num_gangs
, num_workers
, vector_length
, async
, 0);
/* goacc_wait: shared implementation of the wait handling used by the
   GOACC_* entry points.  ASYNC is the queue the caller is about to use,
   NUM_WAITS the number of queue identifiers that can be read from AP as
   ints.

   NOTE(review): this extract has lost lines of the original file (return
   type, braces, the declaration of I and the bodies of most tests are not
   visible), and the final comment is cut off before its closing
   delimiter.  The summary below covers only what is visible.

   Visible behaviour, in order:
   - NUM_WAITS is asserted to be non-negative.
   - ASYNC == acc_async_sync with no waits is tested first (its body is
     not visible).
   - ASYNC == acc_async_sync with waits: each queue identifier is read
     with va_arg and tested with acc_async_test (the statements that
     depend on the test are not visible).
   - ASYNC == acc_async_noval with no waits: the device's
     async_wait_all_async_func hook is called with acc_async_noval.
   - Otherwise each queue identifier is read with va_arg, tested with
     acc_async_test, and passed with ASYNC to the device's
     async_wait_async_func hook.  */
383 goacc_wait (int async
, int num_waits
, va_list ap
)
385 struct goacc_thread
*thr
= goacc_thread ();
386 struct gomp_device_descr
*acc_dev
= thr
->dev
;
389 assert (num_waits
>= 0);
391 if (async
== acc_async_sync
&& num_waits
== 0)
397 if (async
== acc_async_sync
&& num_waits
)
399 for (i
= 0; i
< num_waits
; i
++)
401 int qid
= va_arg (ap
, int);
403 if (acc_async_test (qid
))
411 if (async
== acc_async_noval
&& num_waits
== 0)
413 acc_dev
->openacc
.async_wait_all_async_func (acc_async_noval
);
417 for (i
= 0; i
< num_waits
; i
++)
419 int qid
= va_arg (ap
, int);
421 if (acc_async_test (qid
))
424 /* If we're waiting on the same asynchronous queue as we're launching on,
425 the queue itself will order work as required, so there's no need to
428 acc_dev
->openacc
.async_wait_async_func (qid
, async
);
/* GOACC_update: process the mappings of an OpenACC update directive,
   refreshing device or host copies entry by entry.

   NOTE(review): this extract has lost lines of the original file (return
   type, braces, the declarations of AP and I, the switch header, break
   statements and the bodies of the early tests are not visible).  The
   summary below covers only what is visible.

   DEVICE selects the device through select_acc_device.  HOSTADDRS, SIZES
   and KINDS each have MAPNUM entries; the kind of an entry is the low
   byte of its KINDS value.  ASYNC and the NUM_WAITS variadic arguments are
   handed to goacc_wait.  */
433 GOACC_update (int device
, size_t mapnum
,
434 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
435 int async
, int num_waits
, ...)
437 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
440 select_acc_device (device
);
442 struct goacc_thread
*thr
= goacc_thread ();
443 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* NOTE(review): the rest of this condition and its body are not visible;
   HOST_FALLBACK is computed above but no visible statement reads it.  */
445 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
453 va_start (ap
, num_waits
);
455 goacc_wait (async
, num_waits
, ap
);
/* Hand ASYNC to the device's hook for the transfers below; the hook is
   reset to acc_async_sync at the end of the function.  */
460 acc_dev
->openacc
.async_set_async_func (async
);
462 for (i
= 0; i
< mapnum
; ++i
)
464 unsigned char kind
= kinds
[i
] & 0xff;
/* No call is visible for the POINTER and TO_PSET labels.  */
468 case GOMP_MAP_POINTER
:
469 case GOMP_MAP_TO_PSET
:
472 case GOMP_MAP_FORCE_TO
:
473 acc_update_device (hostaddrs
[i
], sizes
[i
]);
476 case GOMP_MAP_FORCE_FROM
:
477 acc_update_self (hostaddrs
[i
], sizes
[i
]);
/* Any other kind is reported through gomp_fatal.  */
481 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
486 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Variadic entry point for waiting: collect the arguments that follow
   NUM_WAITS into a va_list and hand them, together with ASYNC and
   NUM_WAITS, to goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list ap;

  va_start (ap, num_waits);

  goacc_wait (async, num_waits, ap);

  /* Every va_start needs a matching va_end before the function returns.  */
  va_end (ap);
}
502 GOACC_get_num_threads (void)
508 GOACC_get_thread_num (void)