1 /* Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
/* Test whether the mapping entry that follows index POS is a pointer set:
   the result is the comparison of the low 8 bits of KINDS[POS + 1] with
   GOMP_MAP_TO_PSET.  MAPNUM bounds the index that may be read.
   NOTE(review): the statement guarded by the bounds check is not visible
   in this copy of the file -- presumably it returns 0; confirm.  */
42 find_pset (int pos
, size_t mapnum
, unsigned short *kinds
)
/* Bounds check performed before KINDS[POS + 1] is read.  */
44 if (pos
+ 1 >= mapnum
)
47 unsigned char kind
= kinds
[pos
+1] & 0xff;
49 return kind
== GOMP_MAP_TO_PSET
;
/* Forward declaration; goacc_wait is defined further down in this file
   and is called by the entry points that precede its definition.  */
52 static void goacc_wait (int async
, int num_waits
, va_list *ap
);
55 /* Launch a possibly offloaded function on DEVICE. FN is the host fn
56 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
57 blocks to be copied to/from the device. Variadic arguments are
58 keyed optional parameters terminated with a zero. */
61 GOACC_parallel_keyed (int device
, void (*fn
) (void *),
62 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
63 unsigned short *kinds
, ...)
/* Remember whether the caller explicitly requested host fallback.  */
65 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
67 struct goacc_thread
*thr
;
68 struct gomp_device_descr
*acc_dev
;
69 struct target_mem_desc
*tgt
;
72 struct splay_tree_key_s k
;
73 splay_tree_key tgt_fn_key
;
/* ASYNC starts out as GOMP_ASYNC_SYNC; a GOMP_LAUNCH_ASYNC tag decoded
   below overwrites it.  */
75 int async
= GOMP_ASYNC_SYNC
;
76 unsigned dims
[GOMP_DIM_MAX
];
/* Two spellings of the same trace line, presumably selected by the
   #ifdef: PRIu64 with a uint64_t cast, or %lu with an unsigned long
   cast.  NOTE(review): the first spelling prints "size=" where the
   second prints "sizes=" -- this looks unintentional.  */
79 #ifdef HAVE_INTTYPES_H
80 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
81 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
83 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
86 goacc_lazy_initialize ();
88 thr
= goacc_thread ();
91 /* Host fallback if "if" clause is false or if the current device is set to
95 goacc_save_and_set_bind (acc_device_host
);
97 goacc_restore_bind ();
100 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
106 /* Default: let the runtime choose. */
107 for (i
= 0; i
!= GOMP_DIM_MAX
; i
++)
/* Decode the keyed variadic arguments: each tag is read as an unsigned
   and a zero tag ends the list.  */
110 va_start (ap
, kinds
);
111 /* TODO: This will need amending when device_type is implemented. */
112 while ((tag
= va_arg (ap
, unsigned)) != 0)
/* A nonzero device field in the tag is rejected as unsupported.  */
114 if (GOMP_LAUNCH_DEVICE (tag
))
115 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
116 GOMP_LAUNCH_DEVICE (tag
));
118 switch (GOMP_LAUNCH_CODE (tag
))
120 case GOMP_LAUNCH_DIM
:
/* The operand is a bit mask of dimensions; for every bit that is set
   one further unsigned argument is read into DIMS.  */
122 unsigned mask
= GOMP_LAUNCH_OP (tag
);
124 for (i
= 0; i
!= GOMP_DIM_MAX
; i
++)
125 if (mask
& GOMP_DIM_MASK (i
))
126 dims
[i
] = va_arg (ap
, unsigned);
130 case GOMP_LAUNCH_ASYNC
:
132 /* Small constant values are encoded in the operand. */
133 async
= GOMP_LAUNCH_OP (tag
);
/* GOMP_LAUNCH_OP_MAX in the operand means the real value follows as a
   separate argument.  */
135 if (async
== GOMP_LAUNCH_OP_MAX
)
136 async
= va_arg (ap
, unsigned);
140 case GOMP_LAUNCH_WAIT
:
/* The operand is the wait count handed to goacc_wait together with the
   argument list.  */
142 unsigned num_waits
= GOMP_LAUNCH_OP (tag
);
145 goacc_wait (async
, num_waits
, &ap
);
/* NOTE(review): presumably the default case -- any other launch code is
   fatal.  */
150 gomp_fatal ("unrecognized offload code '%d',"
151 " libgomp is too old", GOMP_LAUNCH_CODE (tag
));
/* Hand ASYNC to the plugin's async_set_async_func hook; it is set back
   to acc_async_sync at the end of this function.  */
156 acc_dev
->openacc
.async_set_async_func (async
);
/* Without the native-exec capability, FN's host address is looked up in
   the device's mem_map (under the device lock) and the recorded
   tgt_offset is used as the target function; a missing entry is
   fatal.  */
158 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
160 k
.host_start
= (uintptr_t) fn
;
161 k
.host_end
= k
.host_start
+ 1;
162 gomp_mutex_lock (&acc_dev
->lock
);
163 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
, &k
);
164 gomp_mutex_unlock (&acc_dev
->lock
);
166 if (tgt_fn_key
== NULL
)
167 gomp_fatal ("target function wasn't mapped");
169 tgt_fn
= (void (*)) tgt_fn_key
->tgt_offset
;
/* NOTE(review): presumably the else-branch of the capability test above,
   where the host function pointer is used directly -- confirm.  */
172 tgt_fn
= (void (*)) fn
;
/* Create the mappings described by HOSTADDRS, SIZES and KINDS.  */
174 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
175 GOMP_MAP_VARS_OPENACC
);
/* Collect one device address per mapping: tgt_start of the descriptor
   owning the key plus the key's tgt_offset.  */
177 devaddrs
= gomp_alloca (sizeof (void *) * mapnum
);
178 for (i
= 0; i
< mapnum
; i
++)
179 devaddrs
[i
] = (void *) (tgt
->list
[i
].key
->tgt
->tgt_start
180 + tgt
->list
[i
].key
->tgt_offset
);
182 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
,
185 /* If running synchronously, unmap immediately. */
186 if (async
< acc_async_noval
)
187 gomp_unmap_vars (tgt
, true);
/* NOTE(review): presumably the else-branch -- otherwise the descriptor
   goes through gomp_copy_from_async and is registered with the plugin's
   async cleanup hook.  */
190 gomp_copy_from_async (tgt
);
191 acc_dev
->openacc
.register_async_cleanup_func (tgt
);
/* Set the plugin's async value back to acc_async_sync.  */
194 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
197 /* Legacy entry point; it only provides host execution. */
200 GOACC_parallel (int device
, void (*fn
) (void *),
201 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
202 unsigned short *kinds
,
203 int num_gangs
, int num_workers
, int vector_length
,
204 int async
, int num_waits
, ...)
/* Bind to acc_device_host, then restore the previous binding.
   NOTE(review): whatever runs between these two calls is not visible in
   this copy of the file.  */
206 goacc_save_and_set_bind (acc_device_host
);
208 goacc_restore_bind ();
/* Create a mapping descriptor for the MAPNUM entries described by
   HOSTADDRS, SIZES and KINDS and make it the calling thread's current
   mapped_data, linked to the previous one through its prev field.
   DEVICE equal to GOMP_DEVICE_HOST_FALLBACK requests host fallback.  */
212 GOACC_data_start (int device
, size_t mapnum
,
213 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
215 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
216 struct target_mem_desc
*tgt
;
/* Two spellings of the same trace line, presumably selected by the
   #ifdef.  NOTE(review): the first prints "size=" where the second
   prints "sizes=".  */
218 #ifdef HAVE_INTTYPES_H
219 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
220 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
222 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
223 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
226 goacc_lazy_initialize ();
228 struct goacc_thread
*thr
= goacc_thread ();
229 struct gomp_device_descr
*acc_dev
= thr
->dev
;
231 /* Host fallback or 'do nothing'. */
/* NOTE(review): the rest of this condition is not visible; presumably it
   also tests host_fallback -- confirm.  */
232 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
/* An empty mapping (no device, zero entries) is still created and linked
   into thr->mapped_data.  */
235 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true,
236 GOMP_MAP_VARS_OPENACC
);
237 tgt
->prev
= thr
->mapped_data
;
238 thr
->mapped_data
= tgt
;
/* Regular path: create the real mappings on acc_dev and link the new
   descriptor in front of the thread's current one.  */
243 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
244 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
245 GOMP_MAP_VARS_OPENACC
);
246 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
247 tgt
->prev
= thr
->mapped_data
;
248 thr
->mapped_data
= tgt
;
/* Undo the calling thread's current mapping descriptor: make its
   predecessor (tgt->prev) the current mapped_data again, then unmap the
   descriptor that was removed.  */
252 GOACC_data_end (void)
254 struct goacc_thread
*thr
= goacc_thread ();
255 struct target_mem_desc
*tgt
= thr
->mapped_data
;
257 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
/* Unlink first, so thr->mapped_data no longer refers to TGT when it is
   unmapped.  */
258 thr
->mapped_data
= tgt
->prev
;
259 gomp_unmap_vars (tgt
, true);
260 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
/* Process MAPNUM mapping entries (HOSTADDRS, SIZES, KINDS) as either an
   "enter data" or an "exit data" request; which of the two is decided
   from the mapping kinds.  ASYNC, NUM_WAITS and the variadic arguments
   are handed to goacc_wait, and ASYNC is passed to the plugin's
   async_set_async_func hook for the duration of the call.  */
264 GOACC_enter_exit_data (int device
, size_t mapnum
,
265 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
266 int async
, int num_waits
, ...)
268 struct goacc_thread
*thr
;
269 struct gomp_device_descr
*acc_dev
;
270 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
/* NOTE(review): data_enter starts out false; the assignment that sets it
   is not visible in this copy of the file.  */
271 bool data_enter
= false;
274 goacc_lazy_initialize ();
276 thr
= goacc_thread ();
/* NOTE(review): the rest of this condition and the statement it guards
   are not visible.  */
279 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
287 va_start (ap
, num_waits
);
288 goacc_wait (async
, num_waits
, &ap
);
292 acc_dev
->openacc
.async_set_async_func (async
);
294 /* Determine if this is an "acc enter data". */
295 for (i
= 0; i
< mapnum
; ++i
)
297 unsigned char kind
= kinds
[i
] & 0xff;
/* Pointer and pointer-set entries are tested separately from the kinds
   below.  NOTE(review): the guarded statement is not visible --
   presumably `continue'.  */
299 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
/* Allocation, present and copy-to kinds.  NOTE(review): presumably these
   mark the request as "enter data".  */
302 if (kind
== GOMP_MAP_FORCE_ALLOC
303 || kind
== GOMP_MAP_FORCE_PRESENT
304 || kind
== GOMP_MAP_FORCE_TO
)
/* Delete and copy-from kinds.  NOTE(review): presumably these leave the
   request as "exit data".  */
310 if (kind
== GOMP_MAP_DELETE
311 || kind
== GOMP_MAP_FORCE_FROM
)
/* A kind matching none of the tests above is fatal.  */
314 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
/* NOTE(review): presumably the data_enter path -- each entry is inserted,
   created or copied in according to its kind.  */
320 for (i
= 0; i
< mapnum
; i
++)
322 unsigned char kind
= kinds
[i
] & 0xff;
324 /* Scan for PSETs. */
325 int psets
= find_pset (i
, mapnum
, kinds
);
331 case GOMP_MAP_POINTER
:
332 gomp_acc_insert_pointer (1, &hostaddrs
[i
], &sizes
[i
],
335 case GOMP_MAP_FORCE_ALLOC
:
336 acc_create (hostaddrs
[i
], sizes
[i
]);
/* NOTE(review): FORCE_PRESENT is handled with acc_present_or_copyin,
   exactly like FORCE_TO below -- confirm this is intended.  */
338 case GOMP_MAP_FORCE_PRESENT
:
339 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
341 case GOMP_MAP_FORCE_TO
:
342 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
345 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
/* Three consecutive entries starting at I are inserted in one call.
   NOTE(review): presumably taken only when find_pset reported a pointer
   set after entry I.  */
352 gomp_acc_insert_pointer (3, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
]);
353 /* Increment 'i' by two because OpenACC requires fortran
354 arrays to be contiguous, so each PSET is associated with
355 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and
362 for (i
= 0; i
< mapnum
; ++i
)
364 unsigned char kind
= kinds
[i
] & 0xff;
366 int psets
= find_pset (i
, mapnum
, kinds
);
372 case GOMP_MAP_POINTER
:
373 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
374 == GOMP_MAP_FORCE_FROM
,
377 case GOMP_MAP_DELETE
:
378 acc_delete (hostaddrs
[i
], sizes
[i
]);
380 case GOMP_MAP_FORCE_FROM
:
381 acc_copyout (hostaddrs
[i
], sizes
[i
]);
384 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
391 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
392 == GOMP_MAP_FORCE_FROM
, async
, 3);
393 /* See the above comment. */
/* Set the plugin's async value back to acc_async_sync.  */
398 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Handle wait arguments for an operation issued with async value ASYNC.
   Queue ids are read as int from *AP; NUM_WAITS is the caller's count of
   them.  NOTE(review): the loop construct, the statements guarded by the
   first two tests and the final `else' are not visible in this copy of
   the file -- the branch notes below are hedged accordingly.  */
402 goacc_wait (int async
, int num_waits
, va_list *ap
)
404 struct goacc_thread
*thr
= goacc_thread ();
405 struct gomp_device_descr
*acc_dev
= thr
->dev
;
409 int qid
= va_arg (*ap
, int);
/* NOTE(review): presumably a queue for which acc_async_test returns
   nonzero is skipped.  */
411 if (acc_async_test (qid
))
/* NOTE(review): the synchronous case; the guarded statement is not
   visible.  */
414 if (async
== acc_async_sync
)
416 else if (qid
== async
)
417 ;/* If we're waiting on the same asynchronous queue as we're
418 launching on, the queue itself will order work as
419 required, so there's no need to wait explicitly. */
/* Remaining case: the plugin's async_wait_async_func hook is called with
   QID and ASYNC.  */
421 acc_dev
->openacc
.async_wait_async_func (qid
, async
);
/* Process MAPNUM update entries (HOSTADDRS, SIZES, KINDS): entries of
   kind GOMP_MAP_FORCE_TO go to acc_update_device and entries of kind
   GOMP_MAP_FORCE_FROM go to acc_update_self.  ASYNC, NUM_WAITS and the
   variadic arguments are handed to goacc_wait, and ASYNC is passed to the
   plugin's async_set_async_func hook for the duration of the call.  */
426 GOACC_update (int device
, size_t mapnum
,
427 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
428 int async
, int num_waits
, ...)
430 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
433 goacc_lazy_initialize ();
435 struct goacc_thread
*thr
= goacc_thread ();
436 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* NOTE(review): the rest of this condition and the statement it guards
   are not visible in this copy of the file.  */
438 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
446 va_start (ap
, num_waits
);
447 goacc_wait (async
, num_waits
, &ap
);
451 acc_dev
->openacc
.async_set_async_func (async
);
453 for (i
= 0; i
< mapnum
; ++i
)
/* Only the low 8 bits of each KINDS element are examined.  */
455 unsigned char kind
= kinds
[i
] & 0xff;
/* Pointer and pointer-set entries share one case label group.
   NOTE(review): its body is not visible -- presumably a bare `break'.  */
459 case GOMP_MAP_POINTER
:
460 case GOMP_MAP_TO_PSET
:
463 case GOMP_MAP_FORCE_TO
:
464 acc_update_device (hostaddrs
[i
], sizes
[i
]);
467 case GOMP_MAP_FORCE_FROM
:
468 acc_update_self (hostaddrs
[i
], sizes
[i
]);
/* NOTE(review): presumably the default case -- any other kind is
   fatal.  */
472 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
/* Set the plugin's async value back to acc_async_sync.  */
477 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Entry point for a wait request.  The variadic arguments after
   NUM_WAITS are handed to goacc_wait together with ASYNC; the remaining
   branches distinguish ASYNC equal to acc_async_sync from ASYNC equal to
   acc_async_noval.  NOTE(review): the leading test of the if/else chain
   and the body of the acc_async_sync branch are not visible in this copy
   of the file.  */
481 GOACC_wait (int async
, int num_waits
, ...)
487 va_start (ap
, num_waits
);
488 goacc_wait (async
, num_waits
, &ap
);
491 else if (async
== acc_async_sync
)
/* For acc_async_noval the current device's async_wait_all_async_func hook
   is called with acc_async_noval.  */
493 else if (async
== acc_async_noval
)
494 goacc_thread ()->dev
->openacc
.async_wait_all_async_func (acc_async_noval
);
/* NOTE(review): only the declarator of this no-argument entry point is
   present in this copy of the file; its return type and body are not
   visible, so its behaviour is left undocumented here.  */
498 GOACC_get_num_threads (void)
/* NOTE(review): only the declarator of this no-argument entry point is
   present in this copy of the file; its return type and body are not
   visible, so its behaviour is left undocumented here.  */
504 GOACC_get_thread_num (void)
510 GOACC_declare (int device
, size_t mapnum
,
511 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
515 for (i
= 0; i
< mapnum
; i
++)
517 unsigned char kind
= kinds
[i
] & 0xff;
519 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
524 case GOMP_MAP_FORCE_ALLOC
:
525 case GOMP_MAP_FORCE_FROM
:
526 case GOMP_MAP_FORCE_TO
:
527 case GOMP_MAP_POINTER
:
528 case GOMP_MAP_DELETE
:
529 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
533 case GOMP_MAP_FORCE_DEVICEPTR
:
537 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
538 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
543 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
549 kinds
[i
] = GOMP_MAP_FORCE_FROM
;
550 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
554 case GOMP_MAP_FORCE_PRESENT
:
555 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
556 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs
[i
],
557 (unsigned long) sizes
[i
]);