1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
/* Report whether the mapping entry that follows position POS is a
   pointer set: the low 8 bits of KINDS[POS + 1] are compared with
   GOMP_MAP_TO_PSET and the result of that comparison is returned.
   POS + 1 is checked against MAPNUM before KINDS is indexed.  */
42 find_pset (int pos
, size_t mapnum
, unsigned short *kinds
)
44 if (pos
+ 1 >= mapnum
)
/* Only the low byte of the entry is compared against the map kind.  */
47 unsigned char kind
= kinds
[pos
+1] & 0xff;
49 return kind
== GOMP_MAP_TO_PSET
;
/* Forward declaration; goacc_wait is defined further down in this file
   and is called by the entry points that precede its definition.  */
52 static void goacc_wait (int async
, int num_waits
, va_list *ap
);
55 /* Launch a possibly offloaded function on DEVICE. FN is the host fn
56 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
57 blocks to be copied to/from the device. Variadic arguments are
58 keyed optional parameters terminated with a zero. */
61 GOACC_parallel_keyed (int device
, void (*fn
) (void *),
62 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
63 unsigned short *kinds
, ...)
/* DEVICE equal to GOMP_DEVICE_HOST_FALLBACK requests host fallback.  */
65 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
67 struct goacc_thread
*thr
;
68 struct gomp_device_descr
*acc_dev
;
69 struct target_mem_desc
*tgt
;
/* Lookup key used to find FN in the device's mem_map.  */
72 struct splay_tree_key_s k
;
73 splay_tree_key tgt_fn_key
;
/* The launch is synchronous unless a GOMP_LAUNCH_ASYNC tag overrides
   this value.  */
75 int async
= GOMP_ASYNC_SYNC
;
/* Per-dimension launch values, filled in from GOMP_LAUNCH_DIM tags.  */
76 unsigned dims
[GOMP_DIM_MAX
];
/* Trace the call arguments.  */
79 #ifdef HAVE_INTTYPES_H
80 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
81 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
83 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
86 goacc_lazy_initialize ();
88 thr
= goacc_thread ();
91 /* Host fallback if "if" clause is false or if the current device is set to
95 goacc_save_and_set_bind (acc_device_host
);
97 goacc_restore_bind ();
100 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
106 va_start (ap
, kinds
);
107 /* TODO: This will need amending when device_type is implemented. */
/* Read keyed tags, each fetched as an unsigned, until the terminating
   zero.  */
108 while ((tag
= va_arg (ap
, unsigned)) != 0)
/* A non-zero device field in the tag is not supported and is fatal.  */
110 if (GOMP_LAUNCH_DEVICE (tag
))
111 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
112 GOMP_LAUNCH_DEVICE (tag
));
114 switch (GOMP_LAUNCH_CODE (tag
))
/* The operand is a dimension mask; one unsigned argument is consumed
   into DIMS for every dimension whose bit is set.  */
116 case GOMP_LAUNCH_DIM
:
118 unsigned mask
= GOMP_LAUNCH_OP (tag
);
120 for (i
= 0; i
!= GOMP_DIM_MAX
; i
++)
121 if (mask
& GOMP_DIM_MASK (i
))
122 dims
[i
] = va_arg (ap
, unsigned);
126 case GOMP_LAUNCH_ASYNC
:
128 /* Small constant values are encoded in the operand. */
129 async
= GOMP_LAUNCH_OP (tag
);
/* An operand of GOMP_LAUNCH_OP_MAX means the value follows as a
   separate unsigned argument.  */
131 if (async
== GOMP_LAUNCH_OP_MAX
)
132 async
= va_arg (ap
, unsigned);
/* The operand is the wait count handed to goacc_wait, which reads
   further arguments through AP.  */
136 case GOMP_LAUNCH_WAIT
:
138 unsigned num_waits
= GOMP_LAUNCH_OP (tag
);
141 goacc_wait (async
, num_waits
, &ap
);
/* Report a launch code that is not recognized.  */
146 gomp_fatal ("unrecognized offload code '%d',"
147 " libgomp is too old", GOMP_LAUNCH_CODE (tag
));
/* Pass ASYNC to the device's async_set_async_func hook before mapping
   and launching.  */
152 acc_dev
->openacc
.async_set_async_func (async
);
/* Without GOMP_OFFLOAD_CAP_NATIVE_EXEC, FN is looked up by host address
   in the device's mem_map, with the device lock held around the lookup.
   A missing entry is fatal; otherwise the target function is taken from
   the entry's tgt_offset.  */
154 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
156 k
.host_start
= (uintptr_t) fn
;
157 k
.host_end
= k
.host_start
+ 1;
158 gomp_mutex_lock (&acc_dev
->lock
);
159 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
, &k
);
160 gomp_mutex_unlock (&acc_dev
->lock
);
162 if (tgt_fn_key
== NULL
)
163 gomp_fatal ("target function wasn't mapped");
165 tgt_fn
= (void (*)) tgt_fn_key
->tgt_offset
;
/* Here FN itself is used as the target function.  */
168 tgt_fn
= (void (*)) fn
;
/* Set up the mappings for the MAPNUM blocks on ACC_DEV.  */
170 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
171 GOMP_MAP_VARS_OPENACC
);
/* Compute one address per mapping: the start of the entry's target
   block plus the entry's offset within it.  */
173 devaddrs
= gomp_alloca (sizeof (void *) * mapnum
);
174 for (i
= 0; i
< mapnum
; i
++)
175 devaddrs
[i
] = (void *) (tgt
->list
[i
].key
->tgt
->tgt_start
176 + tgt
->list
[i
].key
->tgt_offset
);
/* Run the function through the device's exec_func hook.  */
178 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
,
181 /* If running synchronously, unmap immediately. */
182 if (async
< acc_async_noval
)
183 gomp_unmap_vars (tgt
, true);
/* TGT is handed to gomp_copy_from_async and to the device's
   register_async_cleanup_func hook.  */
186 gomp_copy_from_async (tgt
);
187 acc_dev
->openacc
.register_async_cleanup_func (tgt
);
/* Set the device's async value to acc_async_sync before leaving.  */
190 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
193 /* Legacy entry point, only provide host execution. */
/* goacc_save_and_set_bind is called with acc_device_host and
   goacc_restore_bind is called afterwards.  The parameter list keeps the
   fixed NUM_GANGS, NUM_WORKERS, VECTOR_LENGTH, ASYNC and NUM_WAITS
   arguments followed by variadic arguments.  */
196 GOACC_parallel (int device
, void (*fn
) (void *),
197 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
198 unsigned short *kinds
,
199 int num_gangs
, int num_workers
, int vector_length
,
200 int async
, int num_waits
, ...)
202 goacc_save_and_set_bind (acc_device_host
);
204 goacc_restore_bind ();
/* Open a data region.  The MAPNUM blocks described by HOSTADDRS, SIZES
   and KINDS are mapped with gomp_map_vars and the resulting descriptor
   is pushed onto the calling thread's mapped_data chain, linked through
   the descriptor's prev field.  GOACC_data_end pops that chain.  */
208 GOACC_data_start (int device
, size_t mapnum
,
209 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
/* DEVICE equal to GOMP_DEVICE_HOST_FALLBACK requests host fallback.  */
211 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
212 struct target_mem_desc
*tgt
;
/* Trace the call arguments.  */
214 #ifdef HAVE_INTTYPES_H
215 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
216 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
218 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
219 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
222 goacc_lazy_initialize ();
/* The calling thread's OpenACC state and the device recorded in it.  */
224 struct goacc_thread
*thr
= goacc_thread ();
225 struct gomp_device_descr
*acc_dev
= thr
->dev
;
227 /* Host fallback or 'do nothing'. */
228 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
/* An empty mapping (no device, zero entries) is still pushed, so the
   chain gains one entry in this case as well.  */
231 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true,
232 GOMP_MAP_VARS_OPENACC
);
233 tgt
->prev
= thr
->mapped_data
;
234 thr
->mapped_data
= tgt
;
/* Map the blocks on ACC_DEV and push the descriptor.  */
239 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
240 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
241 GOMP_MAP_VARS_OPENACC
);
242 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
243 tgt
->prev
= thr
->mapped_data
;
244 thr
->mapped_data
= tgt
;
/* Close a data region: take the newest descriptor from the calling
   thread's mapped_data chain, make its predecessor the new head and
   release the descriptor with gomp_unmap_vars.  */
248 GOACC_data_end (void)
250 struct goacc_thread
*thr
= goacc_thread ();
251 struct target_mem_desc
*tgt
= thr
->mapped_data
;
253 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
/* Unlink TGT from the chain before unmapping it.  */
254 thr
->mapped_data
= tgt
->prev
;
255 gomp_unmap_vars (tgt
, true);
256 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
/* Handle an enter data / exit data request for the MAPNUM blocks
   described by HOSTADDRS, SIZES and KINDS.  ASYNC, NUM_WAITS and the
   variadic arguments are handed to goacc_wait.  KINDS is scanned first
   to classify the request, then the blocks are processed one by one.
   In the loop that enters data, GOMP_MAP_POINTER goes through
   gomp_acc_insert_pointer, GOMP_MAP_FORCE_ALLOC through acc_create, and
   GOMP_MAP_FORCE_PRESENT and GOMP_MAP_FORCE_TO through
   acc_present_or_copyin.  In the loop that exits data, GOMP_MAP_POINTER
   goes through gomp_acc_remove_pointer, GOMP_MAP_FORCE_DEALLOC through
   acc_delete and GOMP_MAP_FORCE_FROM through acc_copyout.  An unhandled
   kind is reported with gomp_fatal.  */
260 GOACC_enter_exit_data (int device
, size_t mapnum
,
261 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
262 int async
, int num_waits
, ...)
264 struct goacc_thread
*thr
;
265 struct gomp_device_descr
*acc_dev
;
/* DEVICE equal to GOMP_DEVICE_HOST_FALLBACK requests host fallback.  */
266 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
267 bool data_enter
= false;
270 goacc_lazy_initialize ();
272 thr
= goacc_thread ();
275 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
/* The variadic arguments start after NUM_WAITS; goacc_wait reads queue
   ids from them.  */
283 va_start (ap
, num_waits
);
284 goacc_wait (async
, num_waits
, &ap
);
/* Pass ASYNC to the device's async_set_async_func hook.  */
288 acc_dev
->openacc
.async_set_async_func (async
);
290 /* Determine if this is an "acc enter data". */
291 for (i
= 0; i
< mapnum
; ++i
)
293 unsigned char kind
= kinds
[i
] & 0xff;
/* Pointer and pointer-set entries are tested on their own.  */
295 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
/* NOTE(review): presumably these three kinds identify an enter data
   request (data_enter) -- confirm.  */
298 if (kind
== GOMP_MAP_FORCE_ALLOC
299 || kind
== GOMP_MAP_FORCE_PRESENT
300 || kind
== GOMP_MAP_FORCE_TO
)
/* NOTE(review): presumably these two kinds identify an exit data
   request -- confirm.  */
306 if (kind
== GOMP_MAP_FORCE_DEALLOC
307 || kind
== GOMP_MAP_FORCE_FROM
)
310 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
/* Loop that creates or copies in each mapping.  */
316 for (i
= 0; i
< mapnum
; i
++)
318 unsigned char kind
= kinds
[i
] & 0xff;
320 /* Scan for PSETs. */
321 int psets
= find_pset (i
, mapnum
, kinds
);
327 case GOMP_MAP_POINTER
:
328 gomp_acc_insert_pointer (1, &hostaddrs
[i
], &sizes
[i
],
331 case GOMP_MAP_FORCE_ALLOC
:
332 acc_create (hostaddrs
[i
], sizes
[i
]);
334 case GOMP_MAP_FORCE_PRESENT
:
335 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
337 case GOMP_MAP_FORCE_TO
:
338 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
341 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
/* Here the entries starting at I are passed in a single call whose
   first argument is 3 rather than 1.  */
348 gomp_acc_insert_pointer (3, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
]);
349 /* Increment 'i' by two because OpenACC requires Fortran
350 arrays to be contiguous, so each PSET is associated with
351 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and
358 for (i
= 0; i
< mapnum
; ++i
)
360 unsigned char kind
= kinds
[i
] & 0xff;
362 int psets
= find_pset (i
, mapnum
, kinds
);
368 case GOMP_MAP_POINTER
:
369 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
370 == GOMP_MAP_FORCE_FROM
,
373 case GOMP_MAP_FORCE_DEALLOC
:
374 acc_delete (hostaddrs
[i
], sizes
[i
]);
376 case GOMP_MAP_FORCE_FROM
:
377 acc_copyout (hostaddrs
[i
], sizes
[i
]);
380 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
387 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
388 == GOMP_MAP_FORCE_FROM
, async
, 3);
389 /* See the above comment. */
/* Set the device's async value to acc_async_sync before leaving.  */
394 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Process the wait requests of a construct that runs on queue ASYNC.
   Queue ids are fetched from *AP as ints.  acc_async_test is consulted
   for each id; ASYNC is then compared with acc_async_sync and with the
   id itself, and when the id differs from ASYNC the device's
   async_wait_async_func hook is called with the id and ASYNC.
   NOTE(review): NUM_WAITS is presumably the number of ids to read from
   *AP -- confirm.  */
398 goacc_wait (int async
, int num_waits
, va_list *ap
)
400 struct goacc_thread
*thr
= goacc_thread ();
401 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* Next queue id to wait for.  */
405 int qid
= va_arg (*ap
, int);
407 if (acc_async_test (qid
))
410 if (async
== acc_async_sync
)
412 else if (qid
== async
)
413 ;/* If we're waiting on the same asynchronous queue as we're
414 launching on, the queue itself will order work as
415 required, so there's no need to wait explicitly. */
417 acc_dev
->openacc
.async_wait_async_func (qid
, async
);
/* Handle an update request for the MAPNUM blocks described by
   HOSTADDRS, SIZES and KINDS.  GOMP_MAP_FORCE_TO entries are passed to
   acc_update_device and GOMP_MAP_FORCE_FROM entries to acc_update_self;
   GOMP_MAP_POINTER and GOMP_MAP_TO_PSET have case labels of their own,
   and an unhandled kind is reported with gomp_fatal.  ASYNC, NUM_WAITS
   and the variadic arguments are handed to goacc_wait.  */
422 GOACC_update (int device
, size_t mapnum
,
423 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
424 int async
, int num_waits
, ...)
/* DEVICE equal to GOMP_DEVICE_HOST_FALLBACK requests host fallback.  */
426 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
429 goacc_lazy_initialize ();
/* The calling thread's OpenACC state and the device recorded in it.  */
431 struct goacc_thread
*thr
= goacc_thread ();
432 struct gomp_device_descr
*acc_dev
= thr
->dev
;
434 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
/* The variadic arguments start after NUM_WAITS; goacc_wait reads queue
   ids from them.  */
442 va_start (ap
, num_waits
);
443 goacc_wait (async
, num_waits
, &ap
);
/* Pass ASYNC to the device's async_set_async_func hook.  */
447 acc_dev
->openacc
.async_set_async_func (async
);
449 for (i
= 0; i
< mapnum
; ++i
)
/* Only the low byte of the entry is used as the map kind.  */
451 unsigned char kind
= kinds
[i
] & 0xff;
455 case GOMP_MAP_POINTER
:
456 case GOMP_MAP_TO_PSET
:
459 case GOMP_MAP_FORCE_TO
:
460 acc_update_device (hostaddrs
[i
], sizes
[i
]);
463 case GOMP_MAP_FORCE_FROM
:
464 acc_update_self (hostaddrs
[i
], sizes
[i
]);
468 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
/* Set the device's async value to acc_async_sync before leaving.  */
473 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Wait entry point.  The variadic arguments that follow NUM_WAITS are
   handed to goacc_wait together with ASYNC.  Otherwise ASYNC is
   compared with acc_async_sync and with acc_async_noval; for
   acc_async_noval the async_wait_all_async_func hook of the calling
   thread's device is called with acc_async_noval.  */
477 GOACC_wait (int async
, int num_waits
, ...)
483 va_start (ap
, num_waits
);
484 goacc_wait (async
, num_waits
, &ap
);
487 else if (async
== acc_async_sync
)
489 else if (async
== acc_async_noval
)
490 goacc_thread ()->dev
->openacc
.async_wait_all_async_func (acc_async_noval
);
494 GOACC_get_num_threads (void)
500 GOACC_get_thread_num (void)
506 GOACC_declare (int device
, size_t mapnum
,
507 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
511 for (i
= 0; i
< mapnum
; i
++)
513 unsigned char kind
= kinds
[i
] & 0xff;
515 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
520 case GOMP_MAP_FORCE_ALLOC
:
521 case GOMP_MAP_FORCE_DEALLOC
:
522 case GOMP_MAP_FORCE_FROM
:
523 case GOMP_MAP_FORCE_TO
:
524 case GOMP_MAP_POINTER
:
525 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
529 case GOMP_MAP_FORCE_DEVICEPTR
:
533 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
534 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
539 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
545 kinds
[i
] = GOMP_MAP_FORCE_FROM
;
546 GOACC_enter_exit_data (device
, 1, &hostaddrs
[i
], &sizes
[i
],
550 case GOMP_MAP_FORCE_PRESENT
:
551 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
552 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs
[i
],
553 (unsigned long) sizes
[i
]);