1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
/* find_pset: report whether the map entry immediately after position POS
   has kind GOMP_MAP_TO_PSET.

   POS is an index into KINDS; MAPNUM is the bound that POS + 1 is checked
   against.  Only the low 8 bits of the KINDS entry are compared.  The
   value returned on the visible path is the result of the comparison
   KIND == GOMP_MAP_TO_PSET.

   NOTE(review): this listing has lost source lines (the embedded original
   line numbers skip).  The return type, the braces and the statement
   controlled by the bounds check are not visible here.  */
39 find_pset (int pos
, size_t mapnum
, unsigned short *kinds
)
/* Bounds guard: true when index POS + 1 is not below MAPNUM.  POS is an
   int and MAPNUM a size_t, so this is a mixed signed/unsigned comparison;
   the callers visible in this file pass a loop index that starts at 0.  */
41 if (pos
+ 1 >= mapnum
)
/* Keep only the low byte of the following entry before comparing.  */
44 unsigned char kind
= kinds
[pos
+1] & 0xff;
46 return kind
== GOMP_MAP_TO_PSET
;
/* Ensure that the target device for DEVICE_TYPE is initialised (and that
   plugins have been loaded if appropriate).  The ACC_dev variable for the
   current thread will be set appropriately for the given device type on
   return.  */
/* select_acc_device: select the OpenACC device type DEVICE_TYPE for the
   calling thread.

   Visible steps, in order: call goacc_lazy_initialize; test DEVICE_TYPE
   against GOMP_DEVICE_HOST_FALLBACK; replace acc_device_none by
   acc_device_host; pass the resulting value to acc_set_device_type.

   NOTE(review): this listing has lost source lines.  The statement
   controlled by the host-fallback test, and whatever condition (if any)
   guards the final acc_set_device_type call, are not visible here.  */
56 select_acc_device (int device_type
)
58 goacc_lazy_initialize ();
60 if (device_type
== GOMP_DEVICE_HOST_FALLBACK
)
/* acc_device_none is treated as a request for acc_device_host.  */
63 if (device_type
== acc_device_none
)
64 device_type
= acc_device_host
;
68 /* NOTE: this will go badly if the surrounding data environment is set up
69 to use a different device type. We'll just have to trust that users
70 know what they're doing... */
71 acc_set_device_type (device_type
);
/* Forward declaration of the file-local wait helper.  It is called by
   GOACC_parallel below, ahead of its definition later in this file.  */
75 static void goacc_wait (int async
, int num_waits
, va_list ap
);
/* GOACC_parallel: run the function FN for an OpenACC parallel construct
   on the device selected by DEVICE.

   Parameters as visible in the signature:
     DEVICE         device selector; compared with GOMP_DEVICE_HOST_FALLBACK
                    to compute host_fallback and passed to select_acc_device.
     FN             host function taking a single void pointer.
     MAPNUM         number of entries in HOSTADDRS, SIZES and KINDS.
     HOSTADDRS, SIZES, KINDS
                    per-entry arrays handed to gomp_map_vars and to the
                    device's exec_func hook.
     NUM_GANGS, NUM_WORKERS, VECTOR_LENGTH
                    launch dimensions forwarded to exec_func.  The two
                    gomp_fatal messages say that values of NUM_GANGS and
                    NUM_WORKERS different from one are not yet supported.
     ASYNC          async value given to async_set_async_func, goacc_wait
                    and exec_func.
     NUM_WAITS, ... count of trailing variadic arguments, consumed through
                    va_start and goacc_wait.

   Visible sequence: log the arguments with gomp_debug; select_acc_device;
   fetch the thread descriptor with goacc_thread; a host path bracketed by
   goacc_save_and_set_bind (acc_device_host) and goacc_restore_bind; a
   second test for a device whose type is acc_device_host; va_start and
   goacc_wait; async_set_async_func (ASYNC).  When the device lacks
   GOMP_OFFLOAD_CAP_NATIVE_EXEC, FN is looked up in acc_dev->mem_map's
   splay tree under mem_map.lock with the key range [FN, FN + 1), a miss is
   fatal ("target function wasn't mapped"), and the target function is
   taken from the found entry's tgt->tgt_start; a second assignment uses FN
   itself.  The entries are then mapped with gomp_map_vars, a device
   address per entry is computed as tgt_start + tgt_offset into a
   gomp_alloca'd array of MAPNUM pointers, and exec_func is invoked.  If
   ASYNC < acc_async_noval the mappings are released at once with
   gomp_unmap_vars; the gomp_copy_from_async and
   register_async_cleanup_func calls follow.  The last visible step is
   async_set_async_func (acc_async_sync).

   NOTE(review): this listing has lost source lines.  Not visible here:
   the return type and braces, the declarations of ap, devaddrs, i and
   tgt_fn, the assignment of acc_dev, the conditions guarding the two
   gomp_fatal calls, the calls to FN on the host paths, any va_end, the
   else keywords, and the trailing arguments of gomp_map_vars and
   exec_func.  The comment that begins 'Host fallback if "if" clause' has
   also lost its closing line.  */
78 GOACC_parallel (int device
, void (*fn
) (void *),
79 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
80 unsigned short *kinds
,
81 int num_gangs
, int num_workers
, int vector_length
,
82 int async
, int num_waits
, ...)
84 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
86 struct goacc_thread
*thr
;
87 struct gomp_device_descr
*acc_dev
;
88 struct target_mem_desc
*tgt
;
91 struct splay_tree_key_s k
;
92 splay_tree_key tgt_fn_key
;
/* The conditions guarding these two fatal errors, and the value passed
   for each %d, are on lines missing from this listing.  */
96 gomp_fatal ("num_gangs (%d) different from one is not yet supported",
99 gomp_fatal ("num_workers (%d) different from one is not yet supported",
102 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
103 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
, async
);
105 select_acc_device (device
);
107 thr
= goacc_thread ();
110 /* Host fallback if "if" clause is false or if the current device is set to
114 goacc_save_and_set_bind (acc_device_host
);
116 goacc_restore_bind ();
119 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
125 va_start (ap
, num_waits
);
128 goacc_wait (async
, num_waits
, ap
);
132 acc_dev
->openacc
.async_set_async_func (async
);
134 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
136 k
.host_start
= (uintptr_t) fn
;
137 k
.host_end
= k
.host_start
+ 1;
138 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
139 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
.splay_tree
, &k
);
140 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
142 if (tgt_fn_key
== NULL
)
143 gomp_fatal ("target function wasn't mapped");
145 tgt_fn
= (void (*)) tgt_fn_key
->tgt
->tgt_start
;
148 tgt_fn
= (void (*)) fn
;
150 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
153 devaddrs
= gomp_alloca (sizeof (void *) * mapnum
);
154 for (i
= 0; i
< mapnum
; i
++)
155 devaddrs
[i
] = (void *) (tgt
->list
[i
]->tgt
->tgt_start
156 + tgt
->list
[i
]->tgt_offset
);
158 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
, sizes
, kinds
,
159 num_gangs
, num_workers
, vector_length
, async
,
162 /* If running synchronously, unmap immediately. */
163 if (async
< acc_async_noval
)
164 gomp_unmap_vars (tgt
, true);
/* NOTE(review): presumably the alternative (asynchronous) branch; the
   else keyword is on a line missing from this listing -- confirm.  */
167 gomp_copy_from_async (tgt
);
168 acc_dev
->openacc
.register_async_cleanup_func (tgt
);
/* Last visible step: hand acc_async_sync to the device's
   async_set_async_func hook.  */
171 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* GOACC_data_start: open a data region for the calling thread by pushing
   a new mapping descriptor onto thr->mapped_data.

   DEVICE is passed to select_acc_device and compared with
   GOMP_DEVICE_HOST_FALLBACK.  MAPNUM, HOSTADDRS, SIZES and KINDS describe
   the entries to map; they are logged with gomp_debug and passed to
   gomp_map_vars on the device path.

   Two mapping calls are visible.  After the test that involves
   GOMP_OFFLOAD_CAP_SHARED_MEM, gomp_map_vars is called with a NULL device
   and zero entries.  Otherwise the caller's entries are mapped on the
   current thread's device.  In both cases the new descriptor's prev field
   is set to the previous thr->mapped_data and the descriptor becomes the
   new thr->mapped_data.

   NOTE(review): this listing has lost source lines.  The return type,
   the braces, the remainder of the if condition (host_fallback is
   computed but its use is not visible), any return statement after the
   first push, and the final argument of the second gomp_map_vars call are
   not visible here.  */
175 GOACC_data_start (int device
, size_t mapnum
,
176 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
178 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
179 struct target_mem_desc
*tgt
;
181 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
182 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
);
184 select_acc_device (device
);
/* The thread descriptor is fetched only after select_acc_device has
   run, and the device is read from it.  */
186 struct goacc_thread
*thr
= goacc_thread ();
187 struct gomp_device_descr
*acc_dev
= thr
->dev
;
189 /* Host fallback or 'do nothing'. */
190 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
/* Push a descriptor created from an empty mapping request (NULL device,
   zero entries).  */
193 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
194 tgt
->prev
= thr
->mapped_data
;
195 thr
->mapped_data
= tgt
;
200 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
201 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
203 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
/* Link the new descriptor in front of the thread's existing list.  */
204 tgt
->prev
= thr
->mapped_data
;
205 thr
->mapped_data
= tgt
;
/* GOACC_data_end: close the innermost data region of the calling thread.

   Takes the descriptor at the head of thr->mapped_data, makes its prev
   entry the new head, and releases the removed descriptor with
   gomp_unmap_vars (tgt, true).  Debug messages are emitted before and
   after.

   NOTE(review): tgt is dereferenced with no visible NULL check, so this
   presumably relies on a matching earlier GOACC_data_start -- verify
   against callers.  The return type and braces are on lines missing from
   this listing.  */
209 GOACC_data_end (void)
211 struct goacc_thread
*thr
= goacc_thread ();
212 struct target_mem_desc
*tgt
= thr
->mapped_data
;
214 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
/* Unlink first, then unmap the descriptor that was removed.  */
215 thr
->mapped_data
= tgt
->prev
;
216 gomp_unmap_vars (tgt
, true);
217 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
/* GOACC_enter_exit_data: handle the map entries of an OpenACC
   "enter data" or "exit data" request.

   Parameters as visible in the signature:
     DEVICE         passed to select_acc_device and compared with
                    GOMP_DEVICE_HOST_FALLBACK.
     MAPNUM         number of entries in HOSTADDRS, SIZES and KINDS.
     HOSTADDRS, SIZES, KINDS
                    per-entry arrays; only the low 8 bits of each KINDS
                    entry are used as the map kind.
     ASYNC          passed to goacc_wait, async_set_async_func and
                    gomp_acc_remove_pointer.
     NUM_WAITS, ... count of trailing variadic arguments, consumed through
                    va_start and goacc_wait.

   Visible structure:
   1. Select the device, fetch the thread descriptor, test
      GOMP_OFFLOAD_CAP_SHARED_MEM, process the wait arguments and call
      async_set_async_func (ASYNC).
   2. A classification loop over the kinds.  GOMP_MAP_POINTER and
      GOMP_MAP_TO_PSET are tested together; GOMP_MAP_FORCE_ALLOC,
      GOMP_MAP_FORCE_PRESENT and GOMP_MAP_FORCE_TO are tested together;
      GOMP_MAP_FORCE_DEALLOC and GOMP_MAP_FORCE_FROM are tested together;
      a gomp_fatal "UNHANDLED kind" call follows.
   3. An "enter" loop: GOMP_MAP_POINTER calls gomp_acc_insert_pointer
      with a count of 1, GOMP_MAP_FORCE_ALLOC calls acc_create, and both
      GOMP_MAP_FORCE_PRESENT and GOMP_MAP_FORCE_TO call
      acc_present_or_copyin.  A separate gomp_acc_insert_pointer call uses
      a count of 3.
   4. An "exit" loop: GOMP_MAP_POINTER calls gomp_acc_remove_pointer,
      GOMP_MAP_FORCE_DEALLOC calls acc_delete, and GOMP_MAP_FORCE_FROM
      calls acc_copyout.  A separate gomp_acc_remove_pointer call passes
      ASYNC and 3.
   5. Last visible step: async_set_async_func (acc_async_sync).

   Both processing loops call find_pset for the current index before the
   case labels.

   NOTE(review): under the GOMP_MAP_POINTER case label the second argument
   of gomp_acc_remove_pointer compares the same entry's kind with
   GOMP_MAP_FORCE_FROM, which looks like it can only be false there --
   confirm intent.

   NOTE(review): this listing has lost source lines.  Not visible here:
   the return type and braces, the declarations of i and ap, the
   assignment of acc_dev, the rest of the shared-memory condition and the
   statement it controls, where data_enter is set and tested, the switch,
   break, default and else lines, how the find_pset result is used, any
   va_end, and the increment of i that the PSET comment describes.  That
   comment has also lost its closing lines.  */
221 GOACC_enter_exit_data (int device
, size_t mapnum
,
222 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
223 int async
, int num_waits
, ...)
225 struct goacc_thread
*thr
;
226 struct gomp_device_descr
*acc_dev
;
227 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
228 bool data_enter
= false;
231 select_acc_device (device
);
233 thr
= goacc_thread ();
236 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
244 va_start (ap
, num_waits
);
246 goacc_wait (async
, num_waits
, ap
);
251 acc_dev
->openacc
.async_set_async_func (async
);
253 /* Determine if this is an "acc enter data". */
254 for (i
= 0; i
< mapnum
; ++i
)
256 unsigned char kind
= kinds
[i
] & 0xff;
258 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
261 if (kind
== GOMP_MAP_FORCE_ALLOC
262 || kind
== GOMP_MAP_FORCE_PRESENT
263 || kind
== GOMP_MAP_FORCE_TO
)
269 if (kind
== GOMP_MAP_FORCE_DEALLOC
270 || kind
== GOMP_MAP_FORCE_FROM
)
273 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
279 for (i
= 0; i
< mapnum
; i
++)
281 unsigned char kind
= kinds
[i
] & 0xff;
283 /* Scan for PSETs. */
284 int psets
= find_pset (i
, mapnum
, kinds
);
290 case GOMP_MAP_POINTER
:
291 gomp_acc_insert_pointer (1, &hostaddrs
[i
], &sizes
[i
],
294 case GOMP_MAP_FORCE_ALLOC
:
295 acc_create (hostaddrs
[i
], sizes
[i
]);
297 case GOMP_MAP_FORCE_PRESENT
:
298 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
300 case GOMP_MAP_FORCE_TO
:
301 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
304 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
311 gomp_acc_insert_pointer (3, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
]);
312 /* Increment 'i' by two because OpenACC requires fortran
313 arrays to be contiguous, so each PSET is associated with
314 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
321 for (i
= 0; i
< mapnum
; ++i
)
323 unsigned char kind
= kinds
[i
] & 0xff;
325 int psets
= find_pset (i
, mapnum
, kinds
);
331 case GOMP_MAP_POINTER
:
332 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
333 == GOMP_MAP_FORCE_FROM
,
336 case GOMP_MAP_FORCE_DEALLOC
:
337 acc_delete (hostaddrs
[i
], sizes
[i
]);
339 case GOMP_MAP_FORCE_FROM
:
340 acc_copyout (hostaddrs
[i
], sizes
[i
]);
343 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
350 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
351 == GOMP_MAP_FORCE_FROM
, async
, 3);
352 /* See the above comment. */
/* Last visible step: hand acc_async_sync to the device's
   async_set_async_func hook.  */
357 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* goacc_wait: process the wait arguments of an OpenACC construct.

   ASYNC is the async value of the construct, NUM_WAITS the number of int
   queue ids that can be read from AP with va_arg.  NUM_WAITS is asserted
   to be non-negative.

   Visible cases, in source order:
   - ASYNC == acc_async_sync and NUM_WAITS == 0: the controlled statement
     is not visible in this listing.
   - ASYNC == acc_async_sync and NUM_WAITS != 0: read NUM_WAITS ids from
     AP and test each with acc_async_test.
   - ASYNC == acc_async_noval and NUM_WAITS == 0: call the device's
     async_wait_all_async_func hook with acc_async_noval.
   - final loop: read NUM_WAITS ids from AP, test each with
     acc_async_test, and call the device's async_wait_async_func hook
     with (id, ASYNC).

   AP is consumed by this function; no va_end is visible here, and the
   callers in this file are the ones that call va_start.

   NOTE(review): this listing has lost source lines.  Not visible here:
   the return type and braces, the declaration of i, the statements
   controlled by the acc_async_test checks, any return statements that
   separate the cases, and any condition guarding the final
   async_wait_async_func call.  The last comment in the body has also lost
   its closing line.  */
361 goacc_wait (int async
, int num_waits
, va_list ap
)
363 struct goacc_thread
*thr
= goacc_thread ();
364 struct gomp_device_descr
*acc_dev
= thr
->dev
;
367 assert (num_waits
>= 0);
369 if (async
== acc_async_sync
&& num_waits
== 0)
375 if (async
== acc_async_sync
&& num_waits
)
377 for (i
= 0; i
< num_waits
; i
++)
/* Each variadic wait argument is read as an int queue id.  */
379 int qid
= va_arg (ap
, int);
381 if (acc_async_test (qid
))
389 if (async
== acc_async_noval
&& num_waits
== 0)
391 acc_dev
->openacc
.async_wait_all_async_func (acc_async_noval
);
395 for (i
= 0; i
< num_waits
; i
++)
397 int qid
= va_arg (ap
, int);
399 if (acc_async_test (qid
))
402 /* If we're waiting on the same asynchronous queue as we're launching on,
403 the queue itself will order work as required, so there's no need to
406 acc_dev
->openacc
.async_wait_async_func (qid
, async
);
/* GOACC_update: handle the map entries of an OpenACC "update" request.

   Parameters as visible in the signature:
     DEVICE         passed to select_acc_device and compared with
                    GOMP_DEVICE_HOST_FALLBACK.
     MAPNUM         number of entries in HOSTADDRS, SIZES and KINDS.
     HOSTADDRS, SIZES, KINDS
                    per-entry arrays; only the low 8 bits of each KINDS
                    entry are used as the map kind.
     ASYNC          passed to goacc_wait and async_set_async_func.
     NUM_WAITS, ... count of trailing variadic arguments, consumed through
                    va_start and goacc_wait.

   Per entry: GOMP_MAP_POINTER and GOMP_MAP_TO_PSET share case labels with
   no visible call; GOMP_MAP_FORCE_TO calls acc_update_device and
   GOMP_MAP_FORCE_FROM calls acc_update_self, each with the entry's host
   address and size.  A gomp_fatal call reports an unhandled kind.  The
   per-entry work is bracketed by async_set_async_func (ASYNC) before the
   loop and async_set_async_func (acc_async_sync) after it.

   NOTE(review): this listing has lost source lines.  Not visible here:
   the return type and braces, the declarations of i and ap, the rest of
   the shared-memory condition (host_fallback is computed but its use is
   not visible) and the statement it controls, any condition guarding the
   wait handling, any va_end, and the switch, break and default lines.  */
411 GOACC_update (int device
, size_t mapnum
,
412 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
413 int async
, int num_waits
, ...)
415 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
418 select_acc_device (device
);
420 struct goacc_thread
*thr
= goacc_thread ();
421 struct gomp_device_descr
*acc_dev
= thr
->dev
;
423 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
431 va_start (ap
, num_waits
);
433 goacc_wait (async
, num_waits
, ap
);
438 acc_dev
->openacc
.async_set_async_func (async
);
440 for (i
= 0; i
< mapnum
; ++i
)
/* Only the low byte of the kinds entry selects the case.  */
442 unsigned char kind
= kinds
[i
] & 0xff;
446 case GOMP_MAP_POINTER
:
447 case GOMP_MAP_TO_PSET
:
450 case GOMP_MAP_FORCE_TO
:
451 acc_update_device (hostaddrs
[i
], sizes
[i
]);
454 case GOMP_MAP_FORCE_FROM
:
455 acc_update_self (hostaddrs
[i
], sizes
[i
]);
459 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
/* Last visible step: hand acc_async_sync to the device's
   async_set_async_func hook.  */
464 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* GOACC_wait: variadic entry point that forwards its arguments to the
   file-local goacc_wait.

   ASYNC and NUM_WAITS are passed through unchanged; the arguments that
   follow NUM_WAITS are handed over as a va_list started on NUM_WAITS.

   NOTE(review): this listing has lost source lines.  The return type,
   the braces, the declaration of ap and any matching va_end are not
   visible here.  */
468 GOACC_wait (int async
, int num_waits
, ...)
472 va_start (ap
, num_waits
);
474 goacc_wait (async
, num_waits
, ap
);
480 GOACC_get_num_threads (void)
486 GOACC_get_thread_num (void)