1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
40 find_pset (int pos
, size_t mapnum
, unsigned short *kinds
)
42 if (pos
+ 1 >= mapnum
)
45 unsigned char kind
= kinds
[pos
+1] & 0xff;
47 return kind
== GOMP_MAP_TO_PSET
;
51 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
52 plugins have been loaded if appropriate). The ACC_dev variable for the
53 current thread will be set appropriately for the given device type on
57 select_acc_device (int device_type
)
59 goacc_lazy_initialize ();
61 if (device_type
== GOMP_DEVICE_HOST_FALLBACK
)
64 if (device_type
== acc_device_none
)
65 device_type
= acc_device_host
;
69 /* NOTE: this will go badly if the surrounding data environment is set up
70 to use a different device type. We'll just have to trust that users
71 know what they're doing... */
72 acc_set_device_type (device_type
);
76 static void goacc_wait (int async
, int num_waits
, va_list ap
);
79 GOACC_parallel (int device
, void (*fn
) (void *), const void *offload_table
,
80 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
81 unsigned short *kinds
,
82 int num_gangs
, int num_workers
, int vector_length
,
83 int async
, int num_waits
, ...)
85 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
87 struct goacc_thread
*thr
;
88 struct gomp_device_descr
*acc_dev
;
89 struct target_mem_desc
*tgt
;
92 struct splay_tree_key_s k
;
93 splay_tree_key tgt_fn_key
;
97 gomp_fatal ("num_gangs (%d) different from one is not yet supported",
100 gomp_fatal ("num_workers (%d) different from one is not yet supported",
103 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
104 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
, async
);
106 select_acc_device (device
);
108 thr
= goacc_thread ();
111 /* Host fallback if "if" clause is false or if the current device is set to
115 goacc_save_and_set_bind (acc_device_host
);
117 goacc_restore_bind ();
120 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
126 va_start (ap
, num_waits
);
129 goacc_wait (async
, num_waits
, ap
);
133 acc_dev
->openacc
.async_set_async_func (async
);
135 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
137 k
.host_start
= (uintptr_t) fn
;
138 k
.host_end
= k
.host_start
+ 1;
139 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
140 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
.splay_tree
, &k
);
141 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
143 if (tgt_fn_key
== NULL
)
144 gomp_fatal ("target function wasn't mapped");
146 tgt_fn
= (void (*)) tgt_fn_key
->tgt
->tgt_start
;
149 tgt_fn
= (void (*)) fn
;
151 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
154 devaddrs
= alloca (sizeof (void *) * mapnum
);
155 for (i
= 0; i
< mapnum
; i
++)
156 devaddrs
[i
] = (void *) (tgt
->list
[i
]->tgt
->tgt_start
157 + tgt
->list
[i
]->tgt_offset
);
159 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
, sizes
, kinds
,
160 num_gangs
, num_workers
, vector_length
, async
,
163 /* If running synchronously, unmap immediately. */
164 if (async
< acc_async_noval
)
165 gomp_unmap_vars (tgt
, true);
168 gomp_copy_from_async (tgt
);
169 acc_dev
->openacc
.register_async_cleanup_func (tgt
);
172 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
176 GOACC_data_start (int device
, const void *offload_table
, size_t mapnum
,
177 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
179 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
180 struct target_mem_desc
*tgt
;
182 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
183 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
);
185 select_acc_device (device
);
187 struct goacc_thread
*thr
= goacc_thread ();
188 struct gomp_device_descr
*acc_dev
= thr
->dev
;
190 /* Host fallback or 'do nothing'. */
191 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
194 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
195 tgt
->prev
= thr
->mapped_data
;
196 thr
->mapped_data
= tgt
;
201 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
202 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
204 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
205 tgt
->prev
= thr
->mapped_data
;
206 thr
->mapped_data
= tgt
;
210 GOACC_data_end (void)
212 struct goacc_thread
*thr
= goacc_thread ();
213 struct target_mem_desc
*tgt
= thr
->mapped_data
;
215 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
216 thr
->mapped_data
= tgt
->prev
;
217 gomp_unmap_vars (tgt
, true);
218 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
222 GOACC_enter_exit_data (int device
, const void *offload_table
, size_t mapnum
,
223 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
224 int async
, int num_waits
, ...)
226 struct goacc_thread
*thr
;
227 struct gomp_device_descr
*acc_dev
;
228 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
229 bool data_enter
= false;
232 select_acc_device (device
);
234 thr
= goacc_thread ();
237 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
245 va_start (ap
, num_waits
);
247 goacc_wait (async
, num_waits
, ap
);
252 acc_dev
->openacc
.async_set_async_func (async
);
254 /* Determine if this is an "acc enter data". */
255 for (i
= 0; i
< mapnum
; ++i
)
257 unsigned char kind
= kinds
[i
] & 0xff;
259 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
262 if (kind
== GOMP_MAP_FORCE_ALLOC
263 || kind
== GOMP_MAP_FORCE_PRESENT
264 || kind
== GOMP_MAP_FORCE_TO
)
270 if (kind
== GOMP_MAP_FORCE_DEALLOC
271 || kind
== GOMP_MAP_FORCE_FROM
)
274 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
280 for (i
= 0; i
< mapnum
; i
++)
282 unsigned char kind
= kinds
[i
] & 0xff;
284 /* Scan for PSETs. */
285 int psets
= find_pset (i
, mapnum
, kinds
);
291 case GOMP_MAP_POINTER
:
292 gomp_acc_insert_pointer (1, &hostaddrs
[i
], &sizes
[i
],
295 case GOMP_MAP_FORCE_ALLOC
:
296 acc_create (hostaddrs
[i
], sizes
[i
]);
298 case GOMP_MAP_FORCE_PRESENT
:
299 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
301 case GOMP_MAP_FORCE_TO
:
302 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
305 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
312 gomp_acc_insert_pointer (3, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
]);
313 /* Increment 'i' by two because OpenACC requires fortran
314 arrays to be contiguous, so each PSET is associated with
315 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
322 for (i
= 0; i
< mapnum
; ++i
)
324 unsigned char kind
= kinds
[i
] & 0xff;
326 int psets
= find_pset (i
, mapnum
, kinds
);
332 case GOMP_MAP_POINTER
:
333 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
334 == GOMP_MAP_FORCE_FROM
,
337 case GOMP_MAP_FORCE_DEALLOC
:
338 acc_delete (hostaddrs
[i
], sizes
[i
]);
340 case GOMP_MAP_FORCE_FROM
:
341 acc_copyout (hostaddrs
[i
], sizes
[i
]);
344 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
351 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
352 == GOMP_MAP_FORCE_FROM
, async
, 3);
353 /* See the above comment. */
358 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
362 goacc_wait (int async
, int num_waits
, va_list ap
)
364 struct goacc_thread
*thr
= goacc_thread ();
365 struct gomp_device_descr
*acc_dev
= thr
->dev
;
368 assert (num_waits
>= 0);
370 if (async
== acc_async_sync
&& num_waits
== 0)
376 if (async
== acc_async_sync
&& num_waits
)
378 for (i
= 0; i
< num_waits
; i
++)
380 int qid
= va_arg (ap
, int);
382 if (acc_async_test (qid
))
390 if (async
== acc_async_noval
&& num_waits
== 0)
392 acc_dev
->openacc
.async_wait_all_async_func (acc_async_noval
);
396 for (i
= 0; i
< num_waits
; i
++)
398 int qid
= va_arg (ap
, int);
400 if (acc_async_test (qid
))
403 /* If we're waiting on the same asynchronous queue as we're launching on,
404 the queue itself will order work as required, so there's no need to
407 acc_dev
->openacc
.async_wait_async_func (qid
, async
);
412 GOACC_update (int device
, const void *offload_table
, size_t mapnum
,
413 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
414 int async
, int num_waits
, ...)
416 bool host_fallback
= device
== GOMP_DEVICE_HOST_FALLBACK
;
419 select_acc_device (device
);
421 struct goacc_thread
*thr
= goacc_thread ();
422 struct gomp_device_descr
*acc_dev
= thr
->dev
;
424 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
432 va_start (ap
, num_waits
);
434 goacc_wait (async
, num_waits
, ap
);
439 acc_dev
->openacc
.async_set_async_func (async
);
441 for (i
= 0; i
< mapnum
; ++i
)
443 unsigned char kind
= kinds
[i
] & 0xff;
447 case GOMP_MAP_POINTER
:
448 case GOMP_MAP_TO_PSET
:
451 case GOMP_MAP_FORCE_TO
:
452 acc_update_device (hostaddrs
[i
], sizes
[i
]);
455 case GOMP_MAP_FORCE_FROM
:
456 acc_update_self (hostaddrs
[i
], sizes
[i
]);
460 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
465 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
469 GOACC_wait (int async
, int num_waits
, ...)
473 va_start (ap
, num_waits
);
475 goacc_wait (async
, num_waits
, ap
);
481 GOACC_get_num_threads (void)
487 GOACC_get_thread_num (void)