1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
3 Contributed by Thomas Schwinge <thomas@codesourcery.com>.
5 This file is part of the GNU OpenMP Library (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles OpenACC constructs. */
30 #include "libgomp_g.h"
31 #include "gomp-constants.h"
32 #include "libgomp_target.h"
/* Log one data-mapping entry through gomp_notify, for debugging.

   S is a short tag naming the caller, IDX the position of the entry in the
   mapping arrays, HOSTADDR and SIZE the host address and byte size of the
   mapped object.  KIND is the raw entry from the kinds array: its low byte
   selects the mapping type, and the bits above the low byte are used as a
   shift count for the third printed field (presumably log2 of an
   alignment -- confirm against the kinds encoding).

   KIND is taken as unsigned short, the element type of the kinds arrays
   used throughout this file.  With the previous unsigned char parameter the
   upper byte was discarded at the call, so `kind >> 8' was always zero and
   the third field always printed as 1.  */
static void
dump_var (const char *s, size_t idx, void *hostaddr, size_t size,
	  unsigned short kind)
{
  unsigned int map_kind = kind & 0xff;
  unsigned int shift = kind >> 8;
  /* Shifting an int by its width or more is undefined; print 0 rather than
     shifting when the encoded count is out of range.  */
  int scaled = shift < 31 ? 1 << shift : 0;

  /* IDX is a size_t, so it needs %zu rather than %zi.  */
  gomp_notify (" %2zu: %3s 0x%.2x -", idx, s, map_kind);

  switch (map_kind)
    {
    case 0x00: gomp_notify (" ALLOC "); break;
    case 0x01: gomp_notify (" ALLOC TO "); break;
    case 0x02: gomp_notify (" ALLOC FROM "); break;
    case 0x03: gomp_notify (" ALLOC TOFROM "); break;
    case 0x04: gomp_notify (" POINTER "); break;
    case 0x05: gomp_notify (" TO_PSET "); break;

    case 0x08: gomp_notify (" FORCE_ALLOC "); break;
    case 0x09: gomp_notify (" FORCE_TO "); break;
    case 0x0a: gomp_notify (" FORCE_FROM "); break;
    case 0x0b: gomp_notify (" FORCE_TOFROM "); break;
    case 0x0c: gomp_notify (" FORCE_PRESENT "); break;
    case 0x0d: gomp_notify (" FORCE_DEALLOC "); break;
    case 0x0e: gomp_notify (" FORCE_DEVICEPTR "); break;

    case 0x18: gomp_notify (" FORCE_PRIVATE "); break;
    case 0x19: gomp_notify (" FORCE_FIRSTPRIVATE "); break;

    /* 0xff is (unsigned char) -1, the value the original case label used.  */
    case 0xff: gomp_notify (" DUMMY "); break;
    default: gomp_notify ("UGH! 0x%x\n", map_kind);
    }

  /* SIZE is printed at full width instead of being truncated to int.  */
  gomp_notify ("- %d - %4zu/0x%04zx ", scaled, size, size);
  gomp_notify ("- %p\n", hostaddr);
}
/* Report whether the mapping that follows position POS in KINDS is a
   pointer-set mapping: the low byte of kinds[pos + 1] is compared with
   GOMP_MAP_TO_PSET and the result of that comparison is returned.  MAPNUM is
   the number of entries in KINDS and bounds the look-ahead.

   NOTE(review): the return type and the statement guarded by the bounds
   test are not visible in this listing -- presumably 0 is returned when POS
   is the last entry; confirm against the full source.
   NOTE(review): POS is an int while MAPNUM is a size_t, so `pos + 1' is
   converted to an unsigned type for the comparison.  */
76 find_pset (int pos
, size_t mapnum
, unsigned short *kinds
)
78 if (pos
+ 1 >= mapnum
)
81 unsigned char kind
= kinds
[pos
+1] & 0xff;
83 return kind
== GOMP_MAP_TO_PSET
;
/* NOTE(review): the original comment that follows is cut off in this listing
   (its last words and terminator are missing), as are the return type and
   the statements guarded by the first and last tests.  What the visible
   lines show: ACC_lazy_initialize runs first; DEVICE_TYPE is then compared
   with GOMP_IF_CLAUSE_FALSE (presumably returning early); acc_device_none is
   replaced by acc_device_host; and acc_set_device_type is called with the
   result, apparently only when it is non-negative -- confirm against the
   full source.  */
87 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
88 plugins have been loaded if appropriate). The ACC_dev variable for the
89 current thread will be set appropriately for the given device type on
93 select_acc_device (int device_type
)
95 ACC_lazy_initialize ();
97 if (device_type
== GOMP_IF_CLAUSE_FALSE
)
100 if (device_type
== acc_device_none
)
101 device_type
= acc_device_host
;
103 if (device_type
>= 0)
105 /* NOTE: this will go badly if the surrounding data environment is set up
106 to use a different device type. We'll just have to trust that users
107 know what they're doing... */
108 acc_set_device_type (device_type
);
/* Forward declaration: goacc_wait is defined further down in this file but
   is called by the entry points that precede its definition.  */
112 void goacc_wait (int async
, int num_waits
, va_list ap
);
/* Entry point for launching an OpenACC parallel region.

   DEVICE is passed to select_acc_device and also encodes the "if" clause:
   the value GOMP_IF_CLAUSE_FALSE means the clause was false.  FN is the host
   function for the region.  MAPNUM, HOSTADDRS, SIZES and KINDS describe the
   data mappings and are handed to gomp_map_vars.  NUM_GANGS, NUM_WORKERS,
   VECTOR_LENGTH and ASYNC are forwarded to the device's exec_func hook.
   ASYNC, NUM_WAITS and the variadic arguments are handed to goacc_wait.
   OFFLOAD_TABLE is not referenced in the visible lines.

   On a device without TARGET_CAP_NATIVE_EXEC, FN is translated by looking up
   the one-byte range that starts at its address in the device's mem_map
   splay tree, with mem_map.lock held around the lookup; a missing entry is
   fatal.  The other visible assignment uses FN itself (presumably the else
   branch).  The device address of each mapping is the target block's
   tgt_start plus the entry's tgt_offset.

   NOTE(review): a large part of this function is missing from this listing
   (return type, braces, several declarations, the host-fallback bodies,
   trailing call arguments, any va_end), and one comment in the body is cut
   off; the comments here describe the visible lines only.
   NOTE(review): acc_dev is read below but its assignment is not visible --
   presumably thr->dev, as in the other entry points; confirm.
   NOTE(review): devaddrs comes from alloca sized by MAPNUM, so stack use
   grows with the number of mappings.  */
115 GOACC_parallel (int device
, void (*fn
) (void *), const void *offload_table
,
116 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
117 unsigned short *kinds
,
118 int num_gangs
, int num_workers
, int vector_length
,
119 int async
, int num_waits
, ...)
121 bool if_clause_condition_value
= device
!= GOMP_IF_CLAUSE_FALSE
;
123 struct goacc_thread
*thr
;
124 struct gomp_device_descr
*acc_dev
;
125 struct target_mem_desc
*tgt
;
128 struct splay_tree_key_s k
;
129 splay_tree_key tgt_fn_key
;
/* Only num_workers == 1 is accepted; any other value is fatal.  */
132 if (num_workers
!= 1)
133 gomp_fatal ("num_workers (%d) different from one is not yet supported",
136 gomp_notify ("%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
137 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
, async
);
139 select_acc_device (device
);
141 thr
= goacc_thread ();
144 /* Host fallback if "if" clause is false or if the current device is set to
146 if (!if_clause_condition_value
)
148 ACC_save_and_set_bind (acc_device_host
);
153 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
159 va_start (ap
, num_waits
);
162 goacc_wait (async
, num_waits
, ap
);
166 acc_dev
->openacc
.async_set_async_func (async
);
168 if (!(acc_dev
->capabilities
& TARGET_CAP_NATIVE_EXEC
))
170 k
.host_start
= (uintptr_t) fn
;
171 k
.host_end
= k
.host_start
+ 1;
172 gomp_mutex_lock (&acc_dev
->mem_map
.lock
);
173 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
.splay_tree
, &k
);
174 gomp_mutex_unlock (&acc_dev
->mem_map
.lock
);
176 if (tgt_fn_key
== NULL
)
177 gomp_fatal ("target function wasn't mapped: perhaps -fopenacc was "
178 "used without -flto?");
180 tgt_fn
= (void (*)) tgt_fn_key
->tgt
->tgt_start
;
183 tgt_fn
= (void (*)) fn
;
185 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
188 devaddrs
= alloca (sizeof (void *) * mapnum
);
189 for (i
= 0; i
< mapnum
; i
++)
190 devaddrs
[i
] = (void *) (tgt
->list
[i
]->tgt
->tgt_start
191 + tgt
->list
[i
]->tgt_offset
);
193 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
, sizes
, kinds
,
194 num_gangs
, num_workers
, vector_length
, async
,
197 /* If running synchronously, unmap immediately. */
198 if (async
< acc_async_noval
)
199 gomp_unmap_vars (tgt
, true);
/* Presumably the else branch: call gomp_copy_from_async on TGT and register
   TGT with the device's async cleanup hook instead of unmapping here.  */
202 gomp_copy_from_async (tgt
);
203 acc_dev
->openacc
.register_async_cleanup_func (tgt
);
/* Set the device's async mode back to acc_async_sync.  */
206 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Open an OpenACC data region: create a mapping descriptor and push it on
   the calling thread's mapped_data list, which is linked through the
   descriptor's prev field.

   DEVICE is passed to select_acc_device and also encodes the "if" clause
   (GOMP_IF_CLAUSE_FALSE means the clause was false).  MAPNUM, HOSTADDRS,
   SIZES and KINDS describe the mappings and are handed to gomp_map_vars.
   OFFLOAD_TABLE is not referenced in the visible lines.

   NOTE(review): the return type, braces, the trailing arguments of the
   second gomp_map_vars call and whatever ends the fallback path (presumably
   a return) are missing from this listing; confirm against the full
   source.  */
210 GOACC_data_start (int device
, const void *offload_table
, size_t mapnum
,
211 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
213 bool if_clause_condition_value
= device
!= GOMP_IF_CLAUSE_FALSE
;
214 struct target_mem_desc
*tgt
;
216 gomp_notify ("%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
217 __FUNCTION__
, mapnum
, hostaddrs
, sizes
, kinds
);
219 select_acc_device (device
);
221 struct goacc_thread
*thr
= goacc_thread ();
222 struct gomp_device_descr
*acc_dev
= thr
->dev
;
224 /* Host fallback or 'do nothing'. */
225 if ((acc_dev
->capabilities
& TARGET_CAP_SHARED_MEM
)
226 || !if_clause_condition_value
)
/* Fallback: an empty descriptor (no device, zero mappings) is still pushed,
   presumably so that GOACC_data_end always has an entry to pop.  */
228 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true, false);
229 tgt
->prev
= thr
->mapped_data
;
230 thr
->mapped_data
= tgt
;
235 gomp_notify (" %s: prepare mappings\n", __FUNCTION__
);
/* Regular path: map the requested variables on the selected device.  */
236 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
238 gomp_notify (" %s: mappings prepared\n", __FUNCTION__
);
/* Push the new descriptor on the per-thread list.  */
239 tgt
->prev
= thr
->mapped_data
;
240 thr
->mapped_data
= tgt
;
244 GOACC_data_end (void)
246 struct goacc_thread
*thr
= goacc_thread ();
247 struct target_mem_desc
*tgt
= thr
->mapped_data
;
249 gomp_notify (" %s: restore mappings\n", __FUNCTION__
);
250 thr
->mapped_data
= tgt
->prev
;
251 gomp_unmap_vars (tgt
, true);
252 gomp_notify (" %s: mappings restored\n", __FUNCTION__
);
/* Entry point for the OpenACC "enter data" and "exit data" directives.

   DEVICE is passed to select_acc_device and also encodes the "if" clause
   (GOMP_IF_CLAUSE_FALSE means the clause was false).  MAPNUM, HOSTADDRS,
   SIZES and KINDS describe the mappings; only the low byte of each kinds
   entry is examined.  ASYNC, NUM_WAITS and the variadic arguments are handed
   to goacc_wait.  OFFLOAD_TABLE is not referenced in the visible lines.

   Visible flow:
   - A device with TARGET_CAP_SHARED_MEM, or a false "if" clause, is tested
     first (the guarded statement is not visible -- presumably a return).
   - The first loop classifies the directive from the mapping kinds: POINTER
     and TO_PSET entries are tested first, FORCE_ALLOC / FORCE_PRESENT /
     FORCE_TO are tested next (presumably setting data_enter), then
     FORCE_DEALLOC / FORCE_FROM; a fatal error for unhandled kinds follows.
   - The second loop handles entries with acc_create, acc_present_or_copyin
     and gomp_acc_insert_pointer; the third with acc_delete, acc_copyout and
     gomp_acc_remove_pointer.  Which loop runs is presumably selected by
     data_enter.  In both, find_pset reports whether the next entry is a
     TO_PSET, and the group-of-three pointer calls are used in that case.
   - Finally the device's async mode is set back to acc_async_sync.

   NOTE(review): the return type, braces, switch headers, break statements,
   several declarations and trailing call arguments are missing from this
   listing, and one comment in the body is cut off.
   NOTE(review): the gomp_acc_remove_pointer call listed under the
   GOMP_MAP_POINTER case compares the low byte of kinds[i] with
   GOMP_MAP_FORCE_FROM; if the enclosing switch is on that same byte the
   comparison is always false there -- confirm against the full source.  */
256 GOACC_enter_exit_data (int device
, const void *offload_table
, size_t mapnum
,
257 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
258 int async
, int num_waits
, ...)
260 struct goacc_thread
*thr
;
261 struct gomp_device_descr
*acc_dev
;
262 bool if_clause_condition_value
= device
!= GOMP_IF_CLAUSE_FALSE
;
263 bool data_enter
= false;
266 select_acc_device (device
);
268 thr
= goacc_thread ();
271 if ((acc_dev
->capabilities
& TARGET_CAP_SHARED_MEM
)
272 || !if_clause_condition_value
)
279 va_start (ap
, num_waits
);
281 goacc_wait (async
, num_waits
, ap
);
286 acc_dev
->openacc
.async_set_async_func (async
);
288 /* Determine if this is an "acc enter data". */
289 for (i
= 0; i
< mapnum
; ++i
)
291 unsigned char kind
= kinds
[i
] & 0xff;
293 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
296 if (kind
== GOMP_MAP_FORCE_ALLOC
|| kind
== GOMP_MAP_FORCE_PRESENT
297 || kind
== GOMP_MAP_FORCE_TO
)
303 if (kind
== GOMP_MAP_FORCE_DEALLOC
|| kind
== GOMP_MAP_FORCE_FROM
)
306 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
312 for (i
= 0; i
< mapnum
; i
++)
314 unsigned char kind
= kinds
[i
] & 0xff;
316 /* Scan for PSETs. */
317 int psets
= find_pset (i
, mapnum
, kinds
);
323 case GOMP_MAP_POINTER
:
324 gomp_acc_insert_pointer (1, &hostaddrs
[i
], &sizes
[i
],
327 case GOMP_MAP_FORCE_ALLOC
:
328 acc_create (hostaddrs
[i
], sizes
[i
]);
/* NOTE(review): FORCE_PRESENT is handled with the same call as FORCE_TO
   (acc_present_or_copyin); confirm that this is intended.  */
330 case GOMP_MAP_FORCE_PRESENT
:
331 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
333 case GOMP_MAP_FORCE_TO
:
334 acc_present_or_copyin (hostaddrs
[i
], sizes
[i
]);
337 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
344 gomp_acc_insert_pointer (3, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
]);
345 /* Increment 'i' by two because OpenACC requires fortran
346 arrays to be contiguous, so each PSET is associated with
347 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and
354 for (i
= 0; i
< mapnum
; ++i
)
356 unsigned char kind
= kinds
[i
] & 0xff;
358 int psets
= find_pset (i
, mapnum
, kinds
);
364 case GOMP_MAP_POINTER
:
365 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
366 == GOMP_MAP_FORCE_FROM
,
369 case GOMP_MAP_FORCE_DEALLOC
:
370 acc_delete (hostaddrs
[i
], sizes
[i
]);
372 case GOMP_MAP_FORCE_FROM
:
373 acc_copyout (hostaddrs
[i
], sizes
[i
]);
376 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
383 gomp_acc_remove_pointer (hostaddrs
[i
], (kinds
[i
] & 0xff)
384 == GOMP_MAP_FORCE_FROM
, async
, 3);
385 /* See the above comment. */
390 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Entry point for an OpenACC kernels region.  In the visible lines it logs
   its arguments, selects the device, processes the wait list with goacc_wait
   and then forwards every named argument to GOACC_parallel.

   NOTE(review): the call to GOACC_parallel passes NUM_WAITS but none of the
   variadic queue ids, yet GOACC_parallel hands NUM_WAITS and its own va_list
   to goacc_wait, which reads NUM_WAITS ints with va_arg.  Unless a guard
   outside the visible lines prevents it, a non-zero NUM_WAITS would read
   arguments that were never passed; since the waits have already been
   processed here, passing 0 looks like the intent -- confirm.
   NOTE(review): the return type, braces, the va_list declaration and any
   va_end are missing from this listing.  */
394 GOACC_kernels (int device
, void (*fn
) (void *), const void *offload_table
,
395 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
396 unsigned short *kinds
,
397 int num_gangs
, int num_workers
, int vector_length
,
398 int async
, int num_waits
, ...)
400 gomp_notify ("%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n", __FUNCTION__
,
401 mapnum
, hostaddrs
, sizes
, kinds
);
405 select_acc_device (device
);
407 va_start (ap
, num_waits
);
410 goacc_wait (async
, num_waits
, ap
);
415 GOACC_parallel (device
, fn
, offload_table
, mapnum
, hostaddrs
, sizes
, kinds
,
416 num_gangs
, num_workers
, vector_length
, async
, num_waits
);
/* Wait on the asynchronous queues named by the NUM_WAITS int arguments in
   AP, relative to the queue selector ASYNC.

   Visible behaviour:
   - NUM_WAITS must be non-negative (asserted).
   - ASYNC == acc_async_sync with no waits, and ASYNC == acc_async_sync with
     waits, are tested separately; in the latter case each queue id is read
     with va_arg and tested with acc_async_test.
   - ASYNC == acc_async_noval with no waits calls the device's
     async_wait_all_async_func hook with acc_async_noval.
   - In the last loop each queue id is read with va_arg, tested with
     acc_async_test, and passed to the device's async_wait_async_func hook
     together with ASYNC.

   NOTE(review): the return type, the loop-index declaration and the
   statements guarded by most of the tests are missing from this listing,
   and the last comment in the body is cut off; confirm against the full
   source which branches return early and what follows a successful
   acc_async_test.  */
420 goacc_wait (int async
, int num_waits
, va_list ap
)
422 struct goacc_thread
*thr
= goacc_thread ();
423 struct gomp_device_descr
*acc_dev
= thr
->dev
;
426 assert (num_waits
>= 0);
428 if (async
== acc_async_sync
&& num_waits
== 0)
434 if (async
== acc_async_sync
&& num_waits
)
436 for (i
= 0; i
< num_waits
; i
++)
438 int qid
= va_arg (ap
, int);
440 if (acc_async_test (qid
))
448 if (async
== acc_async_noval
&& num_waits
== 0)
450 acc_dev
->openacc
.async_wait_all_async_func (acc_async_noval
);
454 for (i
= 0; i
< num_waits
; i
++)
456 int qid
= va_arg (ap
, int);
458 if (acc_async_test (qid
))
461 /* If we're waiting on the same asynchronous queue as we're launching on,
462 the queue itself will order work as required, so there's no need to
465 acc_dev
->openacc
.async_wait_async_func (qid
, async
);
/* Entry point for the OpenACC "update" directive.

   DEVICE is passed to select_acc_device and also encodes the "if" clause
   (GOMP_IF_CLAUSE_FALSE means the clause was false).  MAPNUM, HOSTADDRS,
   SIZES and KINDS describe the objects to update.  ASYNC, NUM_WAITS and the
   variadic arguments are handed to goacc_wait.  OFFLOAD_TABLE is not
   referenced in the visible lines.

   Visible flow: a device with TARGET_CAP_SHARED_MEM, or a false "if"
   clause, is tested first (guarded statement not visible -- presumably a
   return); the wait list is processed; the device's async mode is set to
   ASYNC; each entry is logged with dump_var and dispatched on the low byte
   of its kind -- POINTER and TO_PSET share a case whose body is not visible,
   FORCE_TO calls acc_update_device, FORCE_FROM calls acc_update_self, and a
   fatal error is listed for unhandled kinds; finally the async mode is set
   back to acc_async_sync.

   NOTE(review): the return type, braces, the switch header, break
   statements, the va_list declaration and any va_end are missing from this
   listing.  */
470 GOACC_update (int device
, const void *offload_table
, size_t mapnum
,
471 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
472 int async
, int num_waits
, ...)
474 bool if_clause_condition_value
= device
!= GOMP_IF_CLAUSE_FALSE
;
477 select_acc_device (device
);
479 struct goacc_thread
*thr
= goacc_thread ();
480 struct gomp_device_descr
*acc_dev
= thr
->dev
;
482 if ((acc_dev
->capabilities
& TARGET_CAP_SHARED_MEM
)
483 || !if_clause_condition_value
)
490 va_start (ap
, num_waits
);
492 goacc_wait (async
, num_waits
, ap
);
497 acc_dev
->openacc
.async_set_async_func (async
);
499 for (i
= 0; i
< mapnum
; ++i
)
501 unsigned char kind
= kinds
[i
] & 0xff;
503 dump_var ("UPD", i
, hostaddrs
[i
], sizes
[i
], kinds
[i
]);
507 case GOMP_MAP_POINTER
:
508 case GOMP_MAP_TO_PSET
:
511 case GOMP_MAP_FORCE_TO
:
512 acc_update_device (hostaddrs
[i
], sizes
[i
]);
515 case GOMP_MAP_FORCE_FROM
:
516 acc_update_self (hostaddrs
[i
], sizes
[i
]);
520 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
525 acc_dev
->openacc
.async_set_async_func (acc_async_sync
);
/* Entry point for the OpenACC wait directive: starts a va_list over the
   NUM_WAITS variadic queue ids and hands it, with ASYNC, to goacc_wait.

   NOTE(review): the return type, the declaration of ap and a matching
   va_end are not visible in this listing; confirm that va_end follows the
   call.  */
529 GOACC_wait (int async
, int num_waits
, ...)
533 va_start (ap
, num_waits
);
535 goacc_wait (async
, num_waits
, ap
);
541 GOACC_get_num_threads (void)
547 GOACC_get_thread_num (void)