1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
52 /* Mutex for offload image registration. */
53 static gomp_mutex_t register_lock
;
55 /* This structure describes an offload image.
56 It contains type of the target device, pointer to host table descriptor, and
57 pointer to target data. */
58 struct offload_image_descr
{
60 enum offload_target_type type
;
61 const void *host_table
;
62 const void *target_data
;
65 /* Array of descriptors of offload images. */
66 static struct offload_image_descr
*offload_images
;
68 /* Total number of offload images. */
69 static int num_offload_images
;
71 /* Array of descriptors for all available devices. */
72 static struct gomp_device_descr
*devices
;
74 /* Total number of available devices. */
75 static int num_devices
;
77 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
78 static int num_devices_openmp
;
80 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
83 gomp_realloc_unlock (void *old
, size_t size
)
85 void *ret
= realloc (old
, size
);
88 gomp_mutex_unlock (®ister_lock
);
89 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
95 gomp_init_targets_once (void)
97 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
101 gomp_get_num_devices (void)
103 gomp_init_targets_once ();
104 return num_devices_openmp
;
107 static struct gomp_device_descr
*
108 resolve_device (int device_id
)
110 if (device_id
== GOMP_DEVICE_ICV
)
112 struct gomp_task_icv
*icv
= gomp_icv (false);
113 device_id
= icv
->default_device_var
;
116 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
119 gomp_mutex_lock (&devices
[device_id
].lock
);
120 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
121 gomp_init_device (&devices
[device_id
]);
122 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
124 gomp_mutex_unlock (&devices
[device_id
].lock
);
127 gomp_mutex_unlock (&devices
[device_id
].lock
);
129 return &devices
[device_id
];
133 static inline splay_tree_key
134 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
136 if (key
->host_start
!= key
->host_end
)
137 return splay_tree_lookup (mem_map
, key
);
140 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
145 n
= splay_tree_lookup (mem_map
, key
);
149 return splay_tree_lookup (mem_map
, key
);
152 static inline splay_tree_key
153 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
155 if (key
->host_start
!= key
->host_end
)
156 return splay_tree_lookup (mem_map
, key
);
159 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
165 gomp_device_copy (struct gomp_device_descr
*devicep
,
166 bool (*copy_func
) (int, void *, const void *, size_t),
167 const char *dst
, void *dstaddr
,
168 const char *src
, const void *srcaddr
,
171 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
173 gomp_mutex_unlock (&devicep
->lock
);
174 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
175 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
180 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
181 bool (*copy_func
) (int, void *, const void *, size_t,
182 struct goacc_asyncqueue
*),
183 const char *dst
, void *dstaddr
,
184 const char *src
, const void *srcaddr
,
185 size_t size
, struct goacc_asyncqueue
*aq
)
187 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
189 gomp_mutex_unlock (&devicep
->lock
);
190 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
191 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
235 /* Add region with device tgt_start relative offset and length to CBUF. */
238 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
240 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
244 if (cbuf
->chunk_cnt
< 0)
246 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
248 cbuf
->chunk_cnt
= -1;
251 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
253 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
257 /* If the last chunk is only used by one mapping, discard it,
258 as it will be one host to device copy anyway and
259 memcpying it around will only waste cycles. */
260 if (cbuf
->use_cnt
== 1)
263 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
264 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
269 /* Return true for mapping kinds which need to copy data from the
270 host to device for regions that weren't previously mapped. */
273 gomp_to_device_kind_p (int kind
)
279 case GOMP_MAP_FORCE_ALLOC
:
280 case GOMP_MAP_ALWAYS_FROM
:
287 attribute_hidden
void
288 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
289 struct goacc_asyncqueue
*aq
,
290 void *d
, const void *h
, size_t sz
,
291 struct gomp_coalesce_buf
*cbuf
)
295 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
296 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
299 long last
= cbuf
->chunk_cnt
- 1;
300 while (first
<= last
)
302 long middle
= (first
+ last
) >> 1;
303 if (cbuf
->chunks
[middle
].end
<= doff
)
305 else if (cbuf
->chunks
[middle
].start
<= doff
)
307 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
308 gomp_fatal ("internal libgomp cbuf error");
309 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
318 if (__builtin_expect (aq
!= NULL
, 0))
319 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
320 "dev", d
, "host", h
, sz
, aq
);
322 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
325 attribute_hidden
void
326 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
327 struct goacc_asyncqueue
*aq
,
328 void *h
, const void *d
, size_t sz
)
330 if (__builtin_expect (aq
!= NULL
, 0))
331 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
332 "host", h
, "dev", d
, sz
, aq
);
334 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
338 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
340 if (!devicep
->free_func (devicep
->target_id
, devptr
))
342 gomp_mutex_unlock (&devicep
->lock
);
343 gomp_fatal ("error in freeing device memory block at %p", devptr
);
347 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
348 gomp_map_0len_lookup found oldn for newn.
349 Helper function of gomp_map_vars. */
352 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
353 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
354 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
355 unsigned char kind
, struct gomp_coalesce_buf
*cbuf
)
358 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
359 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
360 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
361 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
363 if ((kind
& GOMP_MAP_FLAG_FORCE
)
364 || oldn
->host_start
> newn
->host_start
365 || oldn
->host_end
< newn
->host_end
)
367 gomp_mutex_unlock (&devicep
->lock
);
368 gomp_fatal ("Trying to map into device [%p..%p) object when "
369 "[%p..%p) is already mapped",
370 (void *) newn
->host_start
, (void *) newn
->host_end
,
371 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
374 if (GOMP_MAP_ALWAYS_TO_P (kind
))
375 gomp_copy_host2dev (devicep
, aq
,
376 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
377 + newn
->host_start
- oldn
->host_start
),
378 (void *) newn
->host_start
,
379 newn
->host_end
- newn
->host_start
, cbuf
);
381 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Return the IDX-th mapping kind from the KINDS array, which holds
   unsigned shorts when SHORT_MAPKIND is true (OpenMP encoding, kind in the
   low byte and alignment in the high byte) and unsigned chars otherwise.  */
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
393 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
394 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
395 struct gomp_coalesce_buf
*cbuf
)
397 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
398 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
399 struct splay_tree_key_s cur_node
;
401 cur_node
.host_start
= host_ptr
;
402 if (cur_node
.host_start
== (uintptr_t) NULL
)
404 cur_node
.tgt_offset
= (uintptr_t) NULL
;
405 gomp_copy_host2dev (devicep
, aq
,
406 (void *) (tgt
->tgt_start
+ target_offset
),
407 (void *) &cur_node
.tgt_offset
,
408 sizeof (void *), cbuf
);
411 /* Add bias to the pointer value. */
412 cur_node
.host_start
+= bias
;
413 cur_node
.host_end
= cur_node
.host_start
;
414 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
417 gomp_mutex_unlock (&devicep
->lock
);
418 gomp_fatal ("Pointer target of array section wasn't mapped");
420 cur_node
.host_start
-= n
->host_start
;
422 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
423 /* At this point tgt_offset is target address of the
424 array section. Now subtract bias to get what we want
425 to initialize the pointer with. */
426 cur_node
.tgt_offset
-= bias
;
427 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
428 (void *) &cur_node
.tgt_offset
, sizeof (void *), cbuf
);
432 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
433 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
434 size_t first
, size_t i
, void **hostaddrs
,
435 size_t *sizes
, void *kinds
,
436 struct gomp_coalesce_buf
*cbuf
)
438 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
439 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
440 struct splay_tree_key_s cur_node
;
442 const bool short_mapkind
= true;
443 const int typemask
= short_mapkind
? 0xff : 0x7;
445 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
446 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
447 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
448 kind
= get_kind (short_mapkind
, kinds
, i
);
451 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
453 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
454 &tgt
->list
[i
], kind
& typemask
, cbuf
);
459 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
461 cur_node
.host_start
--;
462 n2
= splay_tree_lookup (mem_map
, &cur_node
);
463 cur_node
.host_start
++;
466 && n2
->host_start
- n
->host_start
467 == n2
->tgt_offset
- n
->tgt_offset
)
469 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
470 &tgt
->list
[i
], kind
& typemask
, cbuf
);
475 n2
= splay_tree_lookup (mem_map
, &cur_node
);
479 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
481 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
482 kind
& typemask
, cbuf
);
486 gomp_mutex_unlock (&devicep
->lock
);
487 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
488 "other mapped elements from the same structure weren't mapped "
489 "together with it", (void *) cur_node
.host_start
,
490 (void *) cur_node
.host_end
);
493 static inline uintptr_t
494 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
496 if (tgt
->list
[i
].key
!= NULL
)
497 return tgt
->list
[i
].key
->tgt
->tgt_start
498 + tgt
->list
[i
].key
->tgt_offset
499 + tgt
->list
[i
].offset
;
500 if (tgt
->list
[i
].offset
== ~(uintptr_t) 0)
501 return (uintptr_t) hostaddrs
[i
];
502 if (tgt
->list
[i
].offset
== ~(uintptr_t) 1)
504 if (tgt
->list
[i
].offset
== ~(uintptr_t) 2)
505 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
506 + tgt
->list
[i
+ 1].key
->tgt_offset
507 + tgt
->list
[i
+ 1].offset
508 + (uintptr_t) hostaddrs
[i
]
509 - (uintptr_t) hostaddrs
[i
+ 1];
510 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
/* NOTE(review): the extraction of this function dropped many interior lines
   (internal line numbers jump); the surviving fragments are kept
   byte-identical below rather than guessed at.  Purpose, from the visible
   code: build and return a target_mem_desc that maps the MAPNUM host
   regions described by HOSTADDRS/SIZES/KINDS onto DEVICEP, reusing
   already-mapped regions found in the device's splay tree and coalescing
   small host->device copies through a gomp_coalesce_buf.  AQ, when
   non-NULL, makes the copies asynchronous (OpenACC).  */
513 static inline __attribute__((always_inline
)) struct target_mem_desc
*
514 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
515 struct goacc_asyncqueue
*aq
, size_t mapnum
,
516 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
517 void *kinds
, bool short_mapkind
,
518 enum gomp_map_vars_kind pragma_kind
)
520 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
521 bool has_firstprivate
= false;
522 const int rshift
= short_mapkind
? 8 : 3;
523 const int typemask
= short_mapkind
? 0xff : 0x7;
524 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
525 struct splay_tree_key_s cur_node
;
526 struct target_mem_desc
*tgt
527 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
528 tgt
->list_count
= mapnum
;
529 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
530 tgt
->device_descr
= devicep
;
531 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
540 tgt_align
= sizeof (void *);
546 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
548 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
549 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
552 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
554 size_t align
= 4 * sizeof (void *);
556 tgt_size
= mapnum
* sizeof (void *);
558 cbuf
.use_cnt
= 1 + (mapnum
> 1);
559 cbuf
.chunks
[0].start
= 0;
560 cbuf
.chunks
[0].end
= tgt_size
;
563 gomp_mutex_lock (&devicep
->lock
);
564 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
566 gomp_mutex_unlock (&devicep
->lock
);
/* First pass: classify each mapping, accumulate the size/alignment of the
   device block to allocate, and count entries not yet mapped.  */
571 for (i
= 0; i
< mapnum
; i
++)
573 int kind
= get_kind (short_mapkind
, kinds
, i
);
574 if (hostaddrs
[i
] == NULL
575 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
577 tgt
->list
[i
].key
= NULL
;
578 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
581 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
583 tgt
->list
[i
].key
= NULL
;
588 tgt
->list
[i
].offset
= 0;
591 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
593 size_t first
= i
+ 1;
594 size_t last
= i
+ sizes
[i
];
595 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
596 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
598 tgt
->list
[i
].key
= NULL
;
599 tgt
->list
[i
].offset
= ~(uintptr_t) 2;
600 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
603 size_t align
= (size_t) 1 << (kind
>> rshift
);
604 if (tgt_align
< align
)
606 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
607 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
608 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
609 not_found_cnt
+= last
- i
;
610 for (i
= first
; i
<= last
; i
++)
612 tgt
->list
[i
].key
= NULL
;
613 if (gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
615 gomp_coalesce_buf_add (&cbuf
,
616 tgt_size
- cur_node
.host_end
617 + (uintptr_t) hostaddrs
[i
],
623 for (i
= first
; i
<= last
; i
++)
624 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
629 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
631 tgt
->list
[i
].key
= NULL
;
632 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
633 has_firstprivate
= true;
636 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
637 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
638 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
640 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
641 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
643 tgt
->list
[i
].key
= NULL
;
645 size_t align
= (size_t) 1 << (kind
>> rshift
);
646 if (tgt_align
< align
)
648 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
649 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
650 cur_node
.host_end
- cur_node
.host_start
);
651 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
652 has_firstprivate
= true;
656 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
658 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
661 tgt
->list
[i
].key
= NULL
;
662 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
667 n
= splay_tree_lookup (mem_map
, &cur_node
);
668 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
669 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
670 kind
& typemask
, NULL
);
673 tgt
->list
[i
].key
= NULL
;
675 size_t align
= (size_t) 1 << (kind
>> rshift
);
677 if (tgt_align
< align
)
679 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
680 if (gomp_to_device_kind_p (kind
& typemask
))
681 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
682 cur_node
.host_end
- cur_node
.host_start
);
683 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
684 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
687 for (j
= i
+ 1; j
< mapnum
; j
++)
688 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
691 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
692 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
693 > cur_node
.host_end
))
697 tgt
->list
[j
].key
= NULL
;
708 gomp_mutex_unlock (&devicep
->lock
);
709 gomp_fatal ("unexpected aggregation");
711 tgt
->to_free
= devaddrs
[0];
712 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
713 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
/* Allocate the device memory block for the not-yet-mapped regions.  */
715 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
717 /* Allocate tgt_align aligned tgt_size block of memory. */
718 /* FIXME: Perhaps change interface to allocate properly aligned
720 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
721 tgt_size
+ tgt_align
- 1);
724 gomp_mutex_unlock (&devicep
->lock
);
725 gomp_fatal ("device memory allocation fail");
728 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
729 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
730 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
732 if (cbuf
.use_cnt
== 1)
734 if (cbuf
.chunk_cnt
> 0)
737 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
753 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
754 tgt_size
= mapnum
* sizeof (void *);
/* Second pass: create splay-tree entries for the new mappings and perform
   the host->device copies (possibly staged through the coalesce buffer).  */
757 if (not_found_cnt
|| has_firstprivate
)
760 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
761 splay_tree_node array
= tgt
->array
;
762 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
763 uintptr_t field_tgt_base
= 0;
765 for (i
= 0; i
< mapnum
; i
++)
766 if (tgt
->list
[i
].key
== NULL
)
768 int kind
= get_kind (short_mapkind
, kinds
, i
);
769 if (hostaddrs
[i
] == NULL
)
771 switch (kind
& typemask
)
773 size_t align
, len
, first
, last
;
775 case GOMP_MAP_FIRSTPRIVATE
:
776 align
= (size_t) 1 << (kind
>> rshift
);
777 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
778 tgt
->list
[i
].offset
= tgt_size
;
780 gomp_copy_host2dev (devicep
, aq
,
781 (void *) (tgt
->tgt_start
+ tgt_size
),
782 (void *) hostaddrs
[i
], len
, cbufp
);
785 case GOMP_MAP_FIRSTPRIVATE_INT
:
786 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
788 case GOMP_MAP_USE_DEVICE_PTR
:
789 if (tgt
->list
[i
].offset
== 0)
791 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
792 cur_node
.host_end
= cur_node
.host_start
;
793 n
= gomp_map_lookup (mem_map
, &cur_node
);
796 gomp_mutex_unlock (&devicep
->lock
);
797 gomp_fatal ("use_device_ptr pointer wasn't mapped");
799 cur_node
.host_start
-= n
->host_start
;
801 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
802 + cur_node
.host_start
);
803 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
806 case GOMP_MAP_STRUCT
:
809 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
810 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
812 if (tgt
->list
[first
].key
!= NULL
)
814 n
= splay_tree_lookup (mem_map
, &cur_node
);
817 size_t align
= (size_t) 1 << (kind
>> rshift
);
818 tgt_size
-= (uintptr_t) hostaddrs
[first
]
819 - (uintptr_t) hostaddrs
[i
];
820 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
821 tgt_size
+= (uintptr_t) hostaddrs
[first
]
822 - (uintptr_t) hostaddrs
[i
];
823 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
824 field_tgt_offset
= tgt_size
;
825 field_tgt_clear
= last
;
826 tgt_size
+= cur_node
.host_end
827 - (uintptr_t) hostaddrs
[first
];
830 for (i
= first
; i
<= last
; i
++)
831 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
832 sizes
, kinds
, cbufp
);
835 case GOMP_MAP_ALWAYS_POINTER
:
836 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
837 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
838 n
= splay_tree_lookup (mem_map
, &cur_node
);
840 || n
->host_start
> cur_node
.host_start
841 || n
->host_end
< cur_node
.host_end
)
843 gomp_mutex_unlock (&devicep
->lock
);
844 gomp_fatal ("always pointer not mapped");
846 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
847 != GOMP_MAP_ALWAYS_POINTER
)
848 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
849 if (cur_node
.tgt_offset
)
850 cur_node
.tgt_offset
-= sizes
[i
];
851 gomp_copy_host2dev (devicep
, aq
,
852 (void *) (n
->tgt
->tgt_start
854 + cur_node
.host_start
856 (void *) &cur_node
.tgt_offset
,
857 sizeof (void *), cbufp
);
858 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
859 + cur_node
.host_start
- n
->host_start
;
/* Default case: create a fresh splay-tree key for this mapping.  */
864 splay_tree_key k
= &array
->key
;
865 k
->host_start
= (uintptr_t) hostaddrs
[i
];
866 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
867 k
->host_end
= k
->host_start
+ sizes
[i
];
869 k
->host_end
= k
->host_start
+ sizeof (void *);
870 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
871 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
872 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
873 kind
& typemask
, cbufp
);
877 if (n
&& n
->refcount
== REFCOUNT_LINK
)
879 /* Replace target address of the pointer with target address
880 of mapped object in the splay tree. */
881 splay_tree_remove (mem_map
, n
);
884 size_t align
= (size_t) 1 << (kind
>> rshift
);
885 tgt
->list
[i
].key
= k
;
887 if (field_tgt_clear
!= ~(size_t) 0)
889 k
->tgt_offset
= k
->host_start
- field_tgt_base
891 if (i
== field_tgt_clear
)
892 field_tgt_clear
= ~(size_t) 0;
896 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
897 k
->tgt_offset
= tgt_size
;
898 tgt_size
+= k
->host_end
- k
->host_start
;
900 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
901 tgt
->list
[i
].always_copy_from
902 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
903 tgt
->list
[i
].offset
= 0;
904 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
906 k
->dynamic_refcount
= 0;
910 splay_tree_insert (mem_map
, array
);
911 switch (kind
& typemask
)
915 case GOMP_MAP_FORCE_ALLOC
:
916 case GOMP_MAP_FORCE_FROM
:
917 case GOMP_MAP_ALWAYS_FROM
:
920 case GOMP_MAP_TOFROM
:
921 case GOMP_MAP_FORCE_TO
:
922 case GOMP_MAP_FORCE_TOFROM
:
923 case GOMP_MAP_ALWAYS_TO
:
924 case GOMP_MAP_ALWAYS_TOFROM
:
925 gomp_copy_host2dev (devicep
, aq
,
926 (void *) (tgt
->tgt_start
928 (void *) k
->host_start
,
929 k
->host_end
- k
->host_start
, cbufp
);
931 case GOMP_MAP_POINTER
:
932 gomp_map_pointer (tgt
, aq
,
933 (uintptr_t) *(void **) k
->host_start
,
934 k
->tgt_offset
, sizes
[i
], cbufp
);
936 case GOMP_MAP_TO_PSET
:
937 gomp_copy_host2dev (devicep
, aq
,
938 (void *) (tgt
->tgt_start
940 (void *) k
->host_start
,
941 k
->host_end
- k
->host_start
, cbufp
);
943 for (j
= i
+ 1; j
< mapnum
; j
++)
944 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
948 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
949 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
954 tgt
->list
[j
].key
= k
;
955 tgt
->list
[j
].copy_from
= false;
956 tgt
->list
[j
].always_copy_from
= false;
957 if (k
->refcount
!= REFCOUNT_INFINITY
)
959 gomp_map_pointer (tgt
, aq
,
960 (uintptr_t) *(void **) hostaddrs
[j
],
962 + ((uintptr_t) hostaddrs
[j
]
968 case GOMP_MAP_FORCE_PRESENT
:
970 /* We already looked up the memory region above and it
972 size_t size
= k
->host_end
- k
->host_start
;
973 gomp_mutex_unlock (&devicep
->lock
);
974 #ifdef HAVE_INTTYPES_H
975 gomp_fatal ("present clause: !acc_is_present (%p, "
976 "%"PRIu64
" (0x%"PRIx64
"))",
977 (void *) k
->host_start
,
978 (uint64_t) size
, (uint64_t) size
);
980 gomp_fatal ("present clause: !acc_is_present (%p, "
981 "%lu (0x%lx))", (void *) k
->host_start
,
982 (unsigned long) size
, (unsigned long) size
);
986 case GOMP_MAP_FORCE_DEVICEPTR
:
987 assert (k
->host_end
- k
->host_start
== sizeof (void *));
988 gomp_copy_host2dev (devicep
, aq
,
989 (void *) (tgt
->tgt_start
991 (void *) k
->host_start
,
992 sizeof (void *), cbufp
);
995 gomp_mutex_unlock (&devicep
->lock
);
996 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1002 /* Set link pointer on target to the device address of the
1004 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1005 /* We intentionally do not use coalescing here, as it's not
1006 data allocated by the current call to this function. */
1007 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1008 &tgt_addr
, sizeof (void *), NULL
);
/* For a target region, write the array of device pointers the generated
   kernel receives as its argument block.  */
1015 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1017 for (i
= 0; i
< mapnum
; i
++)
1019 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1020 gomp_copy_host2dev (devicep
, aq
,
1021 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1022 (void *) &cur_node
.tgt_offset
, sizeof (void *),
/* Flush the coalesce-buffer chunks to the device in one copy each.  */
1030 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1031 gomp_copy_host2dev (devicep
, aq
,
1032 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1033 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1034 - cbuf
.chunks
[0].start
),
1035 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
, NULL
);
1041 /* If the variable from "omp target enter data" map-list was already mapped,
1042 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1044 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
1050 gomp_mutex_unlock (&devicep
->lock
);
1054 attribute_hidden
struct target_mem_desc
*
1055 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1056 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1057 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
1059 return gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1060 sizes
, kinds
, short_mapkind
, pragma_kind
);
1063 attribute_hidden
struct target_mem_desc
*
1064 gomp_map_vars_async (struct gomp_device_descr
*devicep
,
1065 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1066 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1067 void *kinds
, bool short_mapkind
,
1068 enum gomp_map_vars_kind pragma_kind
)
1070 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1071 sizes
, kinds
, short_mapkind
, pragma_kind
);
/* Release the device memory backing TGT.  */
/* NOTE(review): extraction dropped several lines of this body (the opening
   brace, the condition guarding the free, and the trailing free() calls for
   tgt->array and tgt itself) — verify against the upstream file.  */
attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
gomp_free_device_memory (tgt->device_descr, tgt->to_free);
/* Remove mapping K from DEVICEP's address-mapping splay tree, dropping a
   reference on its owning target_mem_desc.  Returns true iff the descriptor
   itself was unmapped as a result.  */
/* NOTE(review): extraction dropped lines here (the guard before the
   link_key re-insert and the refcount-decrement else-branch structure) —
   verify against upstream.  */
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
bool is_tgt_unmapped = false;
splay_tree_remove (&devicep->mem_map, k);
/* Re-insert the "declare target link" key, if any, so the link variable
   stays resolvable.  */
splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
if (k->tgt->refcount > 1)
is_tgt_unmapped = true;
gomp_unmap_tgt (k->tgt);
return is_tgt_unmapped;
/* Drop one reference from the target_mem_desc passed as PTR (void * so this
   can be used as an async-queue callback), unmapping it on last release.  */
/* NOTE(review): extraction dropped the function's storage-class/return-type
   line and the refcount-decrement branch — verify against upstream.  */
gomp_unref_tgt (void *ptr)
struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
if (tgt->refcount > 1)
gomp_unmap_tgt (tgt);
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */
/* NOTE(review): extraction dropped many lines of this function (braces,
   the refcount-decrement logic behind do_unmap, the early-return paths and
   the trailing argument of the queue_callback_func call) — verify the body
   against the upstream file before relying on it.  */
static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
struct gomp_device_descr *devicep = tgt->device_descr;
/* Nothing was mapped: nothing to copy back or remove.  */
if (tgt->list_count == 0)
gomp_mutex_lock (&devicep->lock);
/* A finalized device has already torn its mappings down.  */
if (devicep->state == GOMP_DEVICE_FINALIZED)
gomp_mutex_unlock (&devicep->lock);
for (i = 0; i < tgt->list_count; i++)
splay_tree_key k = tgt->list[i].key;
bool do_unmap = false;
if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
else if (k->refcount == 1)
/* Copy back when this entry is being unmapped and was mapped with
   copy_from, or unconditionally for always_copy_from entries.  */
if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
    || tgt->list[i].always_copy_from)
gomp_copy_dev2host (devicep, aq,
		    (void *) (k->host_start + tgt->list[i].offset),
		    (void *) (k->tgt->tgt_start + k->tgt_offset
			      + tgt->list[i].offset),
		    tgt->list[i].length);
gomp_remove_var (devicep, k);
/* Asynchronous case: defer the final unref until the queue drains.  */
devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
gomp_unref_tgt ((void *) tgt);
gomp_mutex_unlock (&devicep->lock);
1174 attribute_hidden
void
1175 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
1177 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
);
1180 attribute_hidden
void
1181 gomp_unmap_vars_async (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1182 struct goacc_asyncqueue
*aq
)
1184 gomp_unmap_vars_internal (tgt
, do_copyfrom
, aq
);
/* Implement "omp target update": for each of the MAPNUM host ranges in
   HOSTADDRS/SIZES, look up its device mapping on DEVICEP and copy to and/or
   from the device as directed by KINDS.  SHORT_MAPKIND selects the 16-bit
   kind encoding (mask 0xff) over the legacy 8-bit one (mask 0x7).  */
/* NOTE(review): extraction dropped lines of this function (storage class /
   return type, the early bail-outs before the lock, braces, and the trailing
   argument of the gomp_copy_host2dev call) — verify against upstream.  */
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
struct splay_tree_key_s cur_node;
const int typemask = short_mapkind ? 0xff : 0x7;
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_FINALIZED)
gomp_mutex_unlock (&devicep->lock);
for (i = 0; i < mapnum; i++)
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
int kind = get_kind (short_mapkind, kinds, i);
/* The requested range must lie entirely within the mapped object.  */
if (n->host_start > cur_node.host_start
    || n->host_end < cur_node.host_end)
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Trying to update [%p..%p) object when "
	    "only [%p..%p) is mapped",
	    (void *) cur_node.host_start,
	    (void *) cur_node.host_end,
	    (void *) n->host_start,
	    (void *) n->host_end);
void *hostaddr = (void *) cur_node.host_start;
/* Device address = mapped base + offset of the range into the object.  */
void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
			  + cur_node.host_start - n->host_start);
size_t size = cur_node.host_end - cur_node.host_start;
if (GOMP_MAP_COPY_TO_P (kind & typemask))
gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
gomp_mutex_unlock (&devicep->lock);
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely in the host and device compiler
   emitting variable and functions in the same order.  */
/* NOTE(review): extraction dropped lines here (return type, braces, the
   assignment target of load_image_func's result, tgt->tgt_start/tgt_end
   setup and loop-body delimiters); "&register_lock" below was repaired from
   a "(R)" mojibake of the original "&register_lock".  Verify against
   upstream.  */
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
/* HOST_TABLE is four pointers: func table start/end, var table start/end.  */
void **host_func_table = ((void ***) host_table)[0];
void **host_funcs_end = ((void ***) host_table)[1];
void **host_var_table = ((void ***) host_table)[2];
void **host_vars_end = ((void ***) host_table)[3];
/* The func table contains only addresses, the var table contains addresses
   and corresponding sizes.  */
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
/* Load image to device and get target addresses for the image.  */
struct addr_pair *target_table = NULL;
int i, num_target_entries;
= devicep->load_image_func (devicep->target_id, version,
			    target_data, &target_table);
/* The plugin must produce exactly one entry per host func and var.  */
if (num_target_entries != num_funcs + num_vars)
gomp_mutex_unlock (&devicep->lock);
if (is_register_lock)
gomp_mutex_unlock (&register_lock);
gomp_fatal ("Cannot map target functions or variables"
	    " (expected %u, have %u)", num_funcs + num_vars,
	    num_target_entries);
/* Insert host-target address mapping into splay tree.  */
struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
tgt->refcount = REFCOUNT_INFINITY;
tgt->to_free = NULL;
tgt->list_count = 0;
tgt->device_descr = devicep;
splay_tree_node array = tgt->array;
for (i = 0; i < num_funcs; i++)
splay_tree_key k = &array->key;
/* Functions are keyed by a dummy 1-byte range at their host address.  */
k->host_start = (uintptr_t) host_func_table[i];
k->host_end = k->host_start + 1;
k->tgt_offset = target_table[i].start;
k->refcount = REFCOUNT_INFINITY;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
/* Most significant bit of the size in host and target tables marks
   "omp declare target link" variables.  */
const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
const uintptr_t size_mask = ~link_bit;
for (i = 0; i < num_vars; i++)
struct addr_pair *target_var = &target_table[num_funcs + i];
uintptr_t target_size = target_var->end - target_var->start;
/* Host and device copies of a variable must agree on size.  */
if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
gomp_mutex_unlock (&devicep->lock);
if (is_register_lock)
gomp_mutex_unlock (&register_lock);
gomp_fatal ("Cannot map target variables (size mismatch)");
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) host_var_table[i * 2];
= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
k->tgt_offset = target_var->start;
/* Link variables are refcounted; ordinary ones live forever.  */
k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
free (target_table);
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */
/* NOTE(review): extraction dropped lines here — the "unsigned version,"
   parameter line (VERSION is used below), braces, the refcount checks before
   tree removal, and the final handling when NODE survives.  Verify against
   upstream.  */
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       const void *host_table, const void *target_data)
/* Same four-pointer host table layout as gomp_load_image_to_device.  */
void **host_func_table = ((void ***) host_table)[0];
void **host_funcs_end = ((void ***) host_table)[1];
void **host_var_table = ((void ***) host_table)[2];
void **host_vars_end = ((void ***) host_table)[3];
/* The func table contains only addresses, the var table contains addresses
   and corresponding sizes.  */
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
struct splay_tree_key_s k;
splay_tree_key node = NULL;
/* Find mapping at start of node array */
if (num_funcs || num_vars)
k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		: (uintptr_t) host_var_table[0]);
k.host_end = k.host_start + 1;
node = splay_tree_lookup (&devicep->mem_map, &k);
if (!devicep->unload_image_func (devicep->target_id, version, target_data))
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("image unload fail");
/* Remove mappings from splay tree.  */
for (i = 0; i < num_funcs; i++)
k.host_start = (uintptr_t) host_func_table[i];
k.host_end = k.host_start + 1;
splay_tree_remove (&devicep->mem_map, &k);
/* Most significant bit of the size in host and target tables marks
   "omp declare target link" variables.  */
const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
const uintptr_t size_mask = ~link_bit;
bool is_tgt_unmapped = false;
for (i = 0; i < num_vars; i++)
k.host_start = (uintptr_t) host_var_table[i * 2];
= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
/* Non-link variables are removed directly; link variables go through
   gomp_remove_var so their refcounting is honored.  */
if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
splay_tree_remove (&devicep->mem_map, &k);
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
is_tgt_unmapped = gomp_remove_var (devicep, n);
if (node && !is_tgt_unmapped)
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */
/* NOTE(review): extraction dropped lines here (return type, local
   declarations such as "int i", braces, and the assignment target of the
   gomp_realloc_unlock call — presumably offload_images); "&register_lock"
   was repaired from a "(R)" mojibake.  Verify against upstream.  */
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
/* Refuse images produced by a newer compiler than this runtime.  */
if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
gomp_fatal ("Library too old for offload (version %u < %u)",
	    GOMP_VERSION, GOMP_VERSION_LIB (version));
gomp_mutex_lock (&register_lock);
/* Load image to all initialized devices.  */
for (i = 0; i < num_devices; i++)
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type
    && devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_load_image_to_device (devicep, version,
			   host_table, target_data, true);
gomp_mutex_unlock (&devicep->lock);
/* Insert image to array of pending images.  */
= gomp_realloc_unlock (offload_images,
		       (num_offload_images + 1)
		       * sizeof (struct offload_image_descr));
offload_images[num_offload_images].version = version;
offload_images[num_offload_images].type = target_type;
offload_images[num_offload_images].host_table = host_table;
offload_images[num_offload_images].target_data = target_data;
num_offload_images++;
gomp_mutex_unlock (&register_lock);
/* Legacy registration entry point: identical to GOMP_offload_register_ver
   with table-format version 0.  Kept for ABI compatibility with objects
   produced by older compilers.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */
/* NOTE(review): extraction dropped lines here (return type, "int i"
   declaration, braces, and the break after the matching image is removed);
   "&register_lock" was repaired from a "(R)" mojibake.  Verify against
   upstream.  */
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
gomp_mutex_lock (&register_lock);
/* Unload image from all initialized devices.  */
for (i = 0; i < num_devices; i++)
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type
    && devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_unload_image_from_device (devicep, version,
			       host_table, target_data);
gomp_mutex_unlock (&devicep->lock);
/* Remove image from array of pending images.  */
for (i = 0; i < num_offload_images; i++)
if (offload_images[i].target_data == target_data)
/* Swap the last pending image into this slot.  */
offload_images[i] = offload_images[--num_offload_images];
gomp_mutex_unlock (&register_lock);
/* Legacy unregistration entry point: identical to
   GOMP_offload_unregister_ver with table-format version 0.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */
/* NOTE(review): extraction dropped lines here (the "int i" declaration,
   braces, and the final boolean argument of the gomp_load_image_to_device
   call — presumably "false", i.e. register_lock not held).  Verify against
   upstream.  */
attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
if (!devicep->init_device_func (devicep->target_id))
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("device initialization failed");
/* Load to device all images registered by the moment.  */
for (i = 0; i < num_offload_images; i++)
struct offload_image_descr *image = &offload_images[i];
if (image->type == devicep->type)
gomp_load_image_to_device (devicep, image->version,
			   image->host_table, image->target_data,
/* Initialize OpenACC asynchronous queues.  */
goacc_init_asyncqueues (devicep);
devicep->state = GOMP_DEVICE_INITIALIZED;
1538 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1539 must be locked on entry, and remains locked on return. */
1541 attribute_hidden
bool
1542 gomp_fini_device (struct gomp_device_descr
*devicep
)
1544 bool ret
= goacc_fini_asyncqueues (devicep
);
1545 ret
&= devicep
->fini_device_func (devicep
->target_id
);
1546 devicep
->state
= GOMP_DEVICE_FINALIZED
;
/* Unload from DEVICEP every registered image whose type matches the
   device's, provided the device was initialized.  */
/* NOTE(review): extraction dropped lines here ("unsigned i" declaration,
   braces, and the host_table argument line of the
   gomp_unload_image_from_device call).  Verify against upstream.  */
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
if (devicep->state == GOMP_DEVICE_INITIALIZED)
/* Unload from device all images registered at the moment.  */
for (i = 0; i < num_offload_images; i++)
struct offload_image_descr *image = &offload_images[i];
if (image->type == devicep->type)
gomp_unload_image_from_device (devicep, image->version,
			       image->target_data);
/* Free address mapping tables.  MM must be locked on entry, and remains
   locked on return.  */
/* NOTE(review): extraction dropped lines of the loop body after the remove
   (presumably the release of the owning target_mem_desc).  Verify against
   upstream.  */
attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
/* Repeatedly peel the root entry until the tree is empty.  */
while (mem_map->root)
struct target_mem_desc *tgt = mem_map->root->key.tgt;
splay_tree_remove (mem_map, &mem_map->root->key);
/* Host fallback for GOMP_target{,_ext} routines.  */
/* NOTE(review): extraction dropped lines here (return type, the save of the
   current thread state into old_thr, the actual fn (hostaddrs) invocation
   and the restore of *thr) — verify against upstream.  */
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
struct gomp_thread old_thr, *thr = gomp_thread ();
/* Run the region on a pristine thread state.  */
memset (thr, '\0', sizeof (*thr));
if (gomp_places_list)
/* Preserve the thread's place binding across the fallback.  */
thr->place = old_thr.place;
thr->ts.place_partition_len = gomp_places_list_len;
gomp_free_thread (thr);
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
/* NOTE(review): extraction dropped lines here (return type, the "size_t
   *tgt_size" parameter line, the "size_t i" declaration, braces, and the
   *tgt_align update inside the if) — verify against upstream.  */
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
/* The high byte of the kind encodes log2 of the required alignment.  */
size_t align = (size_t) 1 << (kinds[i] >> 8);
if (*tgt_align < align)
/* Round the running size up to this entry's alignment, then add it.  */
*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
*tgt_size += sizes[i];
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
/* NOTE(review): extraction dropped lines here (return type, the trailing
   "size_t tgt_size" parameter line, the guard "if (al)" before the alignment
   adjustment, the "size_t i" declaration and braces) — verify against
   upstream.  */
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
/* Align the scratch buffer to TGT_ALIGN first.  */
uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
tgt += tgt_align - al;
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
size_t align = (size_t) 1 << (kinds[i] >> 8);
tgt_size = (tgt_size + align - 1) & ~(align - 1);
memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
/* Redirect the map entry at the private copy.  */
hostaddrs[i] = tgt + tgt_size;
tgt_size = tgt_size + sizes[i];
/* Helper function of GOMP_target{,_ext} routines.  */
/* Resolve the device-side address for host function HOST_FN on DEVICEP.
   NOTE(review): extraction dropped lines (return type, braces, the return
   value on the finalized-device path and the NULL-lookup handling before the
   final return) — verify against upstream.  */
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
/* Native-exec devices run host code directly: no lookup needed.  */
if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
return (void *) host_fn;
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_FINALIZED)
gomp_mutex_unlock (&devicep->lock);
/* Functions are registered with a dummy 1-byte range at their host
   address; look that range up.  */
struct splay_tree_key_s k;
k.host_start = (uintptr_t) host_fn;
k.host_end = k.host_start + 1;
splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
gomp_mutex_unlock (&devicep->lock);
return (void *) tgt_fn->tgt_offset;
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */
/* NOTE(review): extraction dropped lines (return type, the fn_addr
   declaration, the "devicep == NULL" head of the fallback condition, braces,
   and the trailing NULL args of the run_func call) — verify against
   upstream.  */
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
struct gomp_device_descr *devicep = resolve_device (device);
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
/* All shared memory devices should use the GOMP_target_ext function.  */
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
return gomp_target_fallback (fn, hostaddrs);
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		 GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
gomp_unmap_vars (tgt_vars, true);
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one two
   elements where the first specifies for which device it is intended, the type
   and optionally also the value.  If the value is not present in the first
   one, the whole second element the actual value.  The last element of the
   array is a single NULL.  Among the device independent can be for example
   NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */
/* NOTE(review): extraction dropped many lines of this function (return
   type, braces, #else/#endif of the HAVE_SYNC_BUILTINS block, several guard
   conditions, fpc_done updates, early returns and the trailing args of the
   run_func call) — verify the body against the upstream file.  */
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
struct gomp_device_descr *devicep = resolve_device (device);
size_t tgt_align = 0, tgt_size = 0;
bool fpc_done = false;
if (flags & GOMP_TARGET_FLAG_NOWAIT)
struct gomp_thread *thr = gomp_thread ();
/* Create a team if we don't have any around, as nowait
   target tasks make sense to run asynchronously even when
   outside of any parallel.  */
if (__builtin_expect (thr->ts.team == NULL, 0))
struct gomp_team *team = gomp_new_team (1);
struct gomp_task *task = thr->task;
struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
team->prev_ts = thr->ts;
thr->ts.team = team;
thr->ts.team_id = 0;
thr->ts.work_share = &team->work_shares[0];
thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
thr->ts.single_count = 0;
thr->ts.static_trip = 0;
thr->task = &team->implicit_task[0];
gomp_init_task (thr->task, NULL, icv);
thr->task = &team->implicit_task[0];
pthread_setspecific (gomp_thread_destructor, thr);
&& !thr->task->final_task)
/* Defer the whole construct to a target task.  */
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
			 sizes, kinds, flags, depend, args,
			 GOMP_TARGET_TASK_BEFORE_MAP);
/* If there are depend clauses, but nowait is not present
   (or we are in a final task), block the parent task until the
   dependencies are resolved and then just continue with the rest
   of the function as if it is a merged task.  */
struct gomp_thread *thr = gomp_thread ();
if (thr->task && thr->task->depend_hash)
/* If we might need to wait, copy firstprivate now.  */
calculate_firstprivate_requirements (mapnum, sizes, kinds,
				     &tgt_align, &tgt_size);
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			tgt_align, tgt_size);
gomp_task_maybe_wait_for_dependencies (depend);
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
/* Host fallback path: privatize firstprivate data, then run FN.  */
calculate_firstprivate_requirements (mapnum, sizes, kinds,
				     &tgt_align, &tgt_size);
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			tgt_align, tgt_size);
gomp_target_fallback (fn, hostaddrs);
struct target_mem_desc *tgt_vars;
if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
/* Shared-memory device: firstprivate copies suffice, no mapping.  */
calculate_firstprivate_requirements (mapnum, sizes, kinds,
				     &tgt_align, &tgt_size);
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			tgt_align, tgt_size);
tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			  true, GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr,
		   tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
gomp_unmap_vars (tgt_vars, true);
1852 /* Host fallback for GOMP_target_data{,_ext} routines. */
1855 gomp_target_data_fallback (void)
1857 struct gomp_task_icv
*icv
= gomp_icv (false);
1858 if (icv
->target_data
)
1860 /* Even when doing a host fallback, if there are any active
1861 #pragma omp target data constructs, need to remember the
1862 new #pragma omp target data, otherwise GOMP_target_end_data
1863 would get out of sync. */
1864 struct target_mem_desc
*tgt
1865 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1866 GOMP_MAP_VARS_DATA
);
1867 tgt
->prev
= icv
->target_data
;
1868 icv
->target_data
= tgt
;
/* Implement "omp target data" with the legacy 8-bit KINDS ABI: map MAPNUM
   host objects onto the device for DEVICE and push the resulting region on
   the task's target_data chain.  */
/* NOTE(review): extraction dropped lines (return type, the "devicep ==
   NULL" head of the fallback condition, and braces) — verify against
   upstream.  */
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
struct gomp_device_descr *devicep = resolve_device (device);
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
return gomp_target_data_fallback ();
struct target_mem_desc *tgt
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		 GOMP_MAP_VARS_DATA);
/* Push onto the active target-data chain for GOMP_target_end_data.  */
struct gomp_task_icv *icv = gomp_icv (true);
tgt->prev = icv->target_data;
icv->target_data = tgt;
/* Like GOMP_target_data, but with the 16-bit KINDS encoding and without the
   unused ABI parameter.  */
/* NOTE(review): extraction dropped lines (return type, the "devicep ==
   NULL" head of the fallback condition, and braces) — verify against
   upstream.  */
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
struct gomp_device_descr *devicep = resolve_device (device);
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return gomp_target_data_fallback ();
struct target_mem_desc *tgt
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		 GOMP_MAP_VARS_DATA);
/* Push onto the active target-data chain for GOMP_target_end_data.  */
struct gomp_task_icv *icv = gomp_icv (true);
tgt->prev = icv->target_data;
icv->target_data = tgt;
1911 GOMP_target_end_data (void)
1913 struct gomp_task_icv
*icv
= gomp_icv (false);
1914 if (icv
->target_data
)
1916 struct target_mem_desc
*tgt
= icv
->target_data
;
1917 icv
->target_data
= tgt
->prev
;
1918 gomp_unmap_vars (tgt
, true);
/* Implement "omp target update" with the legacy 8-bit KINDS ABI.  */
/* NOTE(review): extraction dropped lines (return type, the "devicep ==
   NULL" head of the bail-out condition and its early return, and braces) —
   verify against upstream.  */
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
struct gomp_device_descr *devicep = resolve_device (device);
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
/* Like GOMP_target_update, but with 16-bit KINDS and added FLAGS/DEPEND
   support (nowait target update as a deferred task).  */
/* NOTE(review): extraction dropped many lines (return type, braces, the
   "are present" tail of the comment below, the returns inside the
   cancellation checks, and the early bail-out heads) — verify against
   upstream.  */
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
struct gomp_device_descr *devicep = resolve_device (device);
/* If there are depend clauses, but nowait is not present,
   block the parent task until the dependencies are resolved
   and then just continue with the rest of the function as if it
   is a merged task.  Until we are able to schedule task during
   variable mapping or unmapping, ignore nowait if depend clauses
   [are present].  */
struct gomp_thread *thr = gomp_thread ();
if (thr->task && thr->task->depend_hash)
if ((flags & GOMP_TARGET_FLAG_NOWAIT)
    && !thr->task->final_task)
/* Deferred path: run the update as a target task.  */
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
			     mapnum, hostaddrs, sizes, kinds,
			     flags | GOMP_TARGET_FLAG_UPDATE,
			     depend, NULL, GOMP_TARGET_TASK_DATA))
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new
   [tasks.]  */
if (__builtin_expect (gomp_cancel_var, 0) && team)
if (gomp_team_barrier_cancelled (&team->barrier))
if (thr->task->taskgroup)
if (thr->task->taskgroup->cancelled)
if (thr->task->taskgroup->workshare
    && thr->task->taskgroup->prev
    && thr->task->taskgroup->prev->cancelled)
gomp_task_maybe_wait_for_dependencies (depend);
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new tasks.  */
if (__builtin_expect (gomp_cancel_var, 0) && team)
if (gomp_team_barrier_cancelled (&team->barrier))
if (thr->task->taskgroup)
if (thr->task->taskgroup->cancelled)
if (thr->task->taskgroup->workshare
    && thr->task->taskgroup->prev
    && thr->task->taskgroup->prev->cancelled)
gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
/* Implement the "omp target exit data" semantics: for each of the MAPNUM
   entries decrement/remove the device mapping on DEVICEP, copying map-from
   data back where required.  */
/* NOTE(review): extraction dropped many lines of this function (return
   type, braces, the switch head, the refcount decrements, the link_key
   guard, the k == NULL continue, and parts of the dev2host offset
   expression) — verify against upstream.  */
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
const int typemask = 0xff;
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_FINALIZED)
gomp_mutex_unlock (&devicep->lock);
for (i = 0; i < mapnum; i++)
struct splay_tree_key_s cur_node;
unsigned char kind = kinds[i] & typemask;
case GOMP_MAP_ALWAYS_FROM:
case GOMP_MAP_DELETE:
case GOMP_MAP_RELEASE:
case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
/* Zero-length sections need the special 0-length lookup.  */
splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
		    || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
  ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
  : splay_tree_lookup (&devicep->mem_map, &cur_node);
if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
/* "delete" forces the refcount to zero regardless of nesting.  */
if ((kind == GOMP_MAP_DELETE
     || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
    && k->refcount != REFCOUNT_INFINITY)
if ((kind == GOMP_MAP_FROM && k->refcount == 0)
    || kind == GOMP_MAP_ALWAYS_FROM)
gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
		    (void *) (k->tgt->tgt_start + k->tgt_offset
			      + cur_node.host_start
		    cur_node.host_end - cur_node.host_start);
if (k->refcount == 0)
splay_tree_remove (&devicep->mem_map, k);
splay_tree_insert (&devicep->mem_map,
		   (splay_tree_node) k->link_key);
if (k->tgt->refcount > 1)
gomp_unmap_tgt (k->tgt);
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
gomp_mutex_unlock (&devicep->lock);
/* Entry point for both "omp target enter data" (map the listed objects) and
   "omp target exit data" (unmap them), distinguished by
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS.  DEPEND handles dependencies;
   GOMP_TARGET_FLAG_NOWAIT defers the work to a target task.  */
/* NOTE(review): extraction dropped many lines (return type, braces, the
   "i += sizes[i]" skip after GOMP_MAP_STRUCT, the returns inside the
   cancellation checks, the early bail-out heads, and parts of the depend
   comment) — verify against upstream.  */
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
struct gomp_device_descr *devicep = resolve_device (device);
/* If there are depend clauses, but nowait is not present,
   block the parent task until the dependencies are resolved
   and then just continue with the rest of the function as if it
   is a merged task.  Until we are able to schedule task during
   variable mapping or unmapping, ignore nowait if depend clauses
   [are present].  */
struct gomp_thread *thr = gomp_thread ();
if (thr->task && thr->task->depend_hash)
if ((flags & GOMP_TARGET_FLAG_NOWAIT)
    && !thr->task->final_task)
/* Deferred path: run enter/exit data as a target task.  */
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
			     mapnum, hostaddrs, sizes, kinds,
			     flags, depend, NULL,
			     GOMP_TARGET_TASK_DATA))
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new
   [tasks.]  */
if (__builtin_expect (gomp_cancel_var, 0) && team)
if (gomp_team_barrier_cancelled (&team->barrier))
if (thr->task->taskgroup)
if (thr->task->taskgroup->cancelled)
if (thr->task->taskgroup->workshare
    && thr->task->taskgroup->prev
    && thr->task->taskgroup->prev->cancelled)
gomp_task_maybe_wait_for_dependencies (depend);
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new tasks.  */
if (__builtin_expect (gomp_cancel_var, 0) && team)
if (gomp_team_barrier_cancelled (&team->barrier))
if (thr->task->taskgroup)
if (thr->task->taskgroup->cancelled)
if (thr->task->taskgroup->workshare
    && thr->task->taskgroup->prev
    && thr->task->taskgroup->prev->cancelled)
/* Enter data: map each entry (whole struct groups in one call).  */
if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
	       &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
	       true, GOMP_MAP_VARS_ENTER_DATA);
/* Exit data: delegate to gomp_exit_data.  */
gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2179 gomp_target_task_fn (void *data
)
2181 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
2182 struct gomp_device_descr
*devicep
= ttask
->devicep
;
2184 if (ttask
->fn
!= NULL
)
2188 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2189 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
2190 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2192 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
2193 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
);
2197 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2200 gomp_unmap_vars (ttask
->tgt
, true);
2204 void *actual_arguments
;
2205 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2208 actual_arguments
= ttask
->hostaddrs
;
2212 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
2213 NULL
, ttask
->sizes
, ttask
->kinds
, true,
2214 GOMP_MAP_VARS_TARGET
);
2215 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
2217 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
2219 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
2220 ttask
->args
, (void *) ttask
);
2223 else if (devicep
== NULL
2224 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2225 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2229 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
2230 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2231 ttask
->kinds
, true);
2232 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2233 for (i
= 0; i
< ttask
->mapnum
; i
++)
2234 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2236 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
2237 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
2238 GOMP_MAP_VARS_ENTER_DATA
);
2239 i
+= ttask
->sizes
[i
];
2242 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
2243 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2245 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2251 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
2255 struct gomp_task_icv
*icv
= gomp_icv (true);
2256 icv
->thread_limit_var
2257 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
2263 omp_target_alloc (size_t size
, int device_num
)
2265 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2266 return malloc (size
);
2271 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2272 if (devicep
== NULL
)
2275 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2276 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2277 return malloc (size
);
2279 gomp_mutex_lock (&devicep
->lock
);
2280 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
2281 gomp_mutex_unlock (&devicep
->lock
);
2286 omp_target_free (void *device_ptr
, int device_num
)
2288 if (device_ptr
== NULL
)
2291 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2300 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2301 if (devicep
== NULL
)
2304 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2305 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2311 gomp_mutex_lock (&devicep
->lock
);
2312 gomp_free_device_memory (devicep
, device_ptr
);
2313 gomp_mutex_unlock (&devicep
->lock
);
2317 omp_target_is_present (const void *ptr
, int device_num
)
2322 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2328 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2329 if (devicep
== NULL
)
2332 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2333 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2336 gomp_mutex_lock (&devicep
->lock
);
2337 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2338 struct splay_tree_key_s cur_node
;
2340 cur_node
.host_start
= (uintptr_t) ptr
;
2341 cur_node
.host_end
= cur_node
.host_start
;
2342 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2343 int ret
= n
!= NULL
;
2344 gomp_mutex_unlock (&devicep
->lock
);
2349 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
2350 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
2353 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2356 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2358 if (dst_device_num
< 0)
2361 dst_devicep
= resolve_device (dst_device_num
);
2362 if (dst_devicep
== NULL
)
2365 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2366 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2369 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2371 if (src_device_num
< 0)
2374 src_devicep
= resolve_device (src_device_num
);
2375 if (src_devicep
== NULL
)
2378 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2379 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2382 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2384 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2387 if (src_devicep
== NULL
)
2389 gomp_mutex_lock (&dst_devicep
->lock
);
2390 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2391 (char *) dst
+ dst_offset
,
2392 (char *) src
+ src_offset
, length
);
2393 gomp_mutex_unlock (&dst_devicep
->lock
);
2394 return (ret
? 0 : EINVAL
);
2396 if (dst_devicep
== NULL
)
2398 gomp_mutex_lock (&src_devicep
->lock
);
2399 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2400 (char *) dst
+ dst_offset
,
2401 (char *) src
+ src_offset
, length
);
2402 gomp_mutex_unlock (&src_devicep
->lock
);
2403 return (ret
? 0 : EINVAL
);
2405 if (src_devicep
== dst_devicep
)
2407 gomp_mutex_lock (&src_devicep
->lock
);
2408 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2409 (char *) dst
+ dst_offset
,
2410 (char *) src
+ src_offset
, length
);
2411 gomp_mutex_unlock (&src_devicep
->lock
);
2412 return (ret
? 0 : EINVAL
);
2418 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
2419 int num_dims
, const size_t *volume
,
2420 const size_t *dst_offsets
,
2421 const size_t *src_offsets
,
2422 const size_t *dst_dimensions
,
2423 const size_t *src_dimensions
,
2424 struct gomp_device_descr
*dst_devicep
,
2425 struct gomp_device_descr
*src_devicep
)
2427 size_t dst_slice
= element_size
;
2428 size_t src_slice
= element_size
;
2429 size_t j
, dst_off
, src_off
, length
;
2434 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2435 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2436 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2438 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2440 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
2444 else if (src_devicep
== NULL
)
2445 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2446 (char *) dst
+ dst_off
,
2447 (const char *) src
+ src_off
,
2449 else if (dst_devicep
== NULL
)
2450 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2451 (char *) dst
+ dst_off
,
2452 (const char *) src
+ src_off
,
2454 else if (src_devicep
== dst_devicep
)
2455 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2456 (char *) dst
+ dst_off
,
2457 (const char *) src
+ src_off
,
2461 return ret
? 0 : EINVAL
;
2464 /* FIXME: it would be nice to have some plugin function to handle
2465 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2466 be handled in the generic recursion below, and for host-host it
2467 should be used even for any num_dims >= 2. */
2469 for (i
= 1; i
< num_dims
; i
++)
2470 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2471 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2473 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2474 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2476 for (j
= 0; j
< volume
[0]; j
++)
2478 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2479 (const char *) src
+ src_off
,
2480 element_size
, num_dims
- 1,
2481 volume
+ 1, dst_offsets
+ 1,
2482 src_offsets
+ 1, dst_dimensions
+ 1,
2483 src_dimensions
+ 1, dst_devicep
,
2487 dst_off
+= dst_slice
;
2488 src_off
+= src_slice
;
2494 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
2495 int num_dims
, const size_t *volume
,
2496 const size_t *dst_offsets
,
2497 const size_t *src_offsets
,
2498 const size_t *dst_dimensions
,
2499 const size_t *src_dimensions
,
2500 int dst_device_num
, int src_device_num
)
2502 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2507 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2509 if (dst_device_num
< 0)
2512 dst_devicep
= resolve_device (dst_device_num
);
2513 if (dst_devicep
== NULL
)
2516 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2517 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2520 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2522 if (src_device_num
< 0)
2525 src_devicep
= resolve_device (src_device_num
);
2526 if (src_devicep
== NULL
)
2529 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2530 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2534 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2538 gomp_mutex_lock (&src_devicep
->lock
);
2539 else if (dst_devicep
)
2540 gomp_mutex_lock (&dst_devicep
->lock
);
2541 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2542 volume
, dst_offsets
, src_offsets
,
2543 dst_dimensions
, src_dimensions
,
2544 dst_devicep
, src_devicep
);
2546 gomp_mutex_unlock (&src_devicep
->lock
);
2547 else if (dst_devicep
)
2548 gomp_mutex_unlock (&dst_devicep
->lock
);
2553 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
2554 size_t size
, size_t device_offset
, int device_num
)
2556 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2562 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2563 if (devicep
== NULL
)
2566 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2567 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2570 gomp_mutex_lock (&devicep
->lock
);
2572 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2573 struct splay_tree_key_s cur_node
;
2576 cur_node
.host_start
= (uintptr_t) host_ptr
;
2577 cur_node
.host_end
= cur_node
.host_start
+ size
;
2578 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2581 if (n
->tgt
->tgt_start
+ n
->tgt_offset
2582 == (uintptr_t) device_ptr
+ device_offset
2583 && n
->host_start
<= cur_node
.host_start
2584 && n
->host_end
>= cur_node
.host_end
)
2589 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2590 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2594 tgt
->to_free
= NULL
;
2596 tgt
->list_count
= 0;
2597 tgt
->device_descr
= devicep
;
2598 splay_tree_node array
= tgt
->array
;
2599 splay_tree_key k
= &array
->key
;
2600 k
->host_start
= cur_node
.host_start
;
2601 k
->host_end
= cur_node
.host_end
;
2603 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2604 k
->refcount
= REFCOUNT_INFINITY
;
2606 array
->right
= NULL
;
2607 splay_tree_insert (&devicep
->mem_map
, array
);
2610 gomp_mutex_unlock (&devicep
->lock
);
2615 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
2617 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2623 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2624 if (devicep
== NULL
)
2627 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2630 gomp_mutex_lock (&devicep
->lock
);
2632 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2633 struct splay_tree_key_s cur_node
;
2636 cur_node
.host_start
= (uintptr_t) ptr
;
2637 cur_node
.host_end
= cur_node
.host_start
;
2638 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2640 && n
->host_start
== cur_node
.host_start
2641 && n
->refcount
== REFCOUNT_INFINITY
2642 && n
->tgt
->tgt_start
== 0
2643 && n
->tgt
->to_free
== NULL
2644 && n
->tgt
->refcount
== 1
2645 && n
->tgt
->list_count
== 0)
2647 splay_tree_remove (&devicep
->mem_map
, n
);
2648 gomp_unmap_tgt (n
->tgt
);
2652 gomp_mutex_unlock (&devicep
->lock
);
2657 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
2660 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2661 return gomp_pause_host ();
2662 if (device_num
< 0 || device_num
>= gomp_get_num_devices ())
2664 /* Do nothing for target devices for now. */
2669 omp_pause_resource_all (omp_pause_resource_t kind
)
2672 if (gomp_pause_host ())
2674 /* Do nothing for target devices for now. */
2678 ialias (omp_pause_resource
)
2679 ialias (omp_pause_resource_all
)
2681 #ifdef PLUGIN_SUPPORT
2683 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2685 The handles of the found functions are stored in the corresponding fields
2686 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2689 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2690 const char *plugin_name
)
2692 const char *err
= NULL
, *last_missing
= NULL
;
2694 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2698 /* Check if all required functions are available in the plugin and store
2699 their handlers. None of the symbols can legitimately be NULL,
2700 so we don't need to check dlerror all the time. */
2702 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2704 /* Similar, but missing functions are not an error. Return false if
2705 failed, true otherwise. */
2706 #define DLSYM_OPT(f, n) \
2707 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2708 || (last_missing = #n, 0))
2711 if (device
->version_func () != GOMP_VERSION
)
2713 err
= "plugin version mismatch";
2720 DLSYM (get_num_devices
);
2721 DLSYM (init_device
);
2722 DLSYM (fini_device
);
2724 DLSYM (unload_image
);
2729 device
->capabilities
= device
->get_caps_func ();
2730 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2734 DLSYM_OPT (can_run
, can_run
);
2737 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2739 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
2740 || !DLSYM_OPT (openacc
.create_thread_data
,
2741 openacc_create_thread_data
)
2742 || !DLSYM_OPT (openacc
.destroy_thread_data
,
2743 openacc_destroy_thread_data
)
2744 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
2745 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
2746 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
2747 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
2748 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
2749 || !DLSYM_OPT (openacc
.async
.queue_callback
,
2750 openacc_async_queue_callback
)
2751 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
2752 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
2753 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
))
2755 /* Require all the OpenACC handlers if we have
2756 GOMP_OFFLOAD_CAP_OPENACC_200. */
2757 err
= "plugin missing OpenACC handler function";
2762 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
2763 openacc_cuda_get_current_device
);
2764 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
2765 openacc_cuda_get_current_context
);
2766 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
2767 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
2768 if (cuda
&& cuda
!= 4)
2770 /* Make sure all the CUDA functions are there if any of them are. */
2771 err
= "plugin missing OpenACC CUDA handler function";
2783 gomp_error ("while loading %s: %s", plugin_name
, err
);
2785 gomp_error ("missing function was %s", last_missing
);
2787 dlclose (plugin_handle
);
2792 /* This function finalizes all initialized devices. */
2795 gomp_target_fini (void)
2798 for (i
= 0; i
< num_devices
; i
++)
2801 struct gomp_device_descr
*devicep
= &devices
[i
];
2802 gomp_mutex_lock (&devicep
->lock
);
2803 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2804 ret
= gomp_fini_device (devicep
);
2805 gomp_mutex_unlock (&devicep
->lock
);
2807 gomp_fatal ("device finalization failed");
2811 /* This function initializes the runtime for offloading.
2812 It parses the list of offload plugins, and tries to load these.
2813 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2814 will be set, and the array DEVICES initialized, containing descriptors for
2815 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2819 gomp_target_init (void)
2821 const char *prefix
="libgomp-plugin-";
2822 const char *suffix
= SONAME_SUFFIX (1);
2823 const char *cur
, *next
;
2825 int i
, new_num_devices
;
2830 cur
= OFFLOAD_PLUGINS
;
2834 struct gomp_device_descr current_device
;
2835 size_t prefix_len
, suffix_len
, cur_len
;
2837 next
= strchr (cur
, ',');
2839 prefix_len
= strlen (prefix
);
2840 cur_len
= next
? next
- cur
: strlen (cur
);
2841 suffix_len
= strlen (suffix
);
2843 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
2850 memcpy (plugin_name
, prefix
, prefix_len
);
2851 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
2852 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
2854 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
2856 new_num_devices
= current_device
.get_num_devices_func ();
2857 if (new_num_devices
>= 1)
2859 /* Augment DEVICES and NUM_DEVICES. */
2861 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
2862 * sizeof (struct gomp_device_descr
));
2870 current_device
.name
= current_device
.get_name_func ();
2871 /* current_device.capabilities has already been set. */
2872 current_device
.type
= current_device
.get_type_func ();
2873 current_device
.mem_map
.root
= NULL
;
2874 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
2875 current_device
.openacc
.data_environ
= NULL
;
2876 for (i
= 0; i
< new_num_devices
; i
++)
2878 current_device
.target_id
= i
;
2879 devices
[num_devices
] = current_device
;
2880 gomp_mutex_init (&devices
[num_devices
].lock
);
2891 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2892 NUM_DEVICES_OPENMP. */
2893 struct gomp_device_descr
*devices_s
2894 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
2901 num_devices_openmp
= 0;
2902 for (i
= 0; i
< num_devices
; i
++)
2903 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2904 devices_s
[num_devices_openmp
++] = devices
[i
];
2905 int num_devices_after_openmp
= num_devices_openmp
;
2906 for (i
= 0; i
< num_devices
; i
++)
2907 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2908 devices_s
[num_devices_after_openmp
++] = devices
[i
];
2910 devices
= devices_s
;
2912 for (i
= 0; i
< num_devices
; i
++)
2914 /* The 'devices' array can be moved (by the realloc call) until we have
2915 found all the plugins, so registering with the OpenACC runtime (which
2916 takes a copy of the pointer argument) must be delayed until now. */
2917 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2918 goacc_register (&devices
[i
]);
2921 if (atexit (gomp_target_fini
) != 0)
2922 gomp_fatal ("atexit failed");
2925 #else /* PLUGIN_SUPPORT */
2926 /* If dlfcn.h is unavailable we always fallback to host execution.
2927 GOMP_target* routines are just stubs for this case. */
2929 gomp_target_init (void)
2932 #endif /* PLUGIN_SUPPORT */