/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr
{
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != GOMP_DEVICE_HOST_FALLBACK
	  && device_id != num_devices_openmp)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

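/* Look up KEY in MEM_MAP.  A zero-length KEY (host_start == host_end), as
   produced for zero-length array sections, additionally probes the adjacent
   addresses, so that a mapping ending or starting exactly at KEY's address
   still matches.  */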
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start;
  size_t end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
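
/* For example: adding regions at tgt_start-relative offsets [0, 1024),
   [1536, 2048) and then [8192, 8704) first grows chunk 0 to [0, 2048),
   because the 512-byte gap is below MAX_COALESCE_BUF_GAP, and then starts
   chunk 1 at [8192, 8704), because the 6144-byte gap exceeds it.  A region
   larger than MAX_COALESCE_BUF_SIZE is never added and is copied to the
   device directly.  */
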
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start,
		       size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end
		  + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
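
/* E.g. map(to:) and map(tofrom:) kinds require the initial host-to-device
   copy, whereas map(alloc:) and map(from:) only need device storage.  */
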
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  Make a
	     copy of the data into a temporary buffer in those cases.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free temporary buffer once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d,
		      sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
}
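
/* Note that sibling maps of one construct can share a refcount (structure
   element maps all point at the leading element's structelem_refcount), so
   recording the pointer in REFCOUNT_SET makes any further increment from the
   same construct a no-op, matching OpenMP 5.0's at-most-once adjustment.  */
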
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals
   that associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
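
/* Example: unmapping a structure that was mapped via three element maps
   yields *DO_COPY == true for each element once the shared refcount has
   dropped to zero, but *DO_REMOVE == true only for the first sibling
   processed, so the keys are removed from the splay tree exactly once.  */
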
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, false, cbuf);

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf,
			  htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, cbuf, refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, false,
				      cbuf, refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, cbuf, refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

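/* Return the device address to use for the I-th mapping: either through the
   splay tree key recorded in TGT->list[i], or, for entries without a key,
   via the OFFSET_* encoding stored in TGT->list[i].offset.  */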
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

= 0; i
< mapnum
; i
++)
905 int kind
= get_kind (short_mapkind
, kinds
, i
);
906 if (hostaddrs
[i
] == NULL
907 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
909 tgt
->list
[i
].key
= NULL
;
910 tgt
->list
[i
].offset
= OFFSET_INLINED
;
913 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
914 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
916 tgt
->list
[i
].key
= NULL
;
919 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
920 on a separate construct prior to using use_device_{addr,ptr}.
921 In OpenMP 5.0, map directives need to be ordered by the
922 middle-end before the use_device_* clauses. If
923 !not_found_cnt, all mappings requested (if any) are already
924 mapped, so use_device_{addr,ptr} can be resolved right away.
925 Otherwise, if not_found_cnt, gomp_map_lookup might fail
926 now but would succeed after performing the mappings in the
927 following loop. We can't defer this always to the second
928 loop, because it is not even invoked when !not_found_cnt
929 after the first loop. */
930 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
931 cur_node
.host_end
= cur_node
.host_start
;
932 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
935 cur_node
.host_start
-= n
->host_start
;
937 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
938 + cur_node
.host_start
);
940 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
942 gomp_mutex_unlock (&devicep
->lock
);
943 gomp_fatal ("use_device_ptr pointer wasn't mapped");
945 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
946 /* If not present, continue using the host address. */
949 __builtin_unreachable ();
950 tgt
->list
[i
].offset
= OFFSET_INLINED
;
953 tgt
->list
[i
].offset
= 0;
956 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
958 size_t first
= i
+ 1;
959 size_t last
= i
+ sizes
[i
];
960 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
961 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
963 tgt
->list
[i
].key
= NULL
;
964 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
965 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
968 size_t align
= (size_t) 1 << (kind
>> rshift
);
969 if (tgt_align
< align
)
971 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
972 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
973 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
974 not_found_cnt
+= last
- i
;
975 for (i
= first
; i
<= last
; i
++)
977 tgt
->list
[i
].key
= NULL
;
979 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
981 gomp_coalesce_buf_add (&cbuf
,
982 tgt_size
- cur_node
.host_end
983 + (uintptr_t) hostaddrs
[i
],
989 for (i
= first
; i
<= last
; i
++)
990 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
991 sizes
, kinds
, NULL
, refcount_set
);
995 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
997 tgt
->list
[i
].key
= NULL
;
998 tgt
->list
[i
].offset
= OFFSET_POINTER
;
999 has_firstprivate
= true;
1002 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
)
1004 tgt
->list
[i
].key
= NULL
;
1005 has_firstprivate
= true;
1008 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1009 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1010 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1012 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1013 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1015 tgt
->list
[i
].key
= NULL
;
1017 size_t align
= (size_t) 1 << (kind
>> rshift
);
1018 if (tgt_align
< align
)
1020 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1022 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1023 cur_node
.host_end
- cur_node
.host_start
);
1024 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1025 has_firstprivate
= true;
1029 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1031 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1034 tgt
->list
[i
].key
= NULL
;
1035 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1040 n
= splay_tree_lookup (mem_map
, &cur_node
);
1041 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1043 int always_to_cnt
= 0;
1044 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1046 bool has_nullptr
= false;
1048 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1049 if (n
->tgt
->list
[j
].key
== n
)
1051 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1054 if (n
->tgt
->list_count
== 0)
1056 /* 'declare target'; assume has_nullptr; it could also be
1057 statically assigned pointer, but that it should be to
1058 the equivalent variable on the host. */
1059 assert (n
->refcount
== REFCOUNT_INFINITY
);
1063 assert (j
< n
->tgt
->list_count
);
1064 /* Re-map the data if there is an 'always' modifier or if it a
1065 null pointer was there and non a nonnull has been found; that
1066 permits transparent re-mapping for Fortran array descriptors
1067 which were previously mapped unallocated. */
1068 for (j
= i
+ 1; j
< mapnum
; j
++)
1070 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1071 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1073 || !GOMP_MAP_POINTER_P (ptr_kind
)
1074 || *(void **) hostaddrs
[j
] == NULL
))
1076 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1077 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1078 > cur_node
.host_end
))
1082 has_always_ptrset
= true;
1087 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1088 kind
& typemask
, always_to_cnt
> 0, NULL
,
1094 tgt
->list
[i
].key
= NULL
;
1096 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1098 /* Not present, hence, skip entry - including its MAP_POINTER,
1100 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1102 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1103 == GOMP_MAP_POINTER
))
1106 tgt
->list
[i
].key
= NULL
;
1107 tgt
->list
[i
].offset
= 0;
1111 size_t align
= (size_t) 1 << (kind
>> rshift
);
1113 if (tgt_align
< align
)
1115 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1117 && gomp_to_device_kind_p (kind
& typemask
))
1118 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1119 cur_node
.host_end
- cur_node
.host_start
);
1120 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1121 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1125 for (j
= i
+ 1; j
< mapnum
; j
++)
1126 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1127 kinds
, j
)) & typemask
))
1128 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1130 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1131 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1132 > cur_node
.host_end
))
1136 tgt
->list
[j
].key
= NULL
;
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;

	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset
				      + ((uintptr_t) hostaddrs[j]
					 - k->host_start),
				      sizes[j], cbufp);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily
		   test 'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp,
					    refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start
					    - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i],
					   sizes[i], cbufp);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, false, cbufp,
				      refcount_set);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target
		       address of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		k->refcount = 0;
		k->dynamic_refcount = 0;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (openmp_p)
		      {
			k->refcount = REFCOUNT_STRUCTELEM;
			if (field_tgt_structelem_first == NULL)
			  {
			    /* Set to first structure element of sequence.  */
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
			    field_tgt_structelem_first = k;
			  }
			else
			  /* Point to refcount of leading element, but do not
			     increment again.  */
			  k->structelem_refcount_ptr
			    = &field_tgt_structelem_first->structelem_refcount;

			if (i == field_tgt_clear)
			  {
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
			    field_tgt_structelem_first = NULL;
			  }
		      }
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		/* First increment, from 0 to 1.  gomp_increment_refcount
		   encapsulates the different increment cases, so use this
		   instead of directly setting 1 during initialization.  */
		gomp_increment_refcount (k, refcount_set);

		tgt->list[i].copy_from
		  = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    tgt->list[i].has_null_ptr_assoc = false;

		    for (j = i + 1; j < mapnum; j++)
		      {
			int ptr_kind = (get_kind (short_mapkind, kinds, j)
					& typemask);
			if (!GOMP_MAP_POINTER_P (ptr_kind)
			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
			  break;
			else if ((uintptr_t) hostaddrs[j] < k->host_start
				 || ((uintptr_t) hostaddrs[j]
				     + sizeof (void *)
				     > k->host_end))
			  break;
			else
			  {
			    tgt->list[j].key = k;
			    tgt->list[j].copy_from = false;
			    tgt->list[j].always_copy_from = false;
			    tgt->list[j].is_attach = false;
			    tgt->list[i].has_null_ptr_assoc
			      |= !(*(void **) hostaddrs[j]);
			    /* For OpenMP, the use of refcount_sets causes
			       errors if we set k->refcount = 1 above but also
			       increment it again here, for decrementing will
			       not properly match, since we decrement only
			       once for each key's refcount.  Therefore avoid
			       this increment for OpenMP constructs.  */
			    if (!openmp_p)
			      gomp_increment_refcount (k, refcount_set);
			    gomp_map_pointer (tgt, aq,
					      (uintptr_t)
					      *(void **) hostaddrs[j],
					      k->tgt_offset
					      + ((uintptr_t) hostaddrs[j]
						 - k->host_start),
					      sizes[j], cbufp);
			  }
		      }
		    i = j - 1;
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), false, cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of
		       the mapped object.  */
		    void *tgt_addr
		      = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's
		       not data allocated by the current call to this
		       function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), true,
					NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      true, cbufp);
	}
    }

  if (cbufp)
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!aq);

      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start,
			    true, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, htab_t *refcount_set,
	       enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}

attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
	 reference counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

static void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
	     void **hostaddrs, size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  false, NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
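
/* E.g. 'omp target update to(x)' arrives here with a kind for which
   GOMP_MAP_COPY_TO_P holds, taking the host-to-device branch above;
   'from(x)' correspondingly satisfies GOMP_MAP_COPY_FROM_P.  */
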
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Currently, the only 'other' entry is 'device_num'.  */
  int num_others = 1;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars
      /* Others (device_num) are included as trailing entries in pair list.  */
      && num_target_entries != num_funcs + num_vars + num_others)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Last entry is for the on-device 'device_num' variable.  Tolerate case
     where plugin does not return this entry.  */
  if (num_funcs + num_vars < num_target_entries)
    {
      struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
      /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
	 was found in this image.  */
      if (device_num_var->start != 0)
	{
	  /* The index of the devicep within devices[] is regarded as its
	     'device number', which is different from the per-device type
	     devicep->target_id.  */
	  int device_num_val = (int) (devicep - &devices[0]);
	  if (device_num_var->end - device_num_var->start != sizeof (int))
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      if (is_register_lock)
		gomp_mutex_unlock (&register_lock);
	      gomp_fatal ("offload plugin managed 'device_num' not of "
			  "expected format");
	    }

	  /* Copy device_num value to place on device memory, thereby
	     actually putting its device number into effect.  */
	  gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
			      &device_num_val, sizeof (int), false, NULL);
	}
    }

  free (target_table);
}
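/* An illustrative sketch (not part of libgomp; all identifiers below are
   hypothetical) of the HOST_TABLE layout this function consumes.  The
   offloading toolchain emits four pointers bracketing the function and
   variable tables; the var table stores (address, size) pairs:

     static void (*host_funcs[2]) (void) = { foo_fn, bar_fn };
     static void *host_vars[4] = { &x, (void *) sizeof (x),
				   &y, (void *) sizeof (y) };
     static const void *host_table[4] = {
       &host_funcs[0], &host_funcs[2],
       &host_vars[0], &host_vars[4]
     };

   With this layout, num_funcs above computes to 2 and num_vars to
   (4 / 2) = 2.  */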
/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
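/* For illustration only: this registration call is normally emitted by the
   offloading tools rather than written by hand.  A hypothetical constructor
   in an offload image (all names below are placeholders; the shape of
   target_data is private to the plugin) would look roughly like:

     static const void *host_table[4] = { ... four table pointers ... };
     static const void *target_data;

     __attribute__ ((constructor))
     static void
     register_this_image (void)
     {
       GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION,
						     GOMP_VERSION_NVIDIA_PTX),
				  host_table, GOMP_DEVICE_NVIDIA_PTX,
				  &target_data);
     }

   GOMP_VERSION_PACK and the GOMP_DEVICE_* / GOMP_VERSION_* constants come
   from gomp-constants.h.  */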
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
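/* A worked example of the rounding arithmetic used above.  For a
   firstprivate char (size 1, align 1) followed by a double (size 8,
   align 8), calculate_firstprivate_requirements yields tgt_align = 8 and
   tgt_size = 16: after the char, tgt_size = 1 is rounded up with
   (1 + 8 - 1) & ~(8 - 1) == 8, then the double's 8 bytes are added.
   copy_firstprivate_data repeats the same rounding while copying, so both
   passes agree on each copy's offset within the block.  */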
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep);

  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
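/* For illustration only: a hypothetical hand-written equivalent of what the
   compiler emits for a simple construct such as

     #pragma omp target map(tofrom: x)
     x++;

   is roughly (outlined_fn and the local arrays are made-up names):

     static void
     outlined_fn (void *args)
     {
       int *xp = ((int **) args)[0];
       (*xp)++;
     }

     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (int) };
     unsigned char kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target (device, outlined_fn, NULL, 1, hostaddrs, sizes, kinds);

   Newly compiled code uses GOMP_target_ext instead; this entry point is
   kept for binaries built against the older ABI.  */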
static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value GOMP_teams will have on the device.  */
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
				true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);
}
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}
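/* An illustrative user-level view of the bookkeeping above: each
   '#pragma omp target data' pushes one target_mem_desc onto the ICV's
   target_data chain and GOMP_target_end_data pops it, so the regions nest
   like a stack:

     #pragma omp target data map(to: a)        <- push (GOMP_target_data*)
     {
       #pragma omp target data map(to: b)      <- push
       {
	 ...
       }                                       <- pop (GOMP_target_end_data)
     }                                         <- pop

   The fallback path keeps pushing placeholder descriptors so that the pops
   stay balanced even when no device is used.  */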
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since they maintain an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start
	     with.  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j)
					       & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true, NULL);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      NULL, GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
	{
	  for (i = 0; i < ttask->mapnum; i++)
	    if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	      {
		gomp_map_vars (devicep, ttask->sizes[i] + 1,
			       &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
			       &ttask->kinds[i], true, &refcount_set,
			       GOMP_MAP_VARS_ENTER_DATA);
		i += ttask->sizes[i];
	      }
	    else
	      gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
			     &ttask->sizes[i], &ttask->kinds[i], true,
			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	}
      else
	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
			ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == gomp_get_num_devices ())
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
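/* For illustration only: typical use of the two routines above from user
   code, moving a buffer to a device and back.  omp_get_default_device and
   omp_get_initial_device are the standard OpenMP API calls:

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double buf[1024];

     double *dbuf = omp_target_alloc (sizeof buf, dev);
     if (dbuf != NULL
	 && omp_target_memcpy (dbuf, buf, sizeof buf, 0, 0, dev, host) == 0)
       {
	 ... launch kernels using dbuf via an is_device_ptr clause ...
	 omp_target_memcpy (buf, dbuf, sizeof buf, 0, 0, host, dev);
       }
     omp_target_free (dbuf, dev);

   Note that the destination device number precedes the source one, and 0
   is the success return value of omp_target_memcpy.  */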
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
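/* A worked example for the rectangular copy above (illustrative only; dst,
   src and dev are assumed to be set up by the caller): copying a 2x3
   sub-block starting at row 1, column 2 of a 4x5 host array of doubles into
   the top-left corner of a 6x7 device array:

     size_t volume[2]         = { 2, 3 };
     size_t src_offsets[2]    = { 1, 2 };
     size_t dst_offsets[2]    = { 0, 0 };
     size_t src_dimensions[2] = { 4, 5 };
     size_t dst_dimensions[2] = { 6, 7 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
				       dst_offsets, src_offsets,
				       dst_dimensions, src_dimensions,
				       dev, omp_get_initial_device ());

   The worker recurses on the outermost dimension: each of the 2 rows is a
   contiguous run of 3 elements, with consecutive source rows
   src_slice = 5 * sizeof (double) bytes apart and destination rows
   dst_slice = 7 * sizeof (double) bytes apart.  */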
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
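/* For illustration only: associating device memory obtained from
   omp_target_alloc with a host array, so that later map clauses reuse the
   given device storage instead of allocating fresh memory:

     double a[1024];
     int dev = omp_get_default_device ();
     void *d_a = omp_target_alloc (sizeof a, dev);

     if (omp_target_associate_ptr (a, d_a, sizeof a, 0, dev) == 0)
       {
	 #pragma omp target map(tofrom: a)
	 { ... uses the storage at d_a ... }
	 omp_target_disassociate_ptr (a, dev);
       }
     omp_target_free (d_a, dev);

   Disassociation only succeeds while the entry is still in the pristine
   state checked above (infinite refcount, no live mapping list
   entries).  */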
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == gomp_get_num_devices ())
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= num_devices_openmp)
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
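/* For illustration only: the minimal shape of an offload plugin as probed
   above.  A shared object such as libgomp-plugin-foo.so.1 would export the
   GOMP_OFFLOAD_* entry points resolved by DLSYM, roughly:

     unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; }
     const char *GOMP_OFFLOAD_get_name (void) { return "foo"; }
     unsigned GOMP_OFFLOAD_get_caps (void)
     { return GOMP_OFFLOAD_CAP_OPENMP_400; }
     int GOMP_OFFLOAD_get_num_devices (void) { return 1; }
     ... plus get_type, init_device, fini_device, load_image, unload_image,
	 alloc, free, dev2host, host2dev, dev2dev and run ...

   "foo" is a placeholder name; the real plugins (e.g. for nvptx and gcn)
   live in the libgomp plugin/ subdirectory.  */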
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devs = current_device.get_num_devices_func ();
	    if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devs = realloc (devs, (num_devs + new_num_devs)
				      * sizeof (struct gomp_device_descr));
		if (!devs)
		  {
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
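/* A concrete example of the name assembly above: with OFFLOAD_PLUGINS set
   at configure time to e.g. "nvptx,gcn" and SONAME_SUFFIX (1) expanding to
   ".so.1" on ELF targets, the loop tries to dlopen
   "libgomp-plugin-nvptx.so.1" and then "libgomp-plugin-gcn.so.1".  */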
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */