1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
51 #include "splay-tree.h"
54 typedef uintptr_t *hash_entry_type
;
55 /* Allocation hook for the hash-table implementation: delegate to
   gomp_malloc so allocation failures are handled libgomp-style.  */
static inline void *
htab_alloc (size_t nbytes)
{
  return gomp_malloc (nbytes);
}
56 /* Release hook for the hash-table implementation: memory came from
   htab_alloc (plain gomp_malloc), so plain free suffices.  */
static inline void
htab_free (void *block)
{
  free (block);
}
59 ialias_redirect (GOMP_task
)
61 static inline hashval_t
62 htab_hash (hash_entry_type element
)
64 return hash_pointer ((void *) element
);
68 htab_eq (hash_entry_type x
, hash_entry_type y
)
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
75 static void gomp_target_init (void);
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock
;
83 /* This structure describes an offload image.
84 It contains type of the target device, pointer to host table descriptor, and
85 pointer to target data. */
86 struct offload_image_descr
{
88 enum offload_target_type type
;
89 const void *host_table
;
90 const void *target_data
;
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr
*offload_images
;
96 /* Total number of offload images. */
97 static int num_offload_images
;
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr
*devices
;
102 /* Total number of available devices. */
103 static int num_devices
;
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp
;
108 /* OpenMP requires mask. */
109 static int omp_requires_mask
;
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
114 gomp_realloc_unlock (void *old
, size_t size
)
116 void *ret
= realloc (old
, size
);
119 gomp_mutex_unlock (®ister_lock
);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
125 attribute_hidden
void
126 gomp_init_targets_once (void)
128 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
132 gomp_get_num_devices (void)
134 gomp_init_targets_once ();
135 return num_devices_openmp
;
138 static struct gomp_device_descr
*
139 resolve_device (int device_id
, bool remapped
)
141 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
143 struct gomp_task_icv
*icv
= gomp_icv (false);
144 device_id
= icv
->default_device_var
;
150 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device
))
153 if (device_id
== omp_invalid_device
)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
161 else if (device_id
>= gomp_get_num_devices ())
163 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id
!= num_devices_openmp
)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
171 gomp_mutex_lock (&devices
[device_id
].lock
);
172 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
173 gomp_init_device (&devices
[device_id
]);
174 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
176 gomp_mutex_unlock (&devices
[device_id
].lock
);
178 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
184 gomp_mutex_unlock (&devices
[device_id
].lock
);
186 return &devices
[device_id
];
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
193 if (key
->host_start
!= key
->host_end
)
194 return splay_tree_lookup (mem_map
, key
);
197 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
202 n
= splay_tree_lookup (mem_map
, key
);
206 return splay_tree_lookup (mem_map
, key
);
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev
, reverse_splay_tree_key key
)
212 return reverse_splay_tree_lookup (mem_map_rev
, key
);
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
218 if (key
->host_start
!= key
->host_end
)
219 return splay_tree_lookup (mem_map
, key
);
222 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
228 gomp_device_copy (struct gomp_device_descr
*devicep
,
229 bool (*copy_func
) (int, void *, const void *, size_t),
230 const char *dst
, void *dstaddr
,
231 const char *src
, const void *srcaddr
,
234 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
236 gomp_mutex_unlock (&devicep
->lock
);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
243 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
244 bool (*copy_func
) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue
*),
246 const char *dst
, void *dstaddr
,
247 const char *src
, const void *srcaddr
,
248 const void *srcaddr_orig
,
249 size_t size
, struct goacc_asyncqueue
*aq
)
251 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
253 gomp_mutex_unlock (&devicep
->lock
);
254 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src
, srcaddr_orig
, srcaddr_orig
+ size
,
259 src
, srcaddr
, srcaddr
+ size
,
260 dst
, dstaddr
, dstaddr
+ size
);
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src
, srcaddr
, srcaddr
+ size
,
265 dst
, dstaddr
, dstaddr
+ size
);
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
272 struct gomp_coalesce_chunk
274 /* The starting and ending point of a coalesced chunk of memory. */
278 struct gomp_coalesce_buf
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
283 struct target_mem_desc
*tgt
;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk
*chunks
;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
309 /* Add region with device tgt_start relative offset and length to CBUF.
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
313 example). The exception is for EPHEMERAL data, that we know is available
314 already "by construction". */
317 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
319 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
323 if (cbuf
->chunk_cnt
< 0)
325 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
327 cbuf
->chunk_cnt
= -1;
330 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
332 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
336 /* If the last chunk is only used by one mapping, discard it,
337 as it will be one host to device copy anyway and
338 memcpying it around will only waste cycles. */
339 if (cbuf
->use_cnt
== 1)
342 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
343 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
348 /* Return true for mapping kinds which need to copy data from the
349 host to device for regions that weren't previously mapped. */
352 gomp_to_device_kind_p (int kind
)
358 case GOMP_MAP_FORCE_ALLOC
:
359 case GOMP_MAP_FORCE_FROM
:
360 case GOMP_MAP_ALWAYS_FROM
:
367 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
368 non-NULL), when the source data is stack or may otherwise be deallocated
369 before the asynchronous copy takes place, EPHEMERAL must be passed as
372 attribute_hidden
void
373 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
374 struct goacc_asyncqueue
*aq
,
375 void *d
, const void *h
, size_t sz
,
376 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
380 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
381 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
384 long last
= cbuf
->chunk_cnt
- 1;
385 while (first
<= last
)
387 long middle
= (first
+ last
) >> 1;
388 if (cbuf
->chunks
[middle
].end
<= doff
)
390 else if (cbuf
->chunks
[middle
].start
<= doff
)
392 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
394 gomp_mutex_unlock (&devicep
->lock
);
395 gomp_fatal ("internal libgomp cbuf error");
398 /* In an asynchronous context, verify that CBUF isn't used
399 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
400 if (__builtin_expect (aq
!= NULL
, 0))
403 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
413 if (__builtin_expect (aq
!= NULL
, 0))
415 void *h_buf
= (void *) h
;
418 /* We're queueing up an asynchronous copy from data that may
419 disappear before the transfer takes place (i.e. because it is a
420 stack local in a function that is no longer executing). As we've
421 not been able to use CBUF, make a copy of the data into a
423 h_buf
= gomp_malloc (sz
);
424 memcpy (h_buf
, h
, sz
);
426 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
427 "dev", d
, "host", h_buf
, h
, sz
, aq
);
429 /* Free once the transfer has completed. */
430 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
433 gomp_device_copy (devicep
, devicep
->host2dev_func
,
434 "dev", d
, "host", h
, sz
);
437 attribute_hidden
void
438 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
439 struct goacc_asyncqueue
*aq
,
440 void *h
, const void *d
, size_t sz
)
442 if (__builtin_expect (aq
!= NULL
, 0))
443 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
444 "host", h
, "dev", d
, NULL
, sz
, aq
);
446 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
450 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
452 if (!devicep
->free_func (devicep
->target_id
, devptr
))
454 gomp_mutex_unlock (&devicep
->lock
);
455 gomp_fatal ("error in freeing device memory block at %p", devptr
);
459 /* Increment reference count of a splay_tree_key region K by 1.
460 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
461 increment the value if refcount is not yet contained in the set (used for
462 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
463 once for each construct). */
466 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
468 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
471 uintptr_t *refcount_ptr
= &k
->refcount
;
473 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
474 refcount_ptr
= &k
->structelem_refcount
;
475 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
476 refcount_ptr
= k
->structelem_refcount_ptr
;
480 if (htab_find (*refcount_set
, refcount_ptr
))
482 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
483 *slot
= refcount_ptr
;
490 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
491 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
492 track already seen refcounts, and only adjust the value if refcount is not
493 yet contained in the set (like gomp_increment_refcount).
495 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
496 it is already zero and we know we decremented it earlier. This signals that
497 associated maps should be copied back to host.
499 *DO_REMOVE is set to true when this is the first handling of this refcount
500 and we are setting it to zero. This signals a removal of this key from the
503 Copy and removal are separated due to cases like handling of structure
504 elements, e.g. each map of a structure element representing a possible copy
505 out of a structure field has to be handled individually, but we only signal
506 removal for one (the first encountered) sibling map. */
509 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
510 bool *do_copy
, bool *do_remove
)
512 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
514 *do_copy
= *do_remove
= false;
518 uintptr_t *refcount_ptr
= &k
->refcount
;
520 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
521 refcount_ptr
= &k
->structelem_refcount
;
522 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
523 refcount_ptr
= k
->structelem_refcount_ptr
;
525 bool new_encountered_refcount
;
526 bool set_to_zero
= false;
527 bool is_zero
= false;
529 uintptr_t orig_refcount
= *refcount_ptr
;
533 if (htab_find (*refcount_set
, refcount_ptr
))
535 new_encountered_refcount
= false;
539 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
540 *slot
= refcount_ptr
;
541 new_encountered_refcount
= true;
544 /* If no refcount_set being used, assume all keys are being decremented
545 for the first time. */
546 new_encountered_refcount
= true;
550 else if (*refcount_ptr
> 0)
554 if (*refcount_ptr
== 0)
556 if (orig_refcount
> 0)
562 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
563 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
566 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
567 gomp_map_0len_lookup found oldn for newn.
568 Helper function of gomp_map_vars. */
571 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
572 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
573 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
574 unsigned char kind
, bool always_to_flag
, bool implicit
,
575 struct gomp_coalesce_buf
*cbuf
,
576 htab_t
*refcount_set
)
578 assert (kind
!= GOMP_MAP_ATTACH
579 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
582 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
583 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
584 tgt_var
->is_attach
= false;
585 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
587 /* For implicit maps, old contained in new is valid. */
588 bool implicit_subset
= (implicit
589 && newn
->host_start
<= oldn
->host_start
590 && oldn
->host_end
<= newn
->host_end
);
592 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
594 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
596 if ((kind
& GOMP_MAP_FLAG_FORCE
)
597 /* For implicit maps, old contained in new is valid. */
599 /* Otherwise, new contained inside old is considered valid. */
600 || (oldn
->host_start
<= newn
->host_start
601 && newn
->host_end
<= oldn
->host_end
)))
603 gomp_mutex_unlock (&devicep
->lock
);
604 gomp_fatal ("Trying to map into device [%p..%p) object when "
605 "[%p..%p) is already mapped",
606 (void *) newn
->host_start
, (void *) newn
->host_end
,
607 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
610 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
612 /* Implicit + always should not happen. If this does occur, below
613 address/length adjustment is a TODO. */
614 assert (!implicit_subset
);
616 if (oldn
->aux
&& oldn
->aux
->attach_count
)
618 /* We have to be careful not to overwrite still attached pointers
619 during the copyback to host. */
620 uintptr_t addr
= newn
->host_start
;
621 while (addr
< newn
->host_end
)
623 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
624 if (oldn
->aux
->attach_count
[i
] == 0)
625 gomp_copy_host2dev (devicep
, aq
,
626 (void *) (oldn
->tgt
->tgt_start
628 + addr
- oldn
->host_start
),
630 sizeof (void *), false, cbuf
);
631 addr
+= sizeof (void *);
635 gomp_copy_host2dev (devicep
, aq
,
636 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
637 + newn
->host_start
- oldn
->host_start
),
638 (void *) newn
->host_start
,
639 newn
->host_end
- newn
->host_start
, false, cbuf
);
642 gomp_increment_refcount (oldn
, refcount_set
);
646 get_kind (bool short_mapkind
, void *kinds
, int idx
)
649 return ((unsigned char *) kinds
)[idx
];
651 int val
= ((unsigned short *) kinds
)[idx
];
652 if (GOMP_MAP_IMPLICIT_P (val
))
653 val
&= ~GOMP_MAP_IMPLICIT
;
659 get_implicit (bool short_mapkind
, void *kinds
, int idx
)
664 int val
= ((unsigned short *) kinds
)[idx
];
665 return GOMP_MAP_IMPLICIT_P (val
);
669 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
670 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
671 struct gomp_coalesce_buf
*cbuf
,
672 bool allow_zero_length_array_sections
)
674 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
675 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
676 struct splay_tree_key_s cur_node
;
678 cur_node
.host_start
= host_ptr
;
679 if (cur_node
.host_start
== (uintptr_t) NULL
)
681 cur_node
.tgt_offset
= (uintptr_t) NULL
;
682 gomp_copy_host2dev (devicep
, aq
,
683 (void *) (tgt
->tgt_start
+ target_offset
),
684 (void *) &cur_node
.tgt_offset
, sizeof (void *),
688 /* Add bias to the pointer value. */
689 cur_node
.host_start
+= bias
;
690 cur_node
.host_end
= cur_node
.host_start
;
691 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
694 if (allow_zero_length_array_sections
)
695 cur_node
.tgt_offset
= 0;
698 gomp_mutex_unlock (&devicep
->lock
);
699 gomp_fatal ("Pointer target of array section wasn't mapped");
704 cur_node
.host_start
-= n
->host_start
;
706 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
707 /* At this point tgt_offset is target address of the
708 array section. Now subtract bias to get what we want
709 to initialize the pointer with. */
710 cur_node
.tgt_offset
-= bias
;
712 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
713 (void *) &cur_node
.tgt_offset
, sizeof (void *),
718 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
719 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
720 size_t first
, size_t i
, void **hostaddrs
,
721 size_t *sizes
, void *kinds
,
722 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
724 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
725 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
726 struct splay_tree_key_s cur_node
;
729 const bool short_mapkind
= true;
730 const int typemask
= short_mapkind
? 0xff : 0x7;
732 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
733 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
734 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
735 kind
= get_kind (short_mapkind
, kinds
, i
);
736 implicit
= get_implicit (short_mapkind
, kinds
, i
);
739 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
741 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
742 kind
& typemask
, false, implicit
, cbuf
,
748 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
750 cur_node
.host_start
--;
751 n2
= splay_tree_lookup (mem_map
, &cur_node
);
752 cur_node
.host_start
++;
755 && n2
->host_start
- n
->host_start
756 == n2
->tgt_offset
- n
->tgt_offset
)
758 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
759 kind
& typemask
, false, implicit
, cbuf
,
765 n2
= splay_tree_lookup (mem_map
, &cur_node
);
769 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
771 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
772 kind
& typemask
, false, implicit
, cbuf
,
777 gomp_mutex_unlock (&devicep
->lock
);
778 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
779 "other mapped elements from the same structure weren't mapped "
780 "together with it", (void *) cur_node
.host_start
,
781 (void *) cur_node
.host_end
);
784 attribute_hidden
void
785 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
786 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
787 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
788 struct gomp_coalesce_buf
*cbufp
,
789 bool allow_zero_length_array_sections
)
791 struct splay_tree_key_s s
;
796 gomp_mutex_unlock (&devicep
->lock
);
797 gomp_fatal ("enclosing struct not mapped for attach");
800 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
801 /* We might have a pointer in a packed struct: however we cannot have more
802 than one such pointer in each pointer-sized portion of the struct, so
804 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
807 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
809 if (!n
->aux
->attach_count
)
811 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
813 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
814 n
->aux
->attach_count
[idx
]++;
817 gomp_mutex_unlock (&devicep
->lock
);
818 gomp_fatal ("attach count overflow");
821 if (n
->aux
->attach_count
[idx
] == 1)
823 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
825 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
829 if ((void *) target
== NULL
)
831 gomp_mutex_unlock (&devicep
->lock
);
832 gomp_fatal ("attempt to attach null pointer");
835 s
.host_start
= target
+ bias
;
836 s
.host_end
= s
.host_start
+ 1;
837 tn
= splay_tree_lookup (mem_map
, &s
);
841 if (allow_zero_length_array_sections
)
842 /* When allowing attachment to zero-length array sections, we
843 allow attaching to NULL pointers when the target region is not
848 gomp_mutex_unlock (&devicep
->lock
);
849 gomp_fatal ("pointer target not mapped for attach");
853 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
856 "%s: attaching host %p, target %p (struct base %p) to %p\n",
857 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
858 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
860 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
861 sizeof (void *), true, cbufp
);
864 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
865 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
868 attribute_hidden
void
869 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
870 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
871 uintptr_t detach_from
, bool finalize
,
872 struct gomp_coalesce_buf
*cbufp
)
878 gomp_mutex_unlock (&devicep
->lock
);
879 gomp_fatal ("enclosing struct not mapped for detach");
882 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
884 if (!n
->aux
|| !n
->aux
->attach_count
)
886 gomp_mutex_unlock (&devicep
->lock
);
887 gomp_fatal ("no attachment counters for struct");
891 n
->aux
->attach_count
[idx
] = 1;
893 if (n
->aux
->attach_count
[idx
] == 0)
895 gomp_mutex_unlock (&devicep
->lock
);
896 gomp_fatal ("attach count underflow");
899 n
->aux
->attach_count
[idx
]--;
901 if (n
->aux
->attach_count
[idx
] == 0)
903 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
905 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
908 "%s: detaching host %p, target %p (struct base %p) to %p\n",
909 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
910 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
913 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
914 sizeof (void *), true, cbufp
);
917 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
918 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
921 attribute_hidden
uintptr_t
922 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
924 if (tgt
->list
[i
].key
!= NULL
)
925 return tgt
->list
[i
].key
->tgt
->tgt_start
926 + tgt
->list
[i
].key
->tgt_offset
927 + tgt
->list
[i
].offset
;
929 switch (tgt
->list
[i
].offset
)
932 return (uintptr_t) hostaddrs
[i
];
938 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
939 + tgt
->list
[i
+ 1].key
->tgt_offset
940 + tgt
->list
[i
+ 1].offset
941 + (uintptr_t) hostaddrs
[i
]
942 - (uintptr_t) hostaddrs
[i
+ 1];
945 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
949 static inline __attribute__((always_inline
)) struct target_mem_desc
*
950 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
951 struct goacc_asyncqueue
*aq
, size_t mapnum
,
952 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
953 void *kinds
, bool short_mapkind
,
954 htab_t
*refcount_set
,
955 enum gomp_map_vars_kind pragma_kind
)
957 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
958 bool has_firstprivate
= false;
959 bool has_always_ptrset
= false;
960 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
961 const int rshift
= short_mapkind
? 8 : 3;
962 const int typemask
= short_mapkind
? 0xff : 0x7;
963 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
964 struct splay_tree_key_s cur_node
;
965 struct target_mem_desc
*tgt
966 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
967 tgt
->list_count
= mapnum
;
968 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
969 tgt
->device_descr
= devicep
;
971 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
980 tgt_align
= sizeof (void *);
986 if (mapnum
> 1 || (pragma_kind
& GOMP_MAP_VARS_TARGET
))
988 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
989 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
992 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
994 size_t align
= 4 * sizeof (void *);
996 tgt_size
= mapnum
* sizeof (void *);
998 cbuf
.use_cnt
= 1 + (mapnum
> 1);
999 cbuf
.chunks
[0].start
= 0;
1000 cbuf
.chunks
[0].end
= tgt_size
;
1003 gomp_mutex_lock (&devicep
->lock
);
1004 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1006 gomp_mutex_unlock (&devicep
->lock
);
1011 for (i
= 0; i
< mapnum
; i
++)
1013 int kind
= get_kind (short_mapkind
, kinds
, i
);
1014 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1015 if (hostaddrs
[i
] == NULL
1016 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1018 tgt
->list
[i
].key
= NULL
;
1019 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1022 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1023 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1025 tgt
->list
[i
].key
= NULL
;
1028 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1029 on a separate construct prior to using use_device_{addr,ptr}.
1030 In OpenMP 5.0, map directives need to be ordered by the
1031 middle-end before the use_device_* clauses. If
1032 !not_found_cnt, all mappings requested (if any) are already
1033 mapped, so use_device_{addr,ptr} can be resolved right away.
1034 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1035 now but would succeed after performing the mappings in the
1036 following loop. We can't defer this always to the second
1037 loop, because it is not even invoked when !not_found_cnt
1038 after the first loop. */
1039 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1040 cur_node
.host_end
= cur_node
.host_start
;
1041 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1044 cur_node
.host_start
-= n
->host_start
;
1046 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1047 + cur_node
.host_start
);
1049 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1051 gomp_mutex_unlock (&devicep
->lock
);
1052 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1054 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1055 /* If not present, continue using the host address. */
1058 __builtin_unreachable ();
1059 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1062 tgt
->list
[i
].offset
= 0;
1065 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1067 size_t first
= i
+ 1;
1068 size_t last
= i
+ sizes
[i
];
1069 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1070 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1072 tgt
->list
[i
].key
= NULL
;
1073 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1074 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1077 size_t align
= (size_t) 1 << (kind
>> rshift
);
1078 if (tgt_align
< align
)
1080 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1081 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1082 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1083 not_found_cnt
+= last
- i
;
1084 for (i
= first
; i
<= last
; i
++)
1086 tgt
->list
[i
].key
= NULL
;
1088 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1090 gomp_coalesce_buf_add (&cbuf
,
1091 tgt_size
- cur_node
.host_end
1092 + (uintptr_t) hostaddrs
[i
],
1098 for (i
= first
; i
<= last
; i
++)
1099 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1100 sizes
, kinds
, NULL
, refcount_set
);
1104 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1106 tgt
->list
[i
].key
= NULL
;
1107 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1108 has_firstprivate
= true;
1111 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1112 || ((kind
& typemask
)
1113 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1115 tgt
->list
[i
].key
= NULL
;
1116 has_firstprivate
= true;
1119 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1120 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1121 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1123 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1124 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1126 tgt
->list
[i
].key
= NULL
;
1128 size_t align
= (size_t) 1 << (kind
>> rshift
);
1129 if (tgt_align
< align
)
1131 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1133 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1134 cur_node
.host_end
- cur_node
.host_start
);
1135 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1136 has_firstprivate
= true;
1140 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1142 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1145 tgt
->list
[i
].key
= NULL
;
1146 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1151 n
= splay_tree_lookup (mem_map
, &cur_node
);
1152 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1154 int always_to_cnt
= 0;
1155 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1157 bool has_nullptr
= false;
1159 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1160 if (n
->tgt
->list
[j
].key
== n
)
1162 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1165 if (n
->tgt
->list_count
== 0)
1167 /* 'declare target'; assume has_nullptr; it could also be
1168 statically assigned pointer, but that it should be to
1169 the equivalent variable on the host. */
1170 assert (n
->refcount
== REFCOUNT_INFINITY
);
1174 assert (j
< n
->tgt
->list_count
);
1175 /* Re-map the data if there is an 'always' modifier or if it a
1176 null pointer was there and non a nonnull has been found; that
1177 permits transparent re-mapping for Fortran array descriptors
1178 which were previously mapped unallocated. */
1179 for (j
= i
+ 1; j
< mapnum
; j
++)
1181 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1182 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1184 || !GOMP_MAP_POINTER_P (ptr_kind
)
1185 || *(void **) hostaddrs
[j
] == NULL
))
1187 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1188 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1189 > cur_node
.host_end
))
1193 has_always_ptrset
= true;
1198 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1199 kind
& typemask
, always_to_cnt
> 0, implicit
,
1200 NULL
, refcount_set
);
1205 tgt
->list
[i
].key
= NULL
;
1207 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1209 /* Not present, hence, skip entry - including its MAP_POINTER,
1211 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1213 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1214 == GOMP_MAP_POINTER
))
1217 tgt
->list
[i
].key
= NULL
;
1218 tgt
->list
[i
].offset
= 0;
1222 size_t align
= (size_t) 1 << (kind
>> rshift
);
1224 if (tgt_align
< align
)
1226 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1228 && gomp_to_device_kind_p (kind
& typemask
))
1229 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1230 cur_node
.host_end
- cur_node
.host_start
);
1231 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1232 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1236 for (j
= i
+ 1; j
< mapnum
; j
++)
1237 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1238 kinds
, j
)) & typemask
))
1239 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1241 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1242 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1243 > cur_node
.host_end
))
1247 tgt
->list
[j
].key
= NULL
;
1258 gomp_mutex_unlock (&devicep
->lock
);
1259 gomp_fatal ("unexpected aggregation");
1261 tgt
->to_free
= devaddrs
[0];
1262 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1263 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1265 else if (not_found_cnt
|| (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1267 /* Allocate tgt_align aligned tgt_size block of memory. */
1268 /* FIXME: Perhaps change interface to allocate properly aligned
1270 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1271 tgt_size
+ tgt_align
- 1);
1274 gomp_mutex_unlock (&devicep
->lock
);
1275 gomp_fatal ("device memory allocation fail");
1278 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1279 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1280 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1282 if (cbuf
.use_cnt
== 1)
1284 if (cbuf
.chunk_cnt
> 0)
1287 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1297 tgt
->to_free
= NULL
;
1303 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1304 tgt_size
= mapnum
* sizeof (void *);
1307 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1310 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1311 splay_tree_node array
= tgt
->array
;
1312 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1313 uintptr_t field_tgt_base
= 0;
1314 splay_tree_key field_tgt_structelem_first
= NULL
;
1316 for (i
= 0; i
< mapnum
; i
++)
1317 if (has_always_ptrset
1319 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1320 == GOMP_MAP_TO_PSET
)
1322 splay_tree_key k
= tgt
->list
[i
].key
;
1323 bool has_nullptr
= false;
1325 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1326 if (k
->tgt
->list
[j
].key
== k
)
1328 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1331 if (k
->tgt
->list_count
== 0)
1334 assert (j
< k
->tgt
->list_count
);
1336 tgt
->list
[i
].has_null_ptr_assoc
= false;
1337 for (j
= i
+ 1; j
< mapnum
; j
++)
1339 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1340 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1342 || !GOMP_MAP_POINTER_P (ptr_kind
)
1343 || *(void **) hostaddrs
[j
] == NULL
))
1345 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1346 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1351 if (*(void **) hostaddrs
[j
] == NULL
)
1352 tgt
->list
[i
].has_null_ptr_assoc
= true;
1353 tgt
->list
[j
].key
= k
;
1354 tgt
->list
[j
].copy_from
= false;
1355 tgt
->list
[j
].always_copy_from
= false;
1356 tgt
->list
[j
].is_attach
= false;
1357 gomp_increment_refcount (k
, refcount_set
);
1358 gomp_map_pointer (k
->tgt
, aq
,
1359 (uintptr_t) *(void **) hostaddrs
[j
],
1360 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1362 sizes
[j
], cbufp
, false);
1367 else if (tgt
->list
[i
].key
== NULL
)
1369 int kind
= get_kind (short_mapkind
, kinds
, i
);
1370 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1371 if (hostaddrs
[i
] == NULL
)
1373 switch (kind
& typemask
)
1375 size_t align
, len
, first
, last
;
1377 case GOMP_MAP_FIRSTPRIVATE
:
1378 align
= (size_t) 1 << (kind
>> rshift
);
1379 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1380 tgt
->list
[i
].offset
= tgt_size
;
1382 gomp_copy_host2dev (devicep
, aq
,
1383 (void *) (tgt
->tgt_start
+ tgt_size
),
1384 (void *) hostaddrs
[i
], len
, false, cbufp
);
1385 /* Save device address in hostaddr to permit latter availablity
1386 when doing a deep-firstprivate with pointer attach. */
1387 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1390 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1391 firstprivate to hostaddrs[i+1], which is assumed to contain a
1395 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1397 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1398 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1400 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1401 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1402 this probably needs revision for 'aq' usage. */
1404 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1405 sizeof (void *), false, cbufp
);
1409 case GOMP_MAP_FIRSTPRIVATE_INT
:
1410 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1412 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1413 /* The OpenACC 'host_data' construct only allows 'use_device'
1414 "mapping" clauses, so in the first loop, 'not_found_cnt'
1415 must always have been zero, so all OpenACC 'use_device'
1416 clauses have already been handled. (We can only easily test
1417 'use_device' with 'if_present' clause here.) */
1418 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1419 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1420 code conceptually simple, similar to the first loop. */
1421 case GOMP_MAP_USE_DEVICE_PTR
:
1422 if (tgt
->list
[i
].offset
== 0)
1424 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1425 cur_node
.host_end
= cur_node
.host_start
;
1426 n
= gomp_map_lookup (mem_map
, &cur_node
);
1429 cur_node
.host_start
-= n
->host_start
;
1431 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1432 + cur_node
.host_start
);
1434 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1436 gomp_mutex_unlock (&devicep
->lock
);
1437 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1439 else if ((kind
& typemask
)
1440 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1441 /* If not present, continue using the host address. */
1444 __builtin_unreachable ();
1445 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1448 case GOMP_MAP_STRUCT
:
1450 last
= i
+ sizes
[i
];
1451 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1452 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1454 if (tgt
->list
[first
].key
!= NULL
)
1456 n
= splay_tree_lookup (mem_map
, &cur_node
);
1459 size_t align
= (size_t) 1 << (kind
>> rshift
);
1460 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1461 - (uintptr_t) hostaddrs
[i
];
1462 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1463 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1464 - (uintptr_t) hostaddrs
[i
];
1465 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1466 field_tgt_offset
= tgt_size
;
1467 field_tgt_clear
= last
;
1468 field_tgt_structelem_first
= NULL
;
1469 tgt_size
+= cur_node
.host_end
1470 - (uintptr_t) hostaddrs
[first
];
1473 for (i
= first
; i
<= last
; i
++)
1474 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1475 sizes
, kinds
, cbufp
, refcount_set
);
1478 case GOMP_MAP_ALWAYS_POINTER
:
1479 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1480 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1481 n
= splay_tree_lookup (mem_map
, &cur_node
);
1483 || n
->host_start
> cur_node
.host_start
1484 || n
->host_end
< cur_node
.host_end
)
1486 gomp_mutex_unlock (&devicep
->lock
);
1487 gomp_fatal ("always pointer not mapped");
1490 && ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1491 != GOMP_MAP_ALWAYS_POINTER
))
1492 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1493 if (cur_node
.tgt_offset
)
1494 cur_node
.tgt_offset
-= sizes
[i
];
1495 gomp_copy_host2dev (devicep
, aq
,
1496 (void *) (n
->tgt
->tgt_start
1498 + cur_node
.host_start
1500 (void *) &cur_node
.tgt_offset
,
1501 sizeof (void *), true, cbufp
);
1502 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1503 + cur_node
.host_start
- n
->host_start
;
1505 case GOMP_MAP_IF_PRESENT
:
1506 /* Not present - otherwise handled above. Skip over its
1507 MAP_POINTER as well. */
1509 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1510 == GOMP_MAP_POINTER
))
1513 case GOMP_MAP_ATTACH
:
1514 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1516 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1517 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1518 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1521 tgt
->list
[i
].key
= n
;
1522 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1523 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1524 tgt
->list
[i
].copy_from
= false;
1525 tgt
->list
[i
].always_copy_from
= false;
1526 tgt
->list
[i
].is_attach
= true;
1527 /* OpenACC 'attach'/'detach' doesn't affect
1528 structured/dynamic reference counts ('n->refcount',
1529 'n->dynamic_refcount'). */
1532 = ((kind
& typemask
)
1533 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1534 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1535 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1538 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1540 gomp_mutex_unlock (&devicep
->lock
);
1541 gomp_fatal ("outer struct not mapped for attach");
1548 splay_tree_key k
= &array
->key
;
1549 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1550 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1551 k
->host_end
= k
->host_start
+ sizes
[i
];
1553 k
->host_end
= k
->host_start
+ sizeof (void *);
1554 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1555 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1556 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1557 kind
& typemask
, false, implicit
, cbufp
,
1562 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1564 /* Replace target address of the pointer with target address
1565 of mapped object in the splay tree. */
1566 splay_tree_remove (mem_map
, n
);
1568 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1569 k
->aux
->link_key
= n
;
1571 size_t align
= (size_t) 1 << (kind
>> rshift
);
1572 tgt
->list
[i
].key
= k
;
1575 k
->dynamic_refcount
= 0;
1576 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1578 k
->tgt_offset
= k
->host_start
- field_tgt_base
1582 k
->refcount
= REFCOUNT_STRUCTELEM
;
1583 if (field_tgt_structelem_first
== NULL
)
1585 /* Set to first structure element of sequence. */
1586 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1587 field_tgt_structelem_first
= k
;
1590 /* Point to refcount of leading element, but do not
1592 k
->structelem_refcount_ptr
1593 = &field_tgt_structelem_first
->structelem_refcount
;
1595 if (i
== field_tgt_clear
)
1597 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1598 field_tgt_structelem_first
= NULL
;
1601 if (i
== field_tgt_clear
)
1602 field_tgt_clear
= FIELD_TGT_EMPTY
;
1606 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1607 k
->tgt_offset
= tgt_size
;
1608 tgt_size
+= k
->host_end
- k
->host_start
;
1610 /* First increment, from 0 to 1. gomp_increment_refcount
1611 encapsulates the different increment cases, so use this
1612 instead of directly setting 1 during initialization. */
1613 gomp_increment_refcount (k
, refcount_set
);
1615 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1616 tgt
->list
[i
].always_copy_from
1617 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1618 tgt
->list
[i
].is_attach
= false;
1619 tgt
->list
[i
].offset
= 0;
1620 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1623 array
->right
= NULL
;
1624 splay_tree_insert (mem_map
, array
);
1625 switch (kind
& typemask
)
1627 case GOMP_MAP_ALLOC
:
1629 case GOMP_MAP_FORCE_ALLOC
:
1630 case GOMP_MAP_FORCE_FROM
:
1631 case GOMP_MAP_ALWAYS_FROM
:
1634 case GOMP_MAP_TOFROM
:
1635 case GOMP_MAP_FORCE_TO
:
1636 case GOMP_MAP_FORCE_TOFROM
:
1637 case GOMP_MAP_ALWAYS_TO
:
1638 case GOMP_MAP_ALWAYS_TOFROM
:
1639 gomp_copy_host2dev (devicep
, aq
,
1640 (void *) (tgt
->tgt_start
1642 (void *) k
->host_start
,
1643 k
->host_end
- k
->host_start
,
1646 case GOMP_MAP_POINTER
:
1647 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1649 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1650 k
->tgt_offset
, sizes
[i
], cbufp
,
1652 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1654 case GOMP_MAP_TO_PSET
:
1655 gomp_copy_host2dev (devicep
, aq
,
1656 (void *) (tgt
->tgt_start
1658 (void *) k
->host_start
,
1659 k
->host_end
- k
->host_start
,
1661 tgt
->list
[i
].has_null_ptr_assoc
= false;
1663 for (j
= i
+ 1; j
< mapnum
; j
++)
1665 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1667 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1668 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1670 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1671 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1676 tgt
->list
[j
].key
= k
;
1677 tgt
->list
[j
].copy_from
= false;
1678 tgt
->list
[j
].always_copy_from
= false;
1679 tgt
->list
[j
].is_attach
= false;
1680 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1681 /* For OpenMP, the use of refcount_sets causes
1682 errors if we set k->refcount = 1 above but also
1683 increment it again here, for decrementing will
1684 not properly match, since we decrement only once
1685 for each key's refcount. Therefore avoid this
1686 increment for OpenMP constructs. */
1688 gomp_increment_refcount (k
, refcount_set
);
1689 gomp_map_pointer (tgt
, aq
,
1690 (uintptr_t) *(void **) hostaddrs
[j
],
1692 + ((uintptr_t) hostaddrs
[j
]
1694 sizes
[j
], cbufp
, false);
1699 case GOMP_MAP_FORCE_PRESENT
:
1701 /* We already looked up the memory region above and it
1703 size_t size
= k
->host_end
- k
->host_start
;
1704 gomp_mutex_unlock (&devicep
->lock
);
1705 #ifdef HAVE_INTTYPES_H
1706 gomp_fatal ("present clause: !acc_is_present (%p, "
1707 "%"PRIu64
" (0x%"PRIx64
"))",
1708 (void *) k
->host_start
,
1709 (uint64_t) size
, (uint64_t) size
);
1711 gomp_fatal ("present clause: !acc_is_present (%p, "
1712 "%lu (0x%lx))", (void *) k
->host_start
,
1713 (unsigned long) size
, (unsigned long) size
);
1717 case GOMP_MAP_FORCE_DEVICEPTR
:
1718 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1719 gomp_copy_host2dev (devicep
, aq
,
1720 (void *) (tgt
->tgt_start
1722 (void *) k
->host_start
,
1723 sizeof (void *), false, cbufp
);
1726 gomp_mutex_unlock (&devicep
->lock
);
1727 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1731 if (k
->aux
&& k
->aux
->link_key
)
1733 /* Set link pointer on target to the device address of the
1735 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1736 /* We intentionally do not use coalescing here, as it's not
1737 data allocated by the current call to this function. */
1738 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1739 &tgt_addr
, sizeof (void *), true, NULL
);
1746 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1748 for (i
= 0; i
< mapnum
; i
++)
1750 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1751 gomp_copy_host2dev (devicep
, aq
,
1752 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1753 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1761 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1762 gomp_copy_host2dev (devicep
, aq
,
1763 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1764 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1765 - cbuf
.chunks
[0].start
),
1766 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1769 /* Free once the transfer has completed. */
1770 devicep
->openacc
.async
.queue_callback_func (aq
, free
, cbuf
.buf
);
1777 /* If the variable from "omp target enter data" map-list was already mapped,
1778 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1780 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1786 gomp_mutex_unlock (&devicep
->lock
);
1790 static struct target_mem_desc
*
1791 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1792 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1793 bool short_mapkind
, htab_t
*refcount_set
,
1794 enum gomp_map_vars_kind pragma_kind
)
1796 /* This management of a local refcount_set is for convenience of callers
1797 who do not share a refcount_set over multiple map/unmap uses. */
1798 htab_t local_refcount_set
= NULL
;
1799 if (refcount_set
== NULL
)
1801 local_refcount_set
= htab_create (mapnum
);
1802 refcount_set
= &local_refcount_set
;
1805 struct target_mem_desc
*tgt
;
1806 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1807 sizes
, kinds
, short_mapkind
, refcount_set
,
1809 if (local_refcount_set
)
1810 htab_free (local_refcount_set
);
1815 attribute_hidden
struct target_mem_desc
*
1816 goacc_map_vars (struct gomp_device_descr
*devicep
,
1817 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1818 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1819 void *kinds
, bool short_mapkind
,
1820 enum gomp_map_vars_kind pragma_kind
)
1822 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1823 sizes
, kinds
, short_mapkind
, NULL
,
1824 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1828 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1830 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1832 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1839 gomp_unref_tgt (void *ptr
)
1841 bool is_tgt_unmapped
= false;
1843 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1845 if (tgt
->refcount
> 1)
1849 gomp_unmap_tgt (tgt
);
1850 is_tgt_unmapped
= true;
1853 return is_tgt_unmapped
;
/* void-returning adapter for gomp_unref_tgt, usable where a callback of
   type void (*)(void *) is required (e.g. async queue callbacks); the
   "was it unmapped" result is deliberately discarded.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1863 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1865 splay_tree_remove (sp
, k
);
1868 if (k
->aux
->link_key
)
1869 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1870 if (k
->aux
->attach_count
)
1871 free (k
->aux
->attach_count
);
/* Remove the mapping K from DEVICEP's mem_map splay tree and drop one
   reference on the backing target_mem_desc.  For structure-element keys
   (REFCOUNT_STRUCTELEM_*) the whole _FIRST .. _LAST sequence is removed
   together.  When AQ is non-NULL, the final unref is queued as an OpenACC
   async callback (gomp_unref_tgt_void) rather than done synchronously.
   Returns true iff the enclosing target_mem_desc got unmapped here.
   NOTE(review): this region lost several physical source lines during
   extraction (fragments below are line-split); comments only added, the
   code text is untouched.  */
1877 static inline __attribute__((always_inline
)) bool
1878 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1879 struct goacc_asyncqueue
*aq
)
1881 bool is_tgt_unmapped
= false;
/* Structure-element keys are refcounted as a group; normalize K to the
   _FIRST element of its sequence before removing.  */
1883 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
1885 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1886 /* Infer the splay_tree_key of the first structelem key using the
1887 pointer to the first structleme_refcount. */
1888 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1889 - offsetof (struct splay_tree_key_s
,
1890 structelem_refcount
));
1891 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1893 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1894 with the splay_tree_keys embedded inside. */
1895 splay_tree_node node
=
1896 (splay_tree_node
) ((char *) k
1897 - offsetof (struct splay_tree_node_s
, key
));
1900 /* Starting from the _FIRST key, and continue for all following
1902 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1903 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
/* Non-structelem case: remove just this one key.  */
1910 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
/* Async path: defer the unref until the queued operations complete, as
   device memory may still be referenced — presumably via AQ; the
   synchronous fallback unrefs immediately below.  */
1913 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1916 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1917 return is_tgt_unmapped
;
1920 attribute_hidden
bool
1921 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1923 return gomp_remove_var_internal (devicep
, k
, NULL
);
1926 /* Remove a variable asynchronously. This actually removes the variable
1927 mapping immediately, but retains the linked target_mem_desc until the
1928 asynchronous operation has completed (as it may still refer to target
1929 memory). The device lock must be held before entry, and remains locked on
1932 attribute_hidden
void
1933 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1934 struct goacc_asyncqueue
*aq
)
1936 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1939 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1940 variables back from device to host: if it is false, it is assumed that this
1941 has been done already. */
/* NOTE(review): this region lost several physical source lines during
   extraction (fragments below are line-split); comments only added, the
   code text is untouched.  Detach pass, copy-back/refcount pass, then the
   final unref of TGT itself — ordering is load-bearing here.  */
1943 static inline __attribute__((always_inline
)) void
1944 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1945 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
1947 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
/* Nothing was mapped (list_count == 0) — presumably just drop TGT;
   the exact early-exit code is among the missing lines.  */
1949 if (tgt
->list_count
== 0)
1955 gomp_mutex_lock (&devicep
->lock
);
/* A finalized device has nothing left to unmap; bail out under lock.  */
1956 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1958 gomp_mutex_unlock (&devicep
->lock
);
1966 /* We must perform detachments before any copies back to the host. */
1967 for (i
= 0; i
< tgt
->list_count
; i
++)
1969 splay_tree_key k
= tgt
->list
[i
].key
;
1971 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1972 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1973 + tgt
->list
[i
].offset
,
/* Second pass: decrement refcounts, copy data back where requested,
   and remove dead mappings.  */
1977 for (i
= 0; i
< tgt
->list_count
; i
++)
1979 splay_tree_key k
= tgt
->list
[i
].key
;
1983 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1984 counts ('n->refcount', 'n->dynamic_refcount'). */
1985 if (tgt
->list
[i
].is_attach
)
1988 bool do_copy
, do_remove
;
1989 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
/* Copy back when the refcount transition allows it and the caller asked
   for copy-back, or unconditionally for 'always from' entries.  */
1991 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1992 || tgt
->list
[i
].always_copy_from
)
1993 gomp_copy_dev2host (devicep
, aq
,
1994 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1995 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1996 + tgt
->list
[i
].offset
),
1997 tgt
->list
[i
].length
);
2000 struct target_mem_desc
*k_tgt
= k
->tgt
;
2001 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
2002 /* It would be bad if TGT got unmapped while we're still iterating
2003 over its LIST_COUNT, and also expect to use it in the following
2005 assert (!is_tgt_unmapped
/* Async path: defer TGT's final unref to a queue callback; the
   synchronous path unrefs it directly.  */
2011 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2014 gomp_unref_tgt ((void *) tgt
);
2016 gomp_mutex_unlock (&devicep
->lock
);
2020 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2021 htab_t
*refcount_set
)
2023 /* This management of a local refcount_set is for convenience of callers
2024 who do not share a refcount_set over multiple map/unmap uses. */
2025 htab_t local_refcount_set
= NULL
;
2026 if (refcount_set
== NULL
)
2028 local_refcount_set
= htab_create (tgt
->list_count
);
2029 refcount_set
= &local_refcount_set
;
2032 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2034 if (local_refcount_set
)
2035 htab_free (local_refcount_set
);
2038 attribute_hidden
void
2039 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2040 struct goacc_asyncqueue
*aq
)
2042 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
/* Implements 'target update'-style data refresh: for each of MAPNUM host
   address ranges in HOSTADDRS/SIZES, copy to and/or from DEVICEP according
   to the mapping kind in KINDS (byte-wide kinds when SHORT_MAPKIND).
   Updating an object that is not fully mapped is a fatal error.  Regions
   with attached pointers are copied pointer-by-pointer so still-attached
   device pointers are never overwritten.
   NOTE(review): this region lost several physical source lines during
   extraction (fragments below are line-split); comments only added, the
   code text is untouched.  */
2046 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2047 size_t *sizes
, void *kinds
, bool short_mapkind
)
2050 struct splay_tree_key_s cur_node
;
/* Short (16-bit) map kinds carry the kind in the low byte; classic kinds
   only use 3 bits.  */
2051 const int typemask
= short_mapkind
? 0xff : 0x7;
2059 gomp_mutex_lock (&devicep
->lock
);
2060 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2062 gomp_mutex_unlock (&devicep
->lock
);
2066 for (i
= 0; i
< mapnum
; i
++)
2069 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2070 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2071 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2074 int kind
= get_kind (short_mapkind
, kinds
, i
);
/* The requested range must be wholly inside the mapped object.  */
2075 if (n
->host_start
> cur_node
.host_start
2076 || n
->host_end
< cur_node
.host_end
)
2078 gomp_mutex_unlock (&devicep
->lock
);
2079 gomp_fatal ("Trying to update [%p..%p) object when "
2080 "only [%p..%p) is mapped",
2081 (void *) cur_node
.host_start
,
2082 (void *) cur_node
.host_end
,
2083 (void *) n
->host_start
,
2084 (void *) n
->host_end
);
/* Attachment-aware path: walk the range one pointer at a time and skip
   slots whose attach count is non-zero.  */
2087 if (n
->aux
&& n
->aux
->attach_count
)
2089 uintptr_t addr
= cur_node
.host_start
;
2090 while (addr
< cur_node
.host_end
)
2092 /* We have to be careful not to overwrite still attached
2093 pointers during host<->device updates. */
2094 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2095 if (n
->aux
->attach_count
[i
] == 0)
2097 void *devaddr
= (void *) (n
->tgt
->tgt_start
2099 + addr
- n
->host_start
);
2100 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2101 gomp_copy_host2dev (devicep
, NULL
,
2102 devaddr
, (void *) addr
,
2103 sizeof (void *), false, NULL
);
2104 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2105 gomp_copy_dev2host (devicep
, NULL
,
2106 (void *) addr
, devaddr
,
2109 addr
+= sizeof (void *);
/* Fast path: no attachments — one bulk copy per direction.  */
2114 void *hostaddr
= (void *) cur_node
.host_start
;
2115 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2116 + cur_node
.host_start
2118 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2120 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2121 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2123 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2124 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
2128 gomp_mutex_unlock (&devicep
->lock
);
2131 static struct gomp_offload_icv_list
*
2132 gomp_get_offload_icv_item (int dev_num
)
2134 struct gomp_offload_icv_list
*l
= gomp_offload_icv_list
;
2135 while (l
!= NULL
&& l
->device_num
!= dev_num
)
2141 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2142 depending on the device num and the variable hierarchy
2143 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2144 device and thus no item with that device number is contained in
2145 gomp_offload_icv_list, then a new item is created and added to the list. */
/* NOTE(review): this region lost several physical source lines during
   extraction (fragments below are line-split; some `if (dev_x != NULL`
   headers are among the missing lines); comments only added, the code
   text is untouched.  Precedence for each ICV is dev_x (_DEV_<num>) >
   dev (_DEV) > all (_ALL) > built-in default, per the else-if chains.  */
2147 static struct gomp_offload_icvs
*
2148 get_gomp_offload_icvs (int dev_num
)
2150 struct gomp_icv_list
*dev
2151 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV
);
2152 struct gomp_icv_list
*all
2153 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL
);
2154 struct gomp_icv_list
*dev_x
= gomp_get_initial_icv_item (dev_num
);
2155 struct gomp_offload_icv_list
*offload_icvs
2156 = gomp_get_offload_icv_item (dev_num
);
/* Cached entry already exists — reuse it.  */
2158 if (offload_icvs
!= NULL
)
2159 return &offload_icvs
->icvs
;
/* Otherwise build a fresh entry and prepend it to the global list.  */
2161 struct gomp_offload_icv_list
*new
2162 = (struct gomp_offload_icv_list
*) gomp_malloc (sizeof (struct gomp_offload_icv_list
));
2164 new->device_num
= dev_num
;
2165 new->icvs
.device_num
= dev_num
;
2166 new->next
= gomp_offload_icv_list
;
/* nteams ICV.  */
2168 if (dev_x
!= NULL
&& gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_NTEAMS
))
2169 new->icvs
.nteams
= dev_x
->icvs
.nteams_var
;
2170 else if (dev
!= NULL
&& gomp_get_icv_flag (dev
->flags
, GOMP_ICV_NTEAMS
))
2171 new->icvs
.nteams
= dev
->icvs
.nteams_var
;
2172 else if (all
!= NULL
&& gomp_get_icv_flag (all
->flags
, GOMP_ICV_NTEAMS
))
2173 new->icvs
.nteams
= all
->icvs
.nteams_var
;
2175 new->icvs
.nteams
= gomp_default_icv_values
.nteams_var
;
/* teams-thread-limit ICV.  */
2178 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2179 new->icvs
.teams_thread_limit
= dev_x
->icvs
.teams_thread_limit_var
;
2180 else if (dev
!= NULL
2181 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2182 new->icvs
.teams_thread_limit
= dev
->icvs
.teams_thread_limit_var
;
2183 else if (all
!= NULL
2184 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2185 new->icvs
.teams_thread_limit
= all
->icvs
.teams_thread_limit_var
;
2187 new->icvs
.teams_thread_limit
2188 = gomp_default_icv_values
.teams_thread_limit_var
;
/* default-device ICV.  */
2191 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2192 new->icvs
.default_device
= dev_x
->icvs
.default_device_var
;
2193 else if (dev
!= NULL
2194 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2195 new->icvs
.default_device
= dev
->icvs
.default_device_var
;
2196 else if (all
!= NULL
2197 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2198 new->icvs
.default_device
= all
->icvs
.default_device_var
;
2200 new->icvs
.default_device
= gomp_default_icv_values
.default_device_var
;
/* Publish the new head of the list.  */
2202 gomp_offload_icv_list
= new;
2206 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2207 And insert to splay tree the mapping between addresses from HOST_TABLE and
2208 from loaded target image. We rely in the host and device compiler
2209 emitting variable and functions in the same order. */
2212 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2213 const void *host_table
, const void *target_data
,
2214 bool is_register_lock
)
2216 void **host_func_table
= ((void ***) host_table
)[0];
2217 void **host_funcs_end
= ((void ***) host_table
)[1];
2218 void **host_var_table
= ((void ***) host_table
)[2];
2219 void **host_vars_end
= ((void ***) host_table
)[3];
2221 /* The func table contains only addresses, the var table contains addresses
2222 and corresponding sizes. */
2223 int num_funcs
= host_funcs_end
- host_func_table
;
2224 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2226 /* Load image to device and get target addresses for the image. */
2227 struct addr_pair
*target_table
= NULL
;
2228 uint64_t *rev_target_fn_table
= NULL
;
2229 int i
, num_target_entries
;
2231 /* With reverse offload, insert also target-host addresses. */
2232 bool rev_lookup
= omp_requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
;
2235 = devicep
->load_image_func (devicep
->target_id
, version
,
2236 target_data
, &target_table
,
2237 rev_lookup
? &rev_target_fn_table
: NULL
);
2239 if (num_target_entries
!= num_funcs
+ num_vars
2240 /* "+1" due to the additional ICV struct. */
2241 && num_target_entries
!= num_funcs
+ num_vars
+ 1)
2243 gomp_mutex_unlock (&devicep
->lock
);
2244 if (is_register_lock
)
2245 gomp_mutex_unlock (®ister_lock
);
2246 gomp_fatal ("Cannot map target functions or variables"
2247 " (expected %u, have %u)", num_funcs
+ num_vars
,
2248 num_target_entries
);
2251 /* Insert host-target address mapping into splay tree. */
2252 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2253 /* "+1" due to the additional ICV struct. */
2254 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
+ 1)
2255 * sizeof (*tgt
->array
));
2256 if (rev_target_fn_table
)
2257 tgt
->rev_array
= gomp_malloc (num_funcs
* sizeof (*tgt
->rev_array
));
2259 tgt
->rev_array
= NULL
;
2260 tgt
->refcount
= REFCOUNT_INFINITY
;
2263 tgt
->to_free
= NULL
;
2265 tgt
->list_count
= 0;
2266 tgt
->device_descr
= devicep
;
2267 splay_tree_node array
= tgt
->array
;
2268 reverse_splay_tree_node rev_array
= tgt
->rev_array
;
2270 for (i
= 0; i
< num_funcs
; i
++)
2272 splay_tree_key k
= &array
->key
;
2273 k
->host_start
= (uintptr_t) host_func_table
[i
];
2274 k
->host_end
= k
->host_start
+ 1;
2276 k
->tgt_offset
= target_table
[i
].start
;
2277 k
->refcount
= REFCOUNT_INFINITY
;
2278 k
->dynamic_refcount
= 0;
2281 array
->right
= NULL
;
2282 splay_tree_insert (&devicep
->mem_map
, array
);
2283 if (rev_target_fn_table
)
2285 reverse_splay_tree_key k2
= &rev_array
->key
;
2286 k2
->dev
= rev_target_fn_table
[i
];
2288 rev_array
->left
= NULL
;
2289 rev_array
->right
= NULL
;
2291 reverse_splay_tree_insert (&devicep
->mem_map_rev
, rev_array
);
2297 /* Most significant bit of the size in host and target tables marks
2298 "omp declare target link" variables. */
2299 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2300 const uintptr_t size_mask
= ~link_bit
;
2302 for (i
= 0; i
< num_vars
; i
++)
2304 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2305 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2306 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2308 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2310 gomp_mutex_unlock (&devicep
->lock
);
2311 if (is_register_lock
)
2312 gomp_mutex_unlock (®ister_lock
);
2313 gomp_fatal ("Cannot map target variables (size mismatch)");
2316 splay_tree_key k
= &array
->key
;
2317 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2319 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2321 k
->tgt_offset
= target_var
->start
;
2322 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2323 k
->dynamic_refcount
= 0;
2326 array
->right
= NULL
;
2327 splay_tree_insert (&devicep
->mem_map
, array
);
2331 /* Last entry is for a ICVs variable.
2332 Tolerate case where plugin does not return those entries. */
2333 if (num_funcs
+ num_vars
< num_target_entries
)
2335 struct addr_pair
*var
= &target_table
[num_funcs
+ num_vars
];
2337 /* Start address will be non-zero for the ICVs variable if
2338 the variable was found in this image. */
2339 if (var
->start
!= 0)
2341 /* The index of the devicep within devices[] is regarded as its
2342 'device number', which is different from the per-device type
2343 devicep->target_id. */
2344 int dev_num
= (int) (devicep
- &devices
[0]);
2345 struct gomp_offload_icvs
*icvs
= get_gomp_offload_icvs (dev_num
);
2346 size_t var_size
= var
->end
- var
->start
;
2347 if (var_size
!= sizeof (struct gomp_offload_icvs
))
2349 gomp_mutex_unlock (&devicep
->lock
);
2350 if (is_register_lock
)
2351 gomp_mutex_unlock (®ister_lock
);
2352 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2355 /* Copy the ICVs variable to place on device memory, hereby
2356 actually designating its device number into effect. */
2357 gomp_copy_host2dev (devicep
, NULL
, (void *) var
->start
, icvs
,
2358 var_size
, false, NULL
);
2359 splay_tree_key k
= &array
->key
;
2360 k
->host_start
= (uintptr_t) icvs
;
2362 k
->host_start
+ (size_mask
& sizeof (struct gomp_offload_icvs
));
2364 k
->tgt_offset
= var
->start
;
2365 k
->refcount
= REFCOUNT_INFINITY
;
2366 k
->dynamic_refcount
= 0;
2369 array
->right
= NULL
;
2370 splay_tree_insert (&devicep
->mem_map
, array
);
2375 free (target_table
);
2378 /* Unload the mappings described by target_data from device DEVICE_P.
2379 The device must be locked. */
2382 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2384 const void *host_table
, const void *target_data
)
2386 void **host_func_table
= ((void ***) host_table
)[0];
2387 void **host_funcs_end
= ((void ***) host_table
)[1];
2388 void **host_var_table
= ((void ***) host_table
)[2];
2389 void **host_vars_end
= ((void ***) host_table
)[3];
2391 /* The func table contains only addresses, the var table contains addresses
2392 and corresponding sizes. */
2393 int num_funcs
= host_funcs_end
- host_func_table
;
2394 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2396 struct splay_tree_key_s k
;
2397 splay_tree_key node
= NULL
;
2399 /* Find mapping at start of node array */
2400 if (num_funcs
|| num_vars
)
2402 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2403 : (uintptr_t) host_var_table
[0]);
2404 k
.host_end
= k
.host_start
+ 1;
2405 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2408 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2410 gomp_mutex_unlock (&devicep
->lock
);
2411 gomp_fatal ("image unload fail");
2413 if (devicep
->mem_map_rev
.root
)
2415 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2417 assert (node
&& node
->tgt
&& node
->tgt
->rev_array
);
2418 assert (devicep
->mem_map_rev
.root
->key
.k
->tgt
== node
->tgt
);
2419 free (node
->tgt
->rev_array
);
2420 devicep
->mem_map_rev
.root
= NULL
;
2423 /* Remove mappings from splay tree. */
2425 for (i
= 0; i
< num_funcs
; i
++)
2427 k
.host_start
= (uintptr_t) host_func_table
[i
];
2428 k
.host_end
= k
.host_start
+ 1;
2429 splay_tree_remove (&devicep
->mem_map
, &k
);
2432 /* Most significant bit of the size in host and target tables marks
2433 "omp declare target link" variables. */
2434 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2435 const uintptr_t size_mask
= ~link_bit
;
2436 bool is_tgt_unmapped
= false;
2438 for (i
= 0; i
< num_vars
; i
++)
2440 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2442 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2444 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2445 splay_tree_remove (&devicep
->mem_map
, &k
);
2448 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2449 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2453 if (node
&& !is_tgt_unmapped
)
2461 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2463 char *end
= buf
+ size
, *p
= buf
;
2464 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2465 p
+= snprintf (p
, end
- p
, "unified_address");
2466 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2467 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2468 (p
== buf
? "" : ", "));
2469 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2470 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2471 (p
== buf
? "" : ", "));
2474 /* This function should be called from every offload image while loading.
2475 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2476 the target, and DATA. */
2479 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2480 int target_type
, const void *data
)
2484 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2485 gomp_fatal ("Library too old for offload (version %u < %u)",
2486 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2489 const void *target_data
;
2490 if (GOMP_VERSION_LIB (version
) > 1)
2492 omp_req
= (int) (size_t) ((void **) data
)[0];
2493 target_data
= &((void **) data
)[1];
2501 gomp_mutex_lock (®ister_lock
);
2503 if (omp_req
&& omp_requires_mask
&& omp_requires_mask
!= omp_req
)
2505 char buf1
[sizeof ("unified_address, unified_shared_memory, "
2506 "reverse_offload")];
2507 char buf2
[sizeof ("unified_address, unified_shared_memory, "
2508 "reverse_offload")];
2509 gomp_requires_to_name (buf2
, sizeof (buf2
),
2510 omp_req
!= GOMP_REQUIRES_TARGET_USED
2511 ? omp_req
: omp_requires_mask
);
2512 if (omp_req
!= GOMP_REQUIRES_TARGET_USED
2513 && omp_requires_mask
!= GOMP_REQUIRES_TARGET_USED
)
2515 gomp_requires_to_name (buf1
, sizeof (buf1
), omp_requires_mask
);
2516 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2517 "in multiple compilation units: '%s' vs. '%s'",
2521 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2522 "some compilation units", buf2
);
2524 omp_requires_mask
= omp_req
;
2526 /* Load image to all initialized devices. */
2527 for (i
= 0; i
< num_devices
; i
++)
2529 struct gomp_device_descr
*devicep
= &devices
[i
];
2530 gomp_mutex_lock (&devicep
->lock
);
2531 if (devicep
->type
== target_type
2532 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2533 gomp_load_image_to_device (devicep
, version
,
2534 host_table
, target_data
, true);
2535 gomp_mutex_unlock (&devicep
->lock
);
2538 /* Insert image to array of pending images. */
2540 = gomp_realloc_unlock (offload_images
,
2541 (num_offload_images
+ 1)
2542 * sizeof (struct offload_image_descr
));
2543 offload_images
[num_offload_images
].version
= version
;
2544 offload_images
[num_offload_images
].type
= target_type
;
2545 offload_images
[num_offload_images
].host_table
= host_table
;
2546 offload_images
[num_offload_images
].target_data
= target_data
;
2548 num_offload_images
++;
2549 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: register an offload image using descriptor version 0,
   forwarding to GOMP_offload_register_ver.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2561 /* This function should be called from every offload image while unloading.
2562 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2563 the target, and DATA. */
2566 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2567 int target_type
, const void *data
)
2571 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2572 gomp_fatal ("Library too old for offload (version %u < %u)",
2573 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2575 const void *target_data
;
2576 if (GOMP_VERSION_LIB (version
) > 1)
2577 target_data
= &((void **) data
)[1];
2581 gomp_mutex_lock (®ister_lock
);
2583 /* Unload image from all initialized devices. */
2584 for (i
= 0; i
< num_devices
; i
++)
2586 struct gomp_device_descr
*devicep
= &devices
[i
];
2587 gomp_mutex_lock (&devicep
->lock
);
2588 if (devicep
->type
== target_type
2589 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2590 gomp_unload_image_from_device (devicep
, version
,
2591 host_table
, target_data
);
2592 gomp_mutex_unlock (&devicep
->lock
);
2595 /* Remove image from array of pending images. */
2596 for (i
= 0; i
< num_offload_images
; i
++)
2597 if (offload_images
[i
].target_data
== target_data
)
2599 offload_images
[i
] = offload_images
[--num_offload_images
];
2603 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: unregister an offload image using descriptor version 0,
   forwarding to GOMP_offload_unregister_ver.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2615 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2616 must be locked on entry, and remains locked on return. */
2618 attribute_hidden
void
2619 gomp_init_device (struct gomp_device_descr
*devicep
)
2622 if (!devicep
->init_device_func (devicep
->target_id
))
2624 gomp_mutex_unlock (&devicep
->lock
);
2625 gomp_fatal ("device initialization failed");
2628 /* Load to device all images registered by the moment. */
2629 for (i
= 0; i
< num_offload_images
; i
++)
2631 struct offload_image_descr
*image
= &offload_images
[i
];
2632 if (image
->type
== devicep
->type
)
2633 gomp_load_image_to_device (devicep
, image
->version
,
2634 image
->host_table
, image
->target_data
,
2638 /* Initialize OpenACC asynchronous queues. */
2639 goacc_init_asyncqueues (devicep
);
2641 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2644 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2645 must be locked on entry, and remains locked on return. */
2647 attribute_hidden
bool
2648 gomp_fini_device (struct gomp_device_descr
*devicep
)
2650 bool ret
= goacc_fini_asyncqueues (devicep
);
2651 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2652 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2656 attribute_hidden
void
2657 gomp_unload_device (struct gomp_device_descr
*devicep
)
2659 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2663 /* Unload from device all images registered at the moment. */
2664 for (i
= 0; i
< num_offload_images
; i
++)
2666 struct offload_image_descr
*image
= &offload_images
[i
];
2667 if (image
->type
== devicep
->type
)
2668 gomp_unload_image_from_device (devicep
, image
->version
,
2670 image
->target_data
);
2675 /* Host fallback for GOMP_target{,_ext} routines. */
2678 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2679 struct gomp_device_descr
*devicep
, void **args
)
2681 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2683 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2685 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2686 "be used for offloading");
2689 memset (thr
, '\0', sizeof (*thr
));
2690 if (gomp_places_list
)
2692 thr
->place
= old_thr
.place
;
2693 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2698 intptr_t id
= (intptr_t) *args
++, val
;
2699 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2700 val
= (intptr_t) *args
++;
2702 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2703 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2705 id
&= GOMP_TARGET_ARG_ID_MASK
;
2706 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
2708 val
= val
> INT_MAX
? INT_MAX
: val
;
2710 gomp_icv (true)->thread_limit_var
= val
;
2715 gomp_free_thread (thr
);
2719 /* Calculate alignment and size requirements of a private copy of data shared
2720 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2723 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2724 unsigned short *kinds
, size_t *tgt_align
,
2728 for (i
= 0; i
< mapnum
; i
++)
2729 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2731 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2732 if (*tgt_align
< align
)
2734 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2735 *tgt_size
+= sizes
[i
];
2739 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2742 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2743 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2746 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2748 tgt
+= tgt_align
- al
;
2751 for (i
= 0; i
< mapnum
; i
++)
2752 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
2754 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2755 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2756 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2757 hostaddrs
[i
] = tgt
+ tgt_size
;
2758 tgt_size
= tgt_size
+ sizes
[i
];
2759 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
2761 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
2767 /* Helper function of GOMP_target{,_ext} routines. */
2770 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2771 void (*host_fn
) (void *))
2773 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2774 return (void *) host_fn
;
2777 gomp_mutex_lock (&devicep
->lock
);
2778 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2780 gomp_mutex_unlock (&devicep
->lock
);
2784 struct splay_tree_key_s k
;
2785 k
.host_start
= (uintptr_t) host_fn
;
2786 k
.host_end
= k
.host_start
+ 1;
2787 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2788 gomp_mutex_unlock (&devicep
->lock
);
2792 return (void *) tgt_fn
->tgt_offset
;
2796 /* Called when encountering a target directive. If DEVICE
2797 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2798 GOMP_DEVICE_HOST_FALLBACK (or any value
2799 larger than last available hw device), use host fallback.
2800 FN is address of host code, UNUSED is part of the current ABI, but
2801 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2802 with MAPNUM entries, with addresses of the host objects,
2803 sizes of the host objects (resp. for pointer kind pointer bias
2804 and assumed sizeof (void *) size) and kinds. */
2807 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2808 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2809 unsigned char *kinds
)
2811 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2815 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2816 /* All shared memory devices should use the GOMP_target_ext function. */
2817 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2818 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2819 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
2821 htab_t refcount_set
= htab_create (mapnum
);
2822 struct target_mem_desc
*tgt_vars
2823 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2824 &refcount_set
, GOMP_MAP_VARS_TARGET
);
2825 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2827 htab_clear (refcount_set
);
2828 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2829 htab_free (refcount_set
);
2832 static inline unsigned int
2833 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2835 /* If we cannot run asynchronously, simply ignore nowait. */
2836 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2837 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2843 gomp_copy_back_icvs (struct gomp_device_descr
*devicep
, int device
)
2845 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2849 void *host_ptr
= &item
->icvs
;
2850 void *dev_ptr
= omp_get_mapped_ptr (host_ptr
, device
);
2851 if (dev_ptr
!= NULL
)
2852 gomp_copy_dev2host (devicep
, NULL
, host_ptr
, dev_ptr
,
2853 sizeof (struct gomp_offload_icvs
));
2856 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2857 and several arguments have been added:
2858 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2859 DEPEND is array of dependencies, see GOMP_task for details.
2861 ARGS is a pointer to an array consisting of a variable number of both
2862 device-independent and device-specific arguments, which can take one two
2863 elements where the first specifies for which device it is intended, the type
2864 and optionally also the value. If the value is not present in the first
2865 one, the whole second element the actual value. The last element of the
2866 array is a single NULL. Among the device independent can be for example
2867 NUM_TEAMS and THREAD_LIMIT.
2869 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2870 that value, or 1 if teams construct is not present, or 0, if
2871 teams construct does not have num_teams clause and so the choice is
2872 implementation defined, and -1 if it can't be determined on the host
2873 what value will GOMP_teams have on the device.
2874 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2875 body with that value, or 0, if teams construct does not have thread_limit
2876 clause or the teams construct is not present, or -1 if it can't be
2877 determined on the host what value will GOMP_teams have on the device. */
2880 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2881 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2882 unsigned int flags
, void **depend
, void **args
)
2884 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2885 size_t tgt_align
= 0, tgt_size
= 0;
2886 bool fpc_done
= false;
2888 /* Obtain the original TEAMS and THREADS values from ARGS. */
2889 intptr_t orig_teams
= 1, orig_threads
= 0;
2890 size_t num_args
= 0, len
= 1, teams_len
= 1, threads_len
= 1;
2891 void **tmpargs
= args
;
2894 intptr_t id
= (intptr_t) *tmpargs
++, val
;
2895 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2897 val
= (intptr_t) *tmpargs
++;
2902 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2906 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2908 val
= val
> INT_MAX
? INT_MAX
: val
;
2909 if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
)
2914 else if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
)
2921 intptr_t new_teams
= orig_teams
, new_threads
= orig_threads
;
2922 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2923 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2924 value could not be determined. No change.
2925 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2926 Set device-specific value.
2927 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2929 if (orig_teams
== -2)
2931 else if (orig_teams
== 0)
2933 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2935 new_teams
= item
->icvs
.nteams
;
2937 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2938 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2939 e.g. a THREAD_LIMIT clause. */
2940 if (orig_teams
> -2 && orig_threads
== 0)
2942 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2944 new_threads
= item
->icvs
.teams_thread_limit
;
2947 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2949 void **new_args
= args
;
2950 if (orig_teams
!= new_teams
|| orig_threads
!= new_threads
)
2952 size_t tms_len
= (orig_teams
== new_teams
2954 : (new_teams
> -(1 << 15) && new_teams
< (1 << 15)
2956 size_t ths_len
= (orig_threads
== new_threads
2958 : (new_threads
> -(1 << 15) && new_threads
< (1 << 15)
2960 /* One additional item after the last arg must be NULL. */
2961 size_t new_args_cnt
= num_args
- teams_len
- threads_len
+ tms_len
2963 new_args
= (void **) gomp_alloca (new_args_cnt
* sizeof (void*));
2966 void **tmp_new_args
= new_args
;
2967 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
2968 too if they have not been changed and skipped otherwise. */
2971 intptr_t id
= (intptr_t) *tmpargs
;
2972 if (((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
2973 && orig_teams
!= new_teams
)
2974 || ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
2975 && orig_threads
!= new_threads
))
2978 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2983 *tmp_new_args
++ = *tmpargs
++;
2984 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2985 *tmp_new_args
++ = *tmpargs
++;
2989 /* Add the new TEAMS arg to the new args list if it has been changed. */
2990 if (orig_teams
!= new_teams
)
2992 intptr_t new_val
= new_teams
;
2995 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
2996 | GOMP_TARGET_ARG_NUM_TEAMS
;
2997 *tmp_new_args
++ = (void *) new_val
;
3001 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3002 | GOMP_TARGET_ARG_NUM_TEAMS
);
3003 *tmp_new_args
++ = (void *) new_val
;
3007 /* Add the new THREADS arg to the new args list if it has been changed. */
3008 if (orig_threads
!= new_threads
)
3010 intptr_t new_val
= new_threads
;
3013 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3014 | GOMP_TARGET_ARG_THREAD_LIMIT
;
3015 *tmp_new_args
++ = (void *) new_val
;
3019 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3020 | GOMP_TARGET_ARG_THREAD_LIMIT
);
3021 *tmp_new_args
++ = (void *) new_val
;
3025 *tmp_new_args
= NULL
;
3028 flags
= clear_unsupported_flags (devicep
, flags
);
3030 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
3032 struct gomp_thread
*thr
= gomp_thread ();
3033 /* Create a team if we don't have any around, as nowait
3034 target tasks make sense to run asynchronously even when
3035 outside of any parallel. */
3036 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
3038 struct gomp_team
*team
= gomp_new_team (1);
3039 struct gomp_task
*task
= thr
->task
;
3040 struct gomp_task
**implicit_task
= &task
;
3041 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
3042 team
->prev_ts
= thr
->ts
;
3043 thr
->ts
.team
= team
;
3044 thr
->ts
.team_id
= 0;
3045 thr
->ts
.work_share
= &team
->work_shares
[0];
3046 thr
->ts
.last_work_share
= NULL
;
3047 #ifdef HAVE_SYNC_BUILTINS
3048 thr
->ts
.single_count
= 0;
3050 thr
->ts
.static_trip
= 0;
3051 thr
->task
= &team
->implicit_task
[0];
3052 gomp_init_task (thr
->task
, NULL
, icv
);
3053 while (*implicit_task
3054 && (*implicit_task
)->kind
!= GOMP_TASK_IMPLICIT
)
3055 implicit_task
= &(*implicit_task
)->parent
;
3058 thr
->task
= *implicit_task
;
3060 free (*implicit_task
);
3061 thr
->task
= &team
->implicit_task
[0];
3064 pthread_setspecific (gomp_thread_destructor
, thr
);
3065 if (implicit_task
!= &task
)
3067 *implicit_task
= thr
->task
;
3072 && !thr
->task
->final_task
)
3074 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
3075 sizes
, kinds
, flags
, depend
, new_args
,
3076 GOMP_TARGET_TASK_BEFORE_MAP
);
3081 /* If there are depend clauses, but nowait is not present
3082 (or we are in a final task), block the parent task until the
3083 dependencies are resolved and then just continue with the rest
3084 of the function as if it is a merged task. */
3087 struct gomp_thread
*thr
= gomp_thread ();
3088 if (thr
->task
&& thr
->task
->depend_hash
)
3090 /* If we might need to wait, copy firstprivate now. */
3091 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3092 &tgt_align
, &tgt_size
);
3095 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3096 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3097 tgt_align
, tgt_size
);
3100 gomp_task_maybe_wait_for_dependencies (depend
);
3106 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3107 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
3108 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3112 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3113 &tgt_align
, &tgt_size
);
3116 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3117 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3118 tgt_align
, tgt_size
);
3121 gomp_target_fallback (fn
, hostaddrs
, devicep
, new_args
);
3125 struct target_mem_desc
*tgt_vars
;
3126 htab_t refcount_set
= NULL
;
3128 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3132 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3133 &tgt_align
, &tgt_size
);
3136 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3137 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3138 tgt_align
, tgt_size
);
3145 refcount_set
= htab_create (mapnum
);
3146 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
3147 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
3149 devicep
->run_func (devicep
->target_id
, fn_addr
,
3150 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
3154 htab_clear (refcount_set
);
3155 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3158 htab_free (refcount_set
);
3160 /* Copy back ICVs from device to host.
3161 HOST_PTR is expected to exist since it was added in
3162 gomp_load_image_to_device if not already available. */
3163 gomp_copy_back_icvs (devicep
, device
);
3168 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3169 keeping track of all variable handling - assuming that reverse offload occurs
3170 ony very rarely. Downside is that the reverse search is slow. */
3172 struct gomp_splay_tree_rev_lookup_data
{
3173 uintptr_t tgt_start
;
3179 gomp_splay_tree_rev_lookup (splay_tree_key key
, void *d
)
3181 struct gomp_splay_tree_rev_lookup_data
*data
;
3182 data
= (struct gomp_splay_tree_rev_lookup_data
*)d
;
3183 uintptr_t tgt_start
= key
->tgt
->tgt_start
+ key
->tgt_offset
;
3185 if (tgt_start
> data
->tgt_start
|| key
->tgt
->list_count
== 0)
3189 for (j
= 0; j
< key
->tgt
->list_count
; j
++)
3190 if (key
->tgt
->list
[j
].key
== key
)
3192 assert (j
< key
->tgt
->list_count
);
3193 uintptr_t tgt_end
= tgt_start
+ key
->tgt
->list
[j
].length
;
3195 if ((tgt_start
== data
->tgt_start
&& tgt_end
== data
->tgt_end
)
3196 || (tgt_end
> data
->tgt_start
&& tgt_start
< data
->tgt_end
))
3204 static inline splay_tree_key
3205 gomp_map_rev_lookup (splay_tree mem_map
, uint64_t tgt_start
, uint64_t tgt_end
,
3208 struct gomp_splay_tree_rev_lookup_data data
;
3210 data
.tgt_start
= tgt_start
;
3211 data
.tgt_end
= tgt_end
;
3213 if (tgt_start
!= tgt_end
)
3215 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3220 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3221 if (data
.key
!= NULL
|| zero_len
)
3226 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
/* Per-mapping bookkeeping for reverse-offload copy-back: the device address
   of the mapped datum plus whether it was already present on the host and
   whether its private copy needed re-alignment.  NOTE(review): leading
   members reconstructed from usage (d[i].devaddr) — verify against upstream
   libgomp target.c.  */
struct cpy_data
{
  uint64_t devaddr;
  bool present, aligned;
};
3237 /* Search just mapped reverse-offload data; returns index if found,
3241 gomp_map_cdata_lookup_int (struct cpy_data
*d
, uint64_t *devaddrs
,
3242 unsigned short *kinds
, uint64_t *sizes
, size_t n
,
3243 uint64_t tgt_start
, uint64_t tgt_end
)
3245 const bool short_mapkind
= true;
3246 const int typemask
= short_mapkind
? 0xff : 0x7;
3248 for (i
= 0; i
< n
; i
++)
3250 bool is_struct
= ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3251 == GOMP_MAP_STRUCT
);
3254 dev_end
= d
[i
].devaddr
+ sizes
[i
];
3257 if (i
+ sizes
[i
] < n
)
3258 dev_end
= d
[i
+ sizes
[i
]].devaddr
+ sizes
[i
+ sizes
[i
]];
3260 dev_end
= devaddrs
[i
+ sizes
[i
]] + sizes
[i
+ sizes
[i
]];
3262 if ((d
[i
].devaddr
== tgt_start
&& dev_end
== tgt_end
)
3263 || (dev_end
> tgt_start
&& d
[i
].devaddr
< tgt_end
))
/* Wrapper around gomp_map_cdata_lookup_int that, like gomp_map_rev_lookup,
   handles zero-length ranges by retrying one byte forward and then one byte
   backward.  Returns the matching index, or N if none.  */

static inline size_t
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
		       bool zero_len)
{
  size_t i;
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				      tgt_start, tgt_end);
  /* Zero-length: widen by one byte past the address first.  */
  tgt_end++;
  i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				 tgt_start, tgt_end);
  if (i < n || zero_len)
    return i;
  tgt_end--;

  /* Then one byte before it.  */
  tgt_start--;
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				    tgt_start, tgt_end);
}
3293 /* Handle reverse offload. This is called by the device plugins for a
3294 reverse offload; it is not called if the outer target runs on the host.
3295 The mapping is simplified device-affecting constructs (except for target
3296 with device(ancestor:1)) must not be encountered; in particular not
3297 target (enter/exit) data. */
3300 gomp_target_rev (uint64_t fn_ptr
, uint64_t mapnum
, uint64_t devaddrs_ptr
,
3301 uint64_t sizes_ptr
, uint64_t kinds_ptr
, int dev_num
,
3302 void (*dev_to_host_cpy
) (void *, const void *, size_t, void*),
3303 void (*host_to_dev_cpy
) (void *, const void *, size_t, void*),
3306 /* Return early if there is no offload code. */
3307 if (sizeof (OFFLOAD_PLUGINS
) == sizeof (""))
3309 /* Currently, this fails because of calculate_firstprivate_requirements
3310 below; it could be fixed but additional code needs to be updated to
3311 handle 32bit hosts - thus, it is not worthwhile. */
3312 if (sizeof (void *) != sizeof (uint64_t))
3313 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3315 struct cpy_data
*cdata
= NULL
;
3318 unsigned short *kinds
;
3319 const bool short_mapkind
= true;
3320 const int typemask
= short_mapkind
? 0xff : 0x7;
3321 struct gomp_device_descr
*devicep
= resolve_device (dev_num
, false);
3323 reverse_splay_tree_key n
;
3324 struct reverse_splay_tree_key_s k
;
3327 gomp_mutex_lock (&devicep
->lock
);
3328 n
= gomp_map_lookup_rev (&devicep
->mem_map_rev
, &k
);
3329 gomp_mutex_unlock (&devicep
->lock
);
3332 gomp_fatal ("Cannot find reverse-offload function");
3333 void (*host_fn
)() = (void (*)()) n
->k
->host_start
;
3335 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) || mapnum
== 0)
3337 devaddrs
= (uint64_t *) (uintptr_t) devaddrs_ptr
;
3338 sizes
= (uint64_t *) (uintptr_t) sizes_ptr
;
3339 kinds
= (unsigned short *) (uintptr_t) kinds_ptr
;
3343 devaddrs
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3344 sizes
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3345 kinds
= (unsigned short *) gomp_malloc (mapnum
* sizeof (unsigned short));
3346 if (dev_to_host_cpy
)
3348 dev_to_host_cpy (devaddrs
, (const void *) (uintptr_t) devaddrs_ptr
,
3349 mapnum
* sizeof (uint64_t), token
);
3350 dev_to_host_cpy (sizes
, (const void *) (uintptr_t) sizes_ptr
,
3351 mapnum
* sizeof (uint64_t), token
);
3352 dev_to_host_cpy (kinds
, (const void *) (uintptr_t) kinds_ptr
,
3353 mapnum
* sizeof (unsigned short), token
);
3357 gomp_copy_dev2host (devicep
, NULL
, devaddrs
,
3358 (const void *) (uintptr_t) devaddrs_ptr
,
3359 mapnum
* sizeof (uint64_t));
3360 gomp_copy_dev2host (devicep
, NULL
, sizes
,
3361 (const void *) (uintptr_t) sizes_ptr
,
3362 mapnum
* sizeof (uint64_t));
3363 gomp_copy_dev2host (devicep
, NULL
, kinds
, (const void *) (uintptr_t) kinds_ptr
,
3364 mapnum
* sizeof (unsigned short));
3368 size_t tgt_align
= 0, tgt_size
= 0;
3370 /* If actually executed on 32bit systems, the casts lead to wrong code;
3371 but 32bit with offloading is not supported; see top of this function. */
3372 calculate_firstprivate_requirements (mapnum
, (void *) (uintptr_t) sizes
,
3373 (void *) (uintptr_t) kinds
,
3374 &tgt_align
, &tgt_size
);
3378 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3379 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3381 tgt
+= tgt_align
- al
;
3383 for (uint64_t i
= 0; i
< mapnum
; i
++)
3384 if (get_kind (short_mapkind
, kinds
, i
) == GOMP_MAP_FIRSTPRIVATE
3385 && devaddrs
[i
] != 0)
3387 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3388 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3389 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3390 memcpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3392 else if (dev_to_host_cpy
)
3393 dev_to_host_cpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3394 (size_t) sizes
[i
], token
);
3396 gomp_copy_dev2host (devicep
, NULL
, tgt
+ tgt_size
,
3397 (void *) (uintptr_t) devaddrs
[i
],
3399 devaddrs
[i
] = (uint64_t) (uintptr_t) tgt
+ tgt_size
;
3400 tgt_size
= tgt_size
+ sizes
[i
];
3401 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3403 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3404 == GOMP_MAP_ATTACH
))
3406 *(uint64_t*) (uintptr_t) (devaddrs
[i
+1] + sizes
[i
+1])
3407 = (uint64_t) devaddrs
[i
];
3413 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3415 size_t j
, struct_cpy
= 0;
3417 cdata
= gomp_alloca (sizeof (*cdata
) * mapnum
);
3418 memset (cdata
, '\0', sizeof (*cdata
) * mapnum
);
3419 gomp_mutex_lock (&devicep
->lock
);
3420 for (uint64_t i
= 0; i
< mapnum
; i
++)
3422 if (devaddrs
[i
] == 0)
3425 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3428 case GOMP_MAP_FIRSTPRIVATE
:
3429 case GOMP_MAP_FIRSTPRIVATE_INT
:
3432 case GOMP_MAP_DELETE
:
3433 case GOMP_MAP_RELEASE
:
3434 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3435 /* Assume it is present; look it up - but ignore otherwise. */
3436 case GOMP_MAP_ALLOC
:
3438 case GOMP_MAP_FORCE_ALLOC
:
3439 case GOMP_MAP_FORCE_FROM
:
3440 case GOMP_MAP_ALWAYS_FROM
:
3442 case GOMP_MAP_TOFROM
:
3443 case GOMP_MAP_FORCE_TO
:
3444 case GOMP_MAP_FORCE_TOFROM
:
3445 case GOMP_MAP_ALWAYS_TO
:
3446 case GOMP_MAP_ALWAYS_TOFROM
:
3447 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3448 cdata
[i
].devaddr
= devaddrs
[i
];
3449 bool zero_len
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3450 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
);
3451 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3453 devaddrs
[i
] + sizes
[i
], zero_len
);
3457 cdata
[i
].present
= true;
3458 devaddrs
[i
] = devaddrs
[j
] + devaddrs
[i
] - cdata
[j
].devaddr
;
3462 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3464 devaddrs
[i
] + sizes
[i
], zero_len
);
3465 cdata
[i
].present
= n2
!= NULL
;
3467 if (!cdata
[i
].present
3468 && kind
!= GOMP_MAP_DELETE
3469 && kind
!= GOMP_MAP_RELEASE
3470 && kind
!= GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
3472 cdata
[i
].aligned
= true;
3473 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3475 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3478 else if (n2
!= NULL
)
3479 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3480 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3481 if (((!cdata
[i
].present
|| struct_cpy
)
3482 && (kind
== GOMP_MAP_TO
|| kind
== GOMP_MAP_TOFROM
))
3483 || kind
== GOMP_MAP_FORCE_TO
3484 || kind
== GOMP_MAP_FORCE_TOFROM
3485 || kind
== GOMP_MAP_ALWAYS_TO
3486 || kind
== GOMP_MAP_ALWAYS_TOFROM
)
3488 if (dev_to_host_cpy
)
3489 dev_to_host_cpy ((void *) (uintptr_t) devaddrs
[i
],
3490 (void *) (uintptr_t) cdata
[i
].devaddr
,
3493 gomp_copy_dev2host (devicep
, NULL
,
3494 (void *) (uintptr_t) devaddrs
[i
],
3495 (void *) (uintptr_t) cdata
[i
].devaddr
,
3501 case GOMP_MAP_ATTACH
:
3502 case GOMP_MAP_POINTER
:
3503 case GOMP_MAP_ALWAYS_POINTER
:
3504 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3505 devaddrs
[i
] + sizes
[i
],
3506 devaddrs
[i
] + sizes
[i
]
3507 + sizeof (void*), false);
3508 cdata
[i
].present
= n2
!= NULL
;
3509 cdata
[i
].devaddr
= devaddrs
[i
];
3511 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3512 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3515 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3516 devaddrs
[i
] + sizes
[i
],
3517 devaddrs
[i
] + sizes
[i
]
3518 + sizeof (void*), false);
3521 cdata
[i
].present
= true;
3522 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3523 - cdata
[j
].devaddr
);
3526 if (!cdata
[i
].present
)
3527 devaddrs
[i
] = (uintptr_t) gomp_malloc (sizeof (void*));
3528 /* Assume that when present, the pointer is already correct. */
3530 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[i
])
3533 case GOMP_MAP_TO_PSET
:
3534 /* Assume that when present, the pointers are fine and no 'to:'
3536 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3537 devaddrs
[i
], devaddrs
[i
] + sizes
[i
],
3539 cdata
[i
].present
= n2
!= NULL
;
3540 cdata
[i
].devaddr
= devaddrs
[i
];
3542 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3543 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3546 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3548 devaddrs
[i
] + sizes
[i
], false);
3551 cdata
[i
].present
= true;
3552 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3553 - cdata
[j
].devaddr
);
3556 if (!cdata
[i
].present
)
3558 cdata
[i
].aligned
= true;
3559 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3561 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3563 if (dev_to_host_cpy
)
3564 dev_to_host_cpy ((void *) (uintptr_t) devaddrs
[i
],
3565 (void *) (uintptr_t) cdata
[i
].devaddr
,
3568 gomp_copy_dev2host (devicep
, NULL
,
3569 (void *) (uintptr_t) devaddrs
[i
],
3570 (void *) (uintptr_t) cdata
[i
].devaddr
,
3573 for (j
= i
+ 1; j
< mapnum
; j
++)
3575 kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
3576 if (!GOMP_MAP_ALWAYS_POINTER_P (kind
)
3577 && !GOMP_MAP_POINTER_P (kind
))
3579 if (devaddrs
[j
] < devaddrs
[i
])
3581 if (cdata
[i
].present
)
3583 if (devaddrs
[j
] == 0)
3585 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[j
]) = 0;
3590 /* Dereference devaddrs[j] to get the device addr. */
3591 assert (devaddrs
[j
] - sizes
[j
] == cdata
[i
].devaddr
);
3592 devaddrs
[j
] = *(uint64_t *) (uintptr_t) (devaddrs
[i
]
3594 cdata
[j
].present
= true;
3595 cdata
[j
].devaddr
= devaddrs
[j
];
3596 if (devaddrs
[j
] == 0)
3598 k
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, j
,
3600 devaddrs
[j
] + sizeof (void*),
3603 devaddrs
[j
] = (devaddrs
[k
] + devaddrs
[j
]
3604 - cdata
[k
].devaddr
);
3607 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3609 devaddrs
[j
] + sizeof (void*),
3613 gomp_mutex_unlock (&devicep
->lock
);
3614 gomp_fatal ("Pointer target wasn't mapped");
3616 devaddrs
[j
] = (n2
->host_start
+ cdata
[j
].devaddr
3617 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3619 *(void **) (uintptr_t) (devaddrs
[i
] + sizes
[j
])
3620 = (void *) (uintptr_t) devaddrs
[j
];
3624 case GOMP_MAP_STRUCT
:
3625 n2
= gomp_map_rev_lookup (&devicep
->mem_map
, devaddrs
[i
+1],
3626 devaddrs
[i
+ sizes
[i
]]
3627 + sizes
[i
+ sizes
[i
]], false);
3628 cdata
[i
].present
= n2
!= NULL
;
3629 cdata
[i
].devaddr
= devaddrs
[i
];
3630 struct_cpy
= cdata
[i
].present
? 0 : sizes
[i
];
3633 size_t sz
= (size_t) (devaddrs
[i
+ sizes
[i
]]
3635 + sizes
[i
+ sizes
[i
]]);
3636 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3637 cdata
[i
].aligned
= true;
3638 devaddrs
[i
] = (uintptr_t) gomp_aligned_alloc (align
, sz
);
3639 devaddrs
[i
] -= devaddrs
[i
+1] - cdata
[i
].devaddr
;
3642 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3643 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3646 gomp_mutex_unlock (&devicep
->lock
);
3647 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds
[i
]);
3650 gomp_mutex_unlock (&devicep
->lock
);
3655 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3657 uint64_t struct_cpy
= 0;
3658 bool clean_struct
= false;
3659 for (uint64_t i
= 0; i
< mapnum
; i
++)
3661 if (cdata
[i
].devaddr
== 0)
3663 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3664 bool copy
= !cdata
[i
].present
|| struct_cpy
;
3667 case GOMP_MAP_FORCE_FROM
:
3668 case GOMP_MAP_FORCE_TOFROM
:
3669 case GOMP_MAP_ALWAYS_FROM
:
3670 case GOMP_MAP_ALWAYS_TOFROM
:
3674 case GOMP_MAP_TOFROM
:
3675 if (copy
&& host_to_dev_cpy
)
3676 host_to_dev_cpy ((void *) (uintptr_t) cdata
[i
].devaddr
,
3677 (void *) (uintptr_t) devaddrs
[i
],
3680 gomp_copy_host2dev (devicep
, NULL
,
3681 (void *) (uintptr_t) cdata
[i
].devaddr
,
3682 (void *) (uintptr_t) devaddrs
[i
],
3683 sizes
[i
], false, NULL
);
3692 if (kind
== GOMP_MAP_STRUCT
&& !cdata
[i
].present
)
3694 clean_struct
= true;
3695 struct_cpy
= sizes
[i
];
3697 else if (!cdata
[i
].present
&& cdata
[i
].aligned
)
3698 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3699 else if (!cdata
[i
].present
)
3700 free ((void *) (uintptr_t) devaddrs
[i
]);
3703 for (uint64_t i
= 0; i
< mapnum
; i
++)
3704 if (!cdata
[i
].present
3705 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3706 == GOMP_MAP_STRUCT
))
3708 devaddrs
[i
] += cdata
[i
+1].devaddr
- cdata
[i
].devaddr
;
3709 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3718 /* Host fallback for GOMP_target_data{,_ext} routines. */
3721 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
3723 struct gomp_task_icv
*icv
= gomp_icv (false);
3725 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3727 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3728 "be used for offloading");
3730 if (icv
->target_data
)
3732 /* Even when doing a host fallback, if there are any active
3733 #pragma omp target data constructs, need to remember the
3734 new #pragma omp target data, otherwise GOMP_target_end_data
3735 would get out of sync. */
3736 struct target_mem_desc
*tgt
3737 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
3738 NULL
, GOMP_MAP_VARS_DATA
);
3739 tgt
->prev
= icv
->target_data
;
3740 icv
->target_data
= tgt
;
3745 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
3746 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3748 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3751 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3752 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3753 return gomp_target_data_fallback (devicep
);
3755 struct target_mem_desc
*tgt
3756 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3757 NULL
, GOMP_MAP_VARS_DATA
);
3758 struct gomp_task_icv
*icv
= gomp_icv (true);
3759 tgt
->prev
= icv
->target_data
;
3760 icv
->target_data
= tgt
;
3764 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
3765 size_t *sizes
, unsigned short *kinds
)
3767 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3770 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3771 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3772 return gomp_target_data_fallback (devicep
);
3774 struct target_mem_desc
*tgt
3775 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
3776 NULL
, GOMP_MAP_VARS_DATA
);
3777 struct gomp_task_icv
*icv
= gomp_icv (true);
3778 tgt
->prev
= icv
->target_data
;
3779 icv
->target_data
= tgt
;
3783 GOMP_target_end_data (void)
3785 struct gomp_task_icv
*icv
= gomp_icv (false);
3786 if (icv
->target_data
)
3788 struct target_mem_desc
*tgt
= icv
->target_data
;
3789 icv
->target_data
= tgt
->prev
;
3790 gomp_unmap_vars (tgt
, true, NULL
);
3795 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
3796 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3798 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3801 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3802 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3805 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
3809 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
3810 size_t *sizes
, unsigned short *kinds
,
3811 unsigned int flags
, void **depend
)
3813 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3815 /* If there are depend clauses, but nowait is not present,
3816 block the parent task until the dependencies are resolved
3817 and then just continue with the rest of the function as if it
3818 is a merged task. Until we are able to schedule task during
3819 variable mapping or unmapping, ignore nowait if depend clauses
3823 struct gomp_thread
*thr
= gomp_thread ();
3824 if (thr
->task
&& thr
->task
->depend_hash
)
3826 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3828 && !thr
->task
->final_task
)
3830 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3831 mapnum
, hostaddrs
, sizes
, kinds
,
3832 flags
| GOMP_TARGET_FLAG_UPDATE
,
3833 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
3838 struct gomp_team
*team
= thr
->ts
.team
;
3839 /* If parallel or taskgroup has been cancelled, don't start new
3841 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3843 if (gomp_team_barrier_cancelled (&team
->barrier
))
3845 if (thr
->task
->taskgroup
)
3847 if (thr
->task
->taskgroup
->cancelled
)
3849 if (thr
->task
->taskgroup
->workshare
3850 && thr
->task
->taskgroup
->prev
3851 && thr
->task
->taskgroup
->prev
->cancelled
)
3856 gomp_task_maybe_wait_for_dependencies (depend
);
3862 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3863 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3866 struct gomp_thread
*thr
= gomp_thread ();
3867 struct gomp_team
*team
= thr
->ts
.team
;
3868 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3869 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3871 if (gomp_team_barrier_cancelled (&team
->barrier
))
3873 if (thr
->task
->taskgroup
)
3875 if (thr
->task
->taskgroup
->cancelled
)
3877 if (thr
->task
->taskgroup
->workshare
3878 && thr
->task
->taskgroup
->prev
3879 && thr
->task
->taskgroup
->prev
->cancelled
)
3884 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
3888 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
3889 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3890 htab_t
*refcount_set
)
3892 const int typemask
= 0xff;
3894 gomp_mutex_lock (&devicep
->lock
);
3895 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
3897 gomp_mutex_unlock (&devicep
->lock
);
3901 for (i
= 0; i
< mapnum
; i
++)
3902 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
3904 struct splay_tree_key_s cur_node
;
3905 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3906 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
3907 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3910 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
3915 splay_tree_key remove_vars
[mapnum
];
3917 for (i
= 0; i
< mapnum
; i
++)
3919 struct splay_tree_key_s cur_node
;
3920 unsigned char kind
= kinds
[i
] & typemask
;
3924 case GOMP_MAP_ALWAYS_FROM
:
3925 case GOMP_MAP_DELETE
:
3926 case GOMP_MAP_RELEASE
:
3927 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3928 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3929 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3930 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
3931 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3932 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
3933 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
3934 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3938 bool delete_p
= (kind
== GOMP_MAP_DELETE
3939 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
3940 bool do_copy
, do_remove
;
3941 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
3944 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
3945 || kind
== GOMP_MAP_ALWAYS_FROM
)
3947 if (k
->aux
&& k
->aux
->attach_count
)
3949 /* We have to be careful not to overwrite still attached
3950 pointers during the copyback to host. */
3951 uintptr_t addr
= k
->host_start
;
3952 while (addr
< k
->host_end
)
3954 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
3955 if (k
->aux
->attach_count
[i
] == 0)
3956 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
3957 (void *) (k
->tgt
->tgt_start
3959 + addr
- k
->host_start
),
3961 addr
+= sizeof (void *);
3965 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
3966 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
3967 + cur_node
.host_start
3969 cur_node
.host_end
- cur_node
.host_start
);
3972 /* Structure elements lists are removed altogether at once, which
3973 may cause immediate deallocation of the target_mem_desc, causing
3974 errors if we still have following element siblings to copy back.
3975 While we're at it, it also seems more disciplined to simply
3976 queue all removals together for processing below.
3978 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3979 not have this problem, since they maintain an additional
3980 tgt->refcount = 1 reference to the target_mem_desc to start with.
3983 remove_vars
[nrmvars
++] = k
;
3986 case GOMP_MAP_DETACH
:
3989 gomp_mutex_unlock (&devicep
->lock
);
3990 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3995 for (int i
= 0; i
< nrmvars
; i
++)
3996 gomp_remove_var (devicep
, remove_vars
[i
]);
3998 gomp_mutex_unlock (&devicep
->lock
);
4002 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
4003 size_t *sizes
, unsigned short *kinds
,
4004 unsigned int flags
, void **depend
)
4006 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4008 /* If there are depend clauses, but nowait is not present,
4009 block the parent task until the dependencies are resolved
4010 and then just continue with the rest of the function as if it
4011 is a merged task. Until we are able to schedule task during
4012 variable mapping or unmapping, ignore nowait if depend clauses
4016 struct gomp_thread
*thr
= gomp_thread ();
4017 if (thr
->task
&& thr
->task
->depend_hash
)
4019 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
4021 && !thr
->task
->final_task
)
4023 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
4024 mapnum
, hostaddrs
, sizes
, kinds
,
4025 flags
, depend
, NULL
,
4026 GOMP_TARGET_TASK_DATA
))
4031 struct gomp_team
*team
= thr
->ts
.team
;
4032 /* If parallel or taskgroup has been cancelled, don't start new
4034 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4036 if (gomp_team_barrier_cancelled (&team
->barrier
))
4038 if (thr
->task
->taskgroup
)
4040 if (thr
->task
->taskgroup
->cancelled
)
4042 if (thr
->task
->taskgroup
->workshare
4043 && thr
->task
->taskgroup
->prev
4044 && thr
->task
->taskgroup
->prev
->cancelled
)
4049 gomp_task_maybe_wait_for_dependencies (depend
);
4055 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4056 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4059 struct gomp_thread
*thr
= gomp_thread ();
4060 struct gomp_team
*team
= thr
->ts
.team
;
4061 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4062 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4064 if (gomp_team_barrier_cancelled (&team
->barrier
))
4066 if (thr
->task
->taskgroup
)
4068 if (thr
->task
->taskgroup
->cancelled
)
4070 if (thr
->task
->taskgroup
->workshare
4071 && thr
->task
->taskgroup
->prev
4072 && thr
->task
->taskgroup
->prev
->cancelled
)
4077 htab_t refcount_set
= htab_create (mapnum
);
4079 /* The variables are mapped separately such that they can be released
4082 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4083 for (i
= 0; i
< mapnum
; i
++)
4084 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4086 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
4087 &kinds
[i
], true, &refcount_set
,
4088 GOMP_MAP_VARS_ENTER_DATA
);
4091 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
4093 for (j
= i
+ 1; j
< mapnum
; j
++)
4094 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
4095 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
4097 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
4098 &kinds
[i
], true, &refcount_set
,
4099 GOMP_MAP_VARS_ENTER_DATA
);
4102 else if (i
+ 1 < mapnum
4103 && ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
4104 || ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4105 && (kinds
[i
] & 0xff) != GOMP_MAP_ALWAYS_POINTER
)))
4107 /* An attach operation must be processed together with the mapped
4108 base-pointer list item. */
4109 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4110 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4114 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4115 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4117 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
4118 htab_free (refcount_set
);
4122 gomp_target_task_fn (void *data
)
4124 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
4125 struct gomp_device_descr
*devicep
= ttask
->devicep
;
4127 if (ttask
->fn
!= NULL
)
4131 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4132 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
4133 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
4135 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
4136 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
4141 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
4144 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
4148 void *actual_arguments
;
4149 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4152 actual_arguments
= ttask
->hostaddrs
;
4156 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
4157 NULL
, ttask
->sizes
, ttask
->kinds
, true,
4158 NULL
, GOMP_MAP_VARS_TARGET
);
4159 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
4161 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
4163 assert (devicep
->async_run_func
);
4164 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
4165 ttask
->args
, (void *) ttask
);
4168 else if (devicep
== NULL
4169 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4170 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4174 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
4175 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4176 ttask
->kinds
, true);
4179 htab_t refcount_set
= htab_create (ttask
->mapnum
);
4180 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4181 for (i
= 0; i
< ttask
->mapnum
; i
++)
4182 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4184 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
4185 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
4186 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4187 i
+= ttask
->sizes
[i
];
4190 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
4191 &ttask
->kinds
[i
], true, &refcount_set
,
4192 GOMP_MAP_VARS_ENTER_DATA
);
4194 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4195 ttask
->kinds
, &refcount_set
);
4196 htab_free (refcount_set
);
4202 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
4206 struct gomp_task_icv
*icv
= gomp_icv (true);
4207 icv
->thread_limit_var
4208 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4214 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
4215 unsigned int thread_limit
, bool first
)
4217 struct gomp_thread
*thr
= gomp_thread ();
4222 struct gomp_task_icv
*icv
= gomp_icv (true);
4223 icv
->thread_limit_var
4224 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4226 (void) num_teams_high
;
4227 if (num_teams_low
== 0)
4229 thr
->num_teams
= num_teams_low
- 1;
4232 else if (thr
->team_num
== thr
->num_teams
)
4240 omp_target_alloc (size_t size
, int device_num
)
4242 if (device_num
== omp_initial_device
4243 || device_num
== gomp_get_num_devices ())
4244 return malloc (size
);
4246 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4247 if (devicep
== NULL
)
4250 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4251 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4252 return malloc (size
);
4254 gomp_mutex_lock (&devicep
->lock
);
4255 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
4256 gomp_mutex_unlock (&devicep
->lock
);
4261 omp_target_free (void *device_ptr
, int device_num
)
4263 if (device_num
== omp_initial_device
4264 || device_num
== gomp_get_num_devices ())
4270 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4271 if (devicep
== NULL
|| device_ptr
== NULL
)
4274 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4275 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4281 gomp_mutex_lock (&devicep
->lock
);
4282 gomp_free_device_memory (devicep
, device_ptr
);
4283 gomp_mutex_unlock (&devicep
->lock
);
4287 omp_target_is_present (const void *ptr
, int device_num
)
4289 if (device_num
== omp_initial_device
4290 || device_num
== gomp_get_num_devices ())
4293 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4294 if (devicep
== NULL
)
4300 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4301 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4304 gomp_mutex_lock (&devicep
->lock
);
4305 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4306 struct splay_tree_key_s cur_node
;
4308 cur_node
.host_start
= (uintptr_t) ptr
;
4309 cur_node
.host_end
= cur_node
.host_start
;
4310 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4311 int ret
= n
!= NULL
;
4312 gomp_mutex_unlock (&devicep
->lock
);
4317 omp_target_memcpy_check (int dst_device_num
, int src_device_num
,
4318 struct gomp_device_descr
**dst_devicep
,
4319 struct gomp_device_descr
**src_devicep
)
4321 if (dst_device_num
!= gomp_get_num_devices ()
4322 /* Above gomp_get_num_devices has to be called unconditionally. */
4323 && dst_device_num
!= omp_initial_device
)
4325 *dst_devicep
= resolve_device (dst_device_num
, false);
4326 if (*dst_devicep
== NULL
)
4329 if (!((*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4330 || (*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4331 *dst_devicep
= NULL
;
4334 if (src_device_num
!= num_devices_openmp
4335 && src_device_num
!= omp_initial_device
)
4337 *src_devicep
= resolve_device (src_device_num
, false);
4338 if (*src_devicep
== NULL
)
4341 if (!((*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4342 || (*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4343 *src_devicep
= NULL
;
4350 omp_target_memcpy_copy (void *dst
, const void *src
, size_t length
,
4351 size_t dst_offset
, size_t src_offset
,
4352 struct gomp_device_descr
*dst_devicep
,
4353 struct gomp_device_descr
*src_devicep
)
4356 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
4358 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
4361 if (src_devicep
== NULL
)
4363 gomp_mutex_lock (&dst_devicep
->lock
);
4364 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4365 (char *) dst
+ dst_offset
,
4366 (char *) src
+ src_offset
, length
);
4367 gomp_mutex_unlock (&dst_devicep
->lock
);
4368 return (ret
? 0 : EINVAL
);
4370 if (dst_devicep
== NULL
)
4372 gomp_mutex_lock (&src_devicep
->lock
);
4373 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4374 (char *) dst
+ dst_offset
,
4375 (char *) src
+ src_offset
, length
);
4376 gomp_mutex_unlock (&src_devicep
->lock
);
4377 return (ret
? 0 : EINVAL
);
4379 if (src_devicep
== dst_devicep
)
4381 gomp_mutex_lock (&src_devicep
->lock
);
4382 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4383 (char *) dst
+ dst_offset
,
4384 (char *) src
+ src_offset
, length
);
4385 gomp_mutex_unlock (&src_devicep
->lock
);
4386 return (ret
? 0 : EINVAL
);
/* OpenMP API: copy LENGTH bytes between possibly-device memory regions,
   applying DST_OFFSET/SRC_OFFSET.  Returns 0 on success, an errno value
   (EINVAL) on failure.
   NOTE(review): reconstructed from a corrupted extraction; verify against
   upstream libgomp target.c.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);
  return ret;
}
/* Argument bundle for the omp_target_memcpy_async helper task; field list
   matches the designated initializer in omp_target_memcpy_async.
   NOTE(review): leading fields reconstructed from a corrupted extraction;
   verify against upstream libgomp target.c.  */
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;
4420 omp_target_memcpy_async_helper (void *args
)
4422 omp_target_memcpy_data
*a
= args
;
4423 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
4424 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
4425 gomp_fatal ("omp_target_memcpy failed");
4429 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
4430 size_t dst_offset
, size_t src_offset
,
4431 int dst_device_num
, int src_device_num
,
4432 int depobj_count
, omp_depend_t
*depobj_list
)
4434 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4435 unsigned int flags
= 0;
4436 void *depend
[depobj_count
+ 5];
4438 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4439 &dst_devicep
, &src_devicep
);
4441 omp_target_memcpy_data s
= {
4445 .dst_offset
= dst_offset
,
4446 .src_offset
= src_offset
,
4447 .dst_devicep
= dst_devicep
,
4448 .src_devicep
= src_devicep
4454 if (depobj_count
> 0 && depobj_list
!= NULL
)
4456 flags
|= GOMP_TASK_FLAG_DEPEND
;
4458 depend
[1] = (void *) (uintptr_t) depobj_count
;
4459 depend
[2] = depend
[3] = depend
[4] = 0;
4460 for (i
= 0; i
< depobj_count
; ++i
)
4461 depend
[i
+ 5] = &depobj_list
[i
];
4464 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
4465 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4471 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
4472 int num_dims
, const size_t *volume
,
4473 const size_t *dst_offsets
,
4474 const size_t *src_offsets
,
4475 const size_t *dst_dimensions
,
4476 const size_t *src_dimensions
,
4477 struct gomp_device_descr
*dst_devicep
,
4478 struct gomp_device_descr
*src_devicep
)
4480 size_t dst_slice
= element_size
;
4481 size_t src_slice
= element_size
;
4482 size_t j
, dst_off
, src_off
, length
;
4487 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
4488 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
4489 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
4491 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
4493 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
4497 else if (src_devicep
== NULL
)
4498 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4499 (char *) dst
+ dst_off
,
4500 (const char *) src
+ src_off
,
4502 else if (dst_devicep
== NULL
)
4503 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4504 (char *) dst
+ dst_off
,
4505 (const char *) src
+ src_off
,
4507 else if (src_devicep
== dst_devicep
)
4508 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4509 (char *) dst
+ dst_off
,
4510 (const char *) src
+ src_off
,
4514 return ret
? 0 : EINVAL
;
4517 /* FIXME: it would be nice to have some plugin function to handle
4518 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
4519 be handled in the generic recursion below, and for host-host it
4520 should be used even for any num_dims >= 2. */
4522 for (i
= 1; i
< num_dims
; i
++)
4523 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
4524 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
4526 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
4527 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
4529 for (j
= 0; j
< volume
[0]; j
++)
4531 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
4532 (const char *) src
+ src_off
,
4533 element_size
, num_dims
- 1,
4534 volume
+ 1, dst_offsets
+ 1,
4535 src_offsets
+ 1, dst_dimensions
+ 1,
4536 src_dimensions
+ 1, dst_devicep
,
4540 dst_off
+= dst_slice
;
4541 src_off
+= src_slice
;
4547 omp_target_memcpy_rect_check (void *dst
, const void *src
, int dst_device_num
,
4549 struct gomp_device_descr
**dst_devicep
,
4550 struct gomp_device_descr
**src_devicep
)
4555 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4556 dst_devicep
, src_devicep
);
4560 if (*src_devicep
!= NULL
&& *dst_devicep
!= NULL
&& *src_devicep
!= *dst_devicep
)
4567 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
4568 size_t element_size
, int num_dims
,
4569 const size_t *volume
, const size_t *dst_offsets
,
4570 const size_t *src_offsets
,
4571 const size_t *dst_dimensions
,
4572 const size_t *src_dimensions
,
4573 struct gomp_device_descr
*dst_devicep
,
4574 struct gomp_device_descr
*src_devicep
)
4577 gomp_mutex_lock (&src_devicep
->lock
);
4578 else if (dst_devicep
)
4579 gomp_mutex_lock (&dst_devicep
->lock
);
4580 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
4581 volume
, dst_offsets
, src_offsets
,
4582 dst_dimensions
, src_dimensions
,
4583 dst_devicep
, src_devicep
);
4585 gomp_mutex_unlock (&src_devicep
->lock
);
4586 else if (dst_devicep
)
4587 gomp_mutex_unlock (&dst_devicep
->lock
);
/* OpenMP API: copy a rectangular subvolume of an NUM_DIMS-dimensional
   array between host and/or device memory.  Returns 0 on success, a
   non-zero error value otherwise.  */
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
/* Argument bundle for the asynchronous rectangular-copy task helper;
   mirrors the parameters of omp_target_memcpy_rect_copy.  */
typedef struct
{
  void *dst;				/* Destination base pointer.  */
  const void *src;			/* Source base pointer.  */
  size_t element_size;			/* Size of one array element.  */
  const size_t *volume;			/* Extent to copy, per dimension.  */
  const size_t *dst_offsets;		/* Start offsets in DST, per dim.  */
  const size_t *src_offsets;		/* Start offsets in SRC, per dim.  */
  const size_t *dst_dimensions;		/* Full extents of DST, per dim.  */
  const size_t *src_dimensions;		/* Full extents of SRC, per dim.  */
  struct gomp_device_descr *dst_devicep; /* NULL means host.  */
  struct gomp_device_descr *src_devicep; /* NULL means host.  */
  int num_dims;				/* Number of dimensions.  */
} omp_target_memcpy_rect_data;
4634 omp_target_memcpy_rect_async_helper (void *args
)
4636 omp_target_memcpy_rect_data
*a
= args
;
4637 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
4638 a
->num_dims
, a
->volume
, a
->dst_offsets
,
4639 a
->src_offsets
, a
->dst_dimensions
,
4640 a
->src_dimensions
, a
->dst_devicep
,
4643 gomp_fatal ("omp_target_memcpy_rect failed");
4647 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
4648 int num_dims
, const size_t *volume
,
4649 const size_t *dst_offsets
,
4650 const size_t *src_offsets
,
4651 const size_t *dst_dimensions
,
4652 const size_t *src_dimensions
,
4653 int dst_device_num
, int src_device_num
,
4654 int depobj_count
, omp_depend_t
*depobj_list
)
4656 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4658 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4659 src_device_num
, &dst_devicep
,
4661 void *depend
[depobj_count
+ 5];
4664 omp_target_memcpy_rect_data s
= {
4667 .element_size
= element_size
,
4668 .num_dims
= num_dims
,
4670 .dst_offsets
= dst_offsets
,
4671 .src_offsets
= src_offsets
,
4672 .dst_dimensions
= dst_dimensions
,
4673 .src_dimensions
= src_dimensions
,
4674 .dst_devicep
= dst_devicep
,
4675 .src_devicep
= src_devicep
4681 if (depobj_count
> 0 && depobj_list
!= NULL
)
4683 flags
|= GOMP_TASK_FLAG_DEPEND
;
4685 depend
[1] = (void *) (uintptr_t) depobj_count
;
4686 depend
[2] = depend
[3] = depend
[4] = 0;
4687 for (i
= 0; i
< depobj_count
; ++i
)
4688 depend
[i
+ 5] = &depobj_list
[i
];
4691 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
4692 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4698 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
4699 size_t size
, size_t device_offset
, int device_num
)
4701 if (device_num
== omp_initial_device
4702 || device_num
== gomp_get_num_devices ())
4705 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4706 if (devicep
== NULL
)
4709 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4710 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4713 gomp_mutex_lock (&devicep
->lock
);
4715 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4716 struct splay_tree_key_s cur_node
;
4719 cur_node
.host_start
= (uintptr_t) host_ptr
;
4720 cur_node
.host_end
= cur_node
.host_start
+ size
;
4721 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4724 if (n
->tgt
->tgt_start
+ n
->tgt_offset
4725 == (uintptr_t) device_ptr
+ device_offset
4726 && n
->host_start
<= cur_node
.host_start
4727 && n
->host_end
>= cur_node
.host_end
)
4732 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
4733 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
4737 tgt
->to_free
= NULL
;
4739 tgt
->list_count
= 0;
4740 tgt
->device_descr
= devicep
;
4741 splay_tree_node array
= tgt
->array
;
4742 splay_tree_key k
= &array
->key
;
4743 k
->host_start
= cur_node
.host_start
;
4744 k
->host_end
= cur_node
.host_end
;
4746 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
4747 k
->refcount
= REFCOUNT_INFINITY
;
4748 k
->dynamic_refcount
= 0;
4751 array
->right
= NULL
;
4752 splay_tree_insert (&devicep
->mem_map
, array
);
4755 gomp_mutex_unlock (&devicep
->lock
);
4760 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
4762 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4763 if (devicep
== NULL
)
4766 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4769 gomp_mutex_lock (&devicep
->lock
);
4771 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4772 struct splay_tree_key_s cur_node
;
4775 cur_node
.host_start
= (uintptr_t) ptr
;
4776 cur_node
.host_end
= cur_node
.host_start
;
4777 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4779 && n
->host_start
== cur_node
.host_start
4780 && n
->refcount
== REFCOUNT_INFINITY
4781 && n
->tgt
->tgt_start
== 0
4782 && n
->tgt
->to_free
== NULL
4783 && n
->tgt
->refcount
== 1
4784 && n
->tgt
->list_count
== 0)
4786 splay_tree_remove (&devicep
->mem_map
, n
);
4787 gomp_unmap_tgt (n
->tgt
);
4791 gomp_mutex_unlock (&devicep
->lock
);
4796 omp_get_mapped_ptr (const void *ptr
, int device_num
)
4798 if (device_num
== omp_initial_device
4799 || device_num
== omp_get_initial_device ())
4800 return (void *) ptr
;
4802 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4803 if (devicep
== NULL
)
4806 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4807 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4808 return (void *) ptr
;
4810 gomp_mutex_lock (&devicep
->lock
);
4812 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4813 struct splay_tree_key_s cur_node
;
4816 cur_node
.host_start
= (uintptr_t) ptr
;
4817 cur_node
.host_end
= cur_node
.host_start
;
4818 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4822 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
4823 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
4826 gomp_mutex_unlock (&devicep
->lock
);
4832 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
4834 if (device_num
== omp_initial_device
4835 || device_num
== gomp_get_num_devices ())
4838 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4839 if (devicep
== NULL
)
4842 /* TODO: Unified shared memory must be handled when available. */
4844 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
4848 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
4851 if (device_num
== omp_initial_device
4852 || device_num
== gomp_get_num_devices ())
4853 return gomp_pause_host ();
4855 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4856 if (devicep
== NULL
)
4859 /* Do nothing for target devices for now. */
4864 omp_pause_resource_all (omp_pause_resource_t kind
)
4867 if (gomp_pause_host ())
4869 /* Do nothing for target devices for now. */
4873 ialias (omp_pause_resource
)
4874 ialias (omp_pause_resource_all
)
4876 #ifdef PLUGIN_SUPPORT
4878 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
4880 The handles of the found functions are stored in the corresponding fields
4881 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4884 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
4885 const char *plugin_name
)
4887 const char *err
= NULL
, *last_missing
= NULL
;
4889 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
4891 #if OFFLOAD_DEFAULTED
4897 /* Check if all required functions are available in the plugin and store
4898 their handlers. None of the symbols can legitimately be NULL,
4899 so we don't need to check dlerror all the time. */
4901 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4903 /* Similar, but missing functions are not an error. Return false if
4904 failed, true otherwise. */
4905 #define DLSYM_OPT(f, n) \
4906 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4907 || (last_missing = #n, 0))
4910 if (device
->version_func () != GOMP_VERSION
)
4912 err
= "plugin version mismatch";
4919 DLSYM (get_num_devices
);
4920 DLSYM (init_device
);
4921 DLSYM (fini_device
);
4923 DLSYM (unload_image
);
4928 device
->capabilities
= device
->get_caps_func ();
4929 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4932 DLSYM_OPT (async_run
, async_run
);
4933 DLSYM_OPT (can_run
, can_run
);
4936 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4938 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
4939 || !DLSYM_OPT (openacc
.create_thread_data
,
4940 openacc_create_thread_data
)
4941 || !DLSYM_OPT (openacc
.destroy_thread_data
,
4942 openacc_destroy_thread_data
)
4943 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
4944 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
4945 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
4946 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
4947 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
4948 || !DLSYM_OPT (openacc
.async
.queue_callback
,
4949 openacc_async_queue_callback
)
4950 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
4951 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
4952 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
4953 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
4955 /* Require all the OpenACC handlers if we have
4956 GOMP_OFFLOAD_CAP_OPENACC_200. */
4957 err
= "plugin missing OpenACC handler function";
4962 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
4963 openacc_cuda_get_current_device
);
4964 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
4965 openacc_cuda_get_current_context
);
4966 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
4967 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
4968 if (cuda
&& cuda
!= 4)
4970 /* Make sure all the CUDA functions are there if any of them are. */
4971 err
= "plugin missing OpenACC CUDA handler function";
4983 gomp_error ("while loading %s: %s", plugin_name
, err
);
4985 gomp_error ("missing function was %s", last_missing
);
4987 dlclose (plugin_handle
);
4992 /* This function finalizes all initialized devices. */
4995 gomp_target_fini (void)
4998 for (i
= 0; i
< num_devices
; i
++)
5001 struct gomp_device_descr
*devicep
= &devices
[i
];
5002 gomp_mutex_lock (&devicep
->lock
);
5003 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
5004 ret
= gomp_fini_device (devicep
);
5005 gomp_mutex_unlock (&devicep
->lock
);
5007 gomp_fatal ("device finalization failed");
5011 /* This function initializes the runtime for offloading.
5012 It parses the list of offload plugins, and tries to load these.
5013 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5014 will be set, and the array DEVICES initialized, containing descriptors for
5015 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
5019 gomp_target_init (void)
5021 const char *prefix
="libgomp-plugin-";
5022 const char *suffix
= SONAME_SUFFIX (1);
5023 const char *cur
, *next
;
5025 int i
, new_num_devs
;
5026 int num_devs
= 0, num_devs_openmp
;
5027 struct gomp_device_descr
*devs
= NULL
;
5029 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
5032 cur
= OFFLOAD_PLUGINS
;
5036 struct gomp_device_descr current_device
;
5037 size_t prefix_len
, suffix_len
, cur_len
;
5039 next
= strchr (cur
, ',');
5041 prefix_len
= strlen (prefix
);
5042 cur_len
= next
? next
- cur
: strlen (cur
);
5043 suffix_len
= strlen (suffix
);
5045 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
5052 memcpy (plugin_name
, prefix
, prefix_len
);
5053 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
5054 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
5056 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
5058 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
5059 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
5060 if (gomp_debug_var
> 0 && new_num_devs
< 0)
5063 int type
= current_device
.get_type_func ();
5064 for (int img
= 0; img
< num_offload_images
; img
++)
5065 if (type
== offload_images
[img
].type
)
5069 char buf
[sizeof ("unified_address, unified_shared_memory, "
5070 "reverse_offload")];
5071 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
5072 char *name
= (char *) malloc (cur_len
+ 1);
5073 memcpy (name
, cur
, cur_len
);
5074 name
[cur_len
] = '\0';
5076 "%s devices present but 'omp requires %s' "
5077 "cannot be fulfilled\n", name
, buf
);
5081 else if (new_num_devs
>= 1)
5083 /* Augment DEVICES and NUM_DEVICES. */
5085 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
5086 * sizeof (struct gomp_device_descr
));
5094 current_device
.name
= current_device
.get_name_func ();
5095 /* current_device.capabilities has already been set. */
5096 current_device
.type
= current_device
.get_type_func ();
5097 current_device
.mem_map
.root
= NULL
;
5098 current_device
.mem_map_rev
.root
= NULL
;
5099 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
5100 for (i
= 0; i
< new_num_devs
; i
++)
5102 current_device
.target_id
= i
;
5103 devs
[num_devs
] = current_device
;
5104 gomp_mutex_init (&devs
[num_devs
].lock
);
5115 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5116 NUM_DEVICES_OPENMP. */
5117 struct gomp_device_descr
*devs_s
5118 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
5125 num_devs_openmp
= 0;
5126 for (i
= 0; i
< num_devs
; i
++)
5127 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5128 devs_s
[num_devs_openmp
++] = devs
[i
];
5129 int num_devs_after_openmp
= num_devs_openmp
;
5130 for (i
= 0; i
< num_devs
; i
++)
5131 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5132 devs_s
[num_devs_after_openmp
++] = devs
[i
];
5136 for (i
= 0; i
< num_devs
; i
++)
5138 /* The 'devices' array can be moved (by the realloc call) until we have
5139 found all the plugins, so registering with the OpenACC runtime (which
5140 takes a copy of the pointer argument) must be delayed until now. */
5141 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5142 goacc_register (&devs
[i
]);
5145 num_devices
= num_devs
;
5146 num_devices_openmp
= num_devs_openmp
;
5148 if (atexit (gomp_target_fini
) != 0)
5149 gomp_fatal ("atexit failed");
5152 #else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fallback to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
5159 #endif /* PLUGIN_SUPPORT */