1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
51 #include "splay-tree.h"
54 typedef uintptr_t *hash_entry_type
;
/* Allocation callback for the refcount hash table: route every table
   allocation through libgomp's fatal-on-OOM allocator.  */
static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}
/* Deallocation counterpart of htab_alloc; releases hash-table storage.  */
static inline void
htab_free (void *ptr)
{
  free (ptr);
}
59 ialias_redirect (GOMP_task
)
61 static inline hashval_t
62 htab_hash (hash_entry_type element
)
64 return hash_pointer ((void *) element
);
68 htab_eq (hash_entry_type x
, hash_entry_type y
)
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
75 static void gomp_target_init (void);
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock
;
83 /* This structure describes an offload image.
84 It contains type of the target device, pointer to host table descriptor, and
85 pointer to target data. */
86 struct offload_image_descr
{
88 enum offload_target_type type
;
89 const void *host_table
;
90 const void *target_data
;
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr
*offload_images
;
96 /* Total number of offload images. */
97 static int num_offload_images
;
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr
*devices
;
102 /* Total number of available devices. */
103 static int num_devices
;
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp
;
108 /* OpenMP requires mask. */
109 static int omp_requires_mask
;
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
114 gomp_realloc_unlock (void *old
, size_t size
)
116 void *ret
= realloc (old
, size
);
119 gomp_mutex_unlock (®ister_lock
);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
125 attribute_hidden
void
126 gomp_init_targets_once (void)
128 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
132 gomp_get_num_devices (void)
134 gomp_init_targets_once ();
135 return num_devices_openmp
;
138 static struct gomp_device_descr
*
139 resolve_device (int device_id
, bool remapped
)
141 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
143 struct gomp_task_icv
*icv
= gomp_icv (false);
144 device_id
= icv
->default_device_var
;
150 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device
))
153 if (device_id
== omp_invalid_device
)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
161 else if (device_id
>= gomp_get_num_devices ())
163 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id
!= num_devices_openmp
)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
171 gomp_mutex_lock (&devices
[device_id
].lock
);
172 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
173 gomp_init_device (&devices
[device_id
]);
174 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
176 gomp_mutex_unlock (&devices
[device_id
].lock
);
178 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
184 gomp_mutex_unlock (&devices
[device_id
].lock
);
186 return &devices
[device_id
];
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
193 if (key
->host_start
!= key
->host_end
)
194 return splay_tree_lookup (mem_map
, key
);
197 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
202 n
= splay_tree_lookup (mem_map
, key
);
206 return splay_tree_lookup (mem_map
, key
);
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev
, reverse_splay_tree_key key
)
212 return reverse_splay_tree_lookup (mem_map_rev
, key
);
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
218 if (key
->host_start
!= key
->host_end
)
219 return splay_tree_lookup (mem_map
, key
);
222 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
228 gomp_device_copy (struct gomp_device_descr
*devicep
,
229 bool (*copy_func
) (int, void *, const void *, size_t),
230 const char *dst
, void *dstaddr
,
231 const char *src
, const void *srcaddr
,
234 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
236 gomp_mutex_unlock (&devicep
->lock
);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
243 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
244 bool (*copy_func
) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue
*),
246 const char *dst
, void *dstaddr
,
247 const char *src
, const void *srcaddr
,
248 const void *srcaddr_orig
,
249 size_t size
, struct goacc_asyncqueue
*aq
)
251 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
253 gomp_mutex_unlock (&devicep
->lock
);
254 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src
, srcaddr_orig
, srcaddr_orig
+ size
,
259 src
, srcaddr
, srcaddr
+ size
,
260 dst
, dstaddr
, dstaddr
+ size
);
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src
, srcaddr
, srcaddr
+ size
,
265 dst
, dstaddr
, dstaddr
+ size
);
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
272 struct gomp_coalesce_chunk
274 /* The starting and ending point of a coalesced chunk of memory. */
278 struct gomp_coalesce_buf
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
283 struct target_mem_desc
*tgt
;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk
*chunks
;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
309 /* Add region with device tgt_start relative offset and length to CBUF.
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
313 example). The exception is for EPHEMERAL data, that we know is available
314 already "by construction". */
317 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
319 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
323 if (cbuf
->chunk_cnt
< 0)
325 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
327 cbuf
->chunk_cnt
= -1;
330 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
332 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
336 /* If the last chunk is only used by one mapping, discard it,
337 as it will be one host to device copy anyway and
338 memcpying it around will only waste cycles. */
339 if (cbuf
->use_cnt
== 1)
342 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
343 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
348 /* Return true for mapping kinds which need to copy data from the
349 host to device for regions that weren't previously mapped. */
352 gomp_to_device_kind_p (int kind
)
358 case GOMP_MAP_FORCE_ALLOC
:
359 case GOMP_MAP_FORCE_FROM
:
360 case GOMP_MAP_ALWAYS_FROM
:
367 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
368 non-NULL), when the source data is stack or may otherwise be deallocated
369 before the asynchronous copy takes place, EPHEMERAL must be passed as
372 attribute_hidden
void
373 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
374 struct goacc_asyncqueue
*aq
,
375 void *d
, const void *h
, size_t sz
,
376 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
380 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
381 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
384 long last
= cbuf
->chunk_cnt
- 1;
385 while (first
<= last
)
387 long middle
= (first
+ last
) >> 1;
388 if (cbuf
->chunks
[middle
].end
<= doff
)
390 else if (cbuf
->chunks
[middle
].start
<= doff
)
392 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
394 gomp_mutex_unlock (&devicep
->lock
);
395 gomp_fatal ("internal libgomp cbuf error");
398 /* In an asynchronous context, verify that CBUF isn't used
399 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
400 if (__builtin_expect (aq
!= NULL
, 0))
403 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
413 if (__builtin_expect (aq
!= NULL
, 0))
415 void *h_buf
= (void *) h
;
418 /* We're queueing up an asynchronous copy from data that may
419 disappear before the transfer takes place (i.e. because it is a
420 stack local in a function that is no longer executing). As we've
421 not been able to use CBUF, make a copy of the data into a
423 h_buf
= gomp_malloc (sz
);
424 memcpy (h_buf
, h
, sz
);
426 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
427 "dev", d
, "host", h_buf
, h
, sz
, aq
);
429 /* Free once the transfer has completed. */
430 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
433 gomp_device_copy (devicep
, devicep
->host2dev_func
,
434 "dev", d
, "host", h
, sz
);
437 attribute_hidden
void
438 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
439 struct goacc_asyncqueue
*aq
,
440 void *h
, const void *d
, size_t sz
)
442 if (__builtin_expect (aq
!= NULL
, 0))
443 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
444 "host", h
, "dev", d
, NULL
, sz
, aq
);
446 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
450 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
452 if (!devicep
->free_func (devicep
->target_id
, devptr
))
454 gomp_mutex_unlock (&devicep
->lock
);
455 gomp_fatal ("error in freeing device memory block at %p", devptr
);
459 /* Increment reference count of a splay_tree_key region K by 1.
460 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
461 increment the value if refcount is not yet contained in the set (used for
462 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
463 once for each construct). */
466 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
468 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
471 uintptr_t *refcount_ptr
= &k
->refcount
;
473 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
474 refcount_ptr
= &k
->structelem_refcount
;
475 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
476 refcount_ptr
= k
->structelem_refcount_ptr
;
480 if (htab_find (*refcount_set
, refcount_ptr
))
482 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
483 *slot
= refcount_ptr
;
490 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
491 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
492 track already seen refcounts, and only adjust the value if refcount is not
493 yet contained in the set (like gomp_increment_refcount).
495 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
496 it is already zero and we know we decremented it earlier. This signals that
497 associated maps should be copied back to host.
499 *DO_REMOVE is set to true when we this is the first handling of this refcount
500 and we are setting it to zero. This signals a removal of this key from the
503 Copy and removal are separated due to cases like handling of structure
504 elements, e.g. each map of a structure element representing a possible copy
505 out of a structure field has to be handled individually, but we only signal
506 removal for one (the first encountered) sibling map. */
509 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
510 bool *do_copy
, bool *do_remove
)
512 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
514 *do_copy
= *do_remove
= false;
518 uintptr_t *refcount_ptr
= &k
->refcount
;
520 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
521 refcount_ptr
= &k
->structelem_refcount
;
522 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
523 refcount_ptr
= k
->structelem_refcount_ptr
;
525 bool new_encountered_refcount
;
526 bool set_to_zero
= false;
527 bool is_zero
= false;
529 uintptr_t orig_refcount
= *refcount_ptr
;
533 if (htab_find (*refcount_set
, refcount_ptr
))
535 new_encountered_refcount
= false;
539 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
540 *slot
= refcount_ptr
;
541 new_encountered_refcount
= true;
544 /* If no refcount_set being used, assume all keys are being decremented
545 for the first time. */
546 new_encountered_refcount
= true;
550 else if (*refcount_ptr
> 0)
554 if (*refcount_ptr
== 0)
556 if (orig_refcount
> 0)
562 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
563 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
566 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
567 gomp_map_0len_lookup found oldn for newn.
568 Helper function of gomp_map_vars. */
571 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
572 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
573 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
574 unsigned char kind
, bool always_to_flag
, bool implicit
,
575 struct gomp_coalesce_buf
*cbuf
,
576 htab_t
*refcount_set
)
578 assert (kind
!= GOMP_MAP_ATTACH
579 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
582 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
583 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
584 tgt_var
->is_attach
= false;
585 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
587 /* For implicit maps, old contained in new is valid. */
588 bool implicit_subset
= (implicit
589 && newn
->host_start
<= oldn
->host_start
590 && oldn
->host_end
<= newn
->host_end
);
592 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
594 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
596 if ((kind
& GOMP_MAP_FLAG_FORCE
)
597 /* For implicit maps, old contained in new is valid. */
599 /* Otherwise, new contained inside old is considered valid. */
600 || (oldn
->host_start
<= newn
->host_start
601 && newn
->host_end
<= oldn
->host_end
)))
603 gomp_mutex_unlock (&devicep
->lock
);
604 gomp_fatal ("Trying to map into device [%p..%p) object when "
605 "[%p..%p) is already mapped",
606 (void *) newn
->host_start
, (void *) newn
->host_end
,
607 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
610 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
612 /* Implicit + always should not happen. If this does occur, below
613 address/length adjustment is a TODO. */
614 assert (!implicit_subset
);
616 if (oldn
->aux
&& oldn
->aux
->attach_count
)
618 /* We have to be careful not to overwrite still attached pointers
619 during the copyback to host. */
620 uintptr_t addr
= newn
->host_start
;
621 while (addr
< newn
->host_end
)
623 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
624 if (oldn
->aux
->attach_count
[i
] == 0)
625 gomp_copy_host2dev (devicep
, aq
,
626 (void *) (oldn
->tgt
->tgt_start
628 + addr
- oldn
->host_start
),
630 sizeof (void *), false, cbuf
);
631 addr
+= sizeof (void *);
635 gomp_copy_host2dev (devicep
, aq
,
636 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
637 + newn
->host_start
- oldn
->host_start
),
638 (void *) newn
->host_start
,
639 newn
->host_end
- newn
->host_start
, false, cbuf
);
642 gomp_increment_refcount (oldn
, refcount_set
);
646 get_kind (bool short_mapkind
, void *kinds
, int idx
)
649 return ((unsigned char *) kinds
)[idx
];
651 int val
= ((unsigned short *) kinds
)[idx
];
652 if (GOMP_MAP_IMPLICIT_P (val
))
653 val
&= ~GOMP_MAP_IMPLICIT
;
/* Return whether entry IDX in KINDS carries the implicit-map flag.
   Only 16-bit (short_mapkind) kind arrays can encode it.  */
static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
669 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
670 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
671 struct gomp_coalesce_buf
*cbuf
,
672 bool allow_zero_length_array_sections
)
674 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
675 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
676 struct splay_tree_key_s cur_node
;
678 cur_node
.host_start
= host_ptr
;
679 if (cur_node
.host_start
== (uintptr_t) NULL
)
681 cur_node
.tgt_offset
= (uintptr_t) NULL
;
682 gomp_copy_host2dev (devicep
, aq
,
683 (void *) (tgt
->tgt_start
+ target_offset
),
684 (void *) &cur_node
.tgt_offset
, sizeof (void *),
688 /* Add bias to the pointer value. */
689 cur_node
.host_start
+= bias
;
690 cur_node
.host_end
= cur_node
.host_start
;
691 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
694 if (allow_zero_length_array_sections
)
695 cur_node
.tgt_offset
= 0;
698 gomp_mutex_unlock (&devicep
->lock
);
699 gomp_fatal ("Pointer target of array section wasn't mapped");
704 cur_node
.host_start
-= n
->host_start
;
706 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
707 /* At this point tgt_offset is target address of the
708 array section. Now subtract bias to get what we want
709 to initialize the pointer with. */
710 cur_node
.tgt_offset
-= bias
;
712 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
713 (void *) &cur_node
.tgt_offset
, sizeof (void *),
718 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
719 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
720 size_t first
, size_t i
, void **hostaddrs
,
721 size_t *sizes
, void *kinds
,
722 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
724 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
725 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
726 struct splay_tree_key_s cur_node
;
729 const bool short_mapkind
= true;
730 const int typemask
= short_mapkind
? 0xff : 0x7;
732 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
733 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
734 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
735 kind
= get_kind (short_mapkind
, kinds
, i
);
736 implicit
= get_implicit (short_mapkind
, kinds
, i
);
739 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
741 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
742 kind
& typemask
, false, implicit
, cbuf
,
748 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
750 cur_node
.host_start
--;
751 n2
= splay_tree_lookup (mem_map
, &cur_node
);
752 cur_node
.host_start
++;
755 && n2
->host_start
- n
->host_start
756 == n2
->tgt_offset
- n
->tgt_offset
)
758 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
759 kind
& typemask
, false, implicit
, cbuf
,
765 n2
= splay_tree_lookup (mem_map
, &cur_node
);
769 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
771 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
772 kind
& typemask
, false, implicit
, cbuf
,
777 gomp_mutex_unlock (&devicep
->lock
);
778 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
779 "other mapped elements from the same structure weren't mapped "
780 "together with it", (void *) cur_node
.host_start
,
781 (void *) cur_node
.host_end
);
784 attribute_hidden
void
785 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
786 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
787 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
788 struct gomp_coalesce_buf
*cbufp
,
789 bool allow_zero_length_array_sections
)
791 struct splay_tree_key_s s
;
796 gomp_mutex_unlock (&devicep
->lock
);
797 gomp_fatal ("enclosing struct not mapped for attach");
800 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
801 /* We might have a pointer in a packed struct: however we cannot have more
802 than one such pointer in each pointer-sized portion of the struct, so
804 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
807 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
809 if (!n
->aux
->attach_count
)
811 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
813 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
814 n
->aux
->attach_count
[idx
]++;
817 gomp_mutex_unlock (&devicep
->lock
);
818 gomp_fatal ("attach count overflow");
821 if (n
->aux
->attach_count
[idx
] == 1)
823 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
825 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
829 if ((void *) target
== NULL
)
831 gomp_mutex_unlock (&devicep
->lock
);
832 gomp_fatal ("attempt to attach null pointer");
835 s
.host_start
= target
+ bias
;
836 s
.host_end
= s
.host_start
+ 1;
837 tn
= splay_tree_lookup (mem_map
, &s
);
841 if (allow_zero_length_array_sections
)
842 /* When allowing attachment to zero-length array sections, we
843 allow attaching to NULL pointers when the target region is not
848 gomp_mutex_unlock (&devicep
->lock
);
849 gomp_fatal ("pointer target not mapped for attach");
853 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
856 "%s: attaching host %p, target %p (struct base %p) to %p\n",
857 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
858 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
860 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
861 sizeof (void *), true, cbufp
);
864 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
865 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
868 attribute_hidden
void
869 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
870 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
871 uintptr_t detach_from
, bool finalize
,
872 struct gomp_coalesce_buf
*cbufp
)
878 gomp_mutex_unlock (&devicep
->lock
);
879 gomp_fatal ("enclosing struct not mapped for detach");
882 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
884 if (!n
->aux
|| !n
->aux
->attach_count
)
886 gomp_mutex_unlock (&devicep
->lock
);
887 gomp_fatal ("no attachment counters for struct");
891 n
->aux
->attach_count
[idx
] = 1;
893 if (n
->aux
->attach_count
[idx
] == 0)
895 gomp_mutex_unlock (&devicep
->lock
);
896 gomp_fatal ("attach count underflow");
899 n
->aux
->attach_count
[idx
]--;
901 if (n
->aux
->attach_count
[idx
] == 0)
903 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
905 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
908 "%s: detaching host %p, target %p (struct base %p) to %p\n",
909 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
910 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
913 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
914 sizeof (void *), true, cbufp
);
917 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
918 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
921 attribute_hidden
uintptr_t
922 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
924 if (tgt
->list
[i
].key
!= NULL
)
925 return tgt
->list
[i
].key
->tgt
->tgt_start
926 + tgt
->list
[i
].key
->tgt_offset
927 + tgt
->list
[i
].offset
;
929 switch (tgt
->list
[i
].offset
)
932 return (uintptr_t) hostaddrs
[i
];
938 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
939 + tgt
->list
[i
+ 1].key
->tgt_offset
940 + tgt
->list
[i
+ 1].offset
941 + (uintptr_t) hostaddrs
[i
]
942 - (uintptr_t) hostaddrs
[i
+ 1];
945 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
949 static inline __attribute__((always_inline
)) struct target_mem_desc
*
950 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
951 struct goacc_asyncqueue
*aq
, size_t mapnum
,
952 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
953 void *kinds
, bool short_mapkind
,
954 htab_t
*refcount_set
,
955 enum gomp_map_vars_kind pragma_kind
)
957 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
958 bool has_firstprivate
= false;
959 bool has_always_ptrset
= false;
960 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
961 const int rshift
= short_mapkind
? 8 : 3;
962 const int typemask
= short_mapkind
? 0xff : 0x7;
963 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
964 struct splay_tree_key_s cur_node
;
965 struct target_mem_desc
*tgt
966 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
967 tgt
->list_count
= mapnum
;
968 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
969 tgt
->device_descr
= devicep
;
971 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
980 tgt_align
= sizeof (void *);
986 if (mapnum
> 1 || (pragma_kind
& GOMP_MAP_VARS_TARGET
))
988 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
989 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
992 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
994 size_t align
= 4 * sizeof (void *);
996 tgt_size
= mapnum
* sizeof (void *);
998 cbuf
.use_cnt
= 1 + (mapnum
> 1);
999 cbuf
.chunks
[0].start
= 0;
1000 cbuf
.chunks
[0].end
= tgt_size
;
1003 gomp_mutex_lock (&devicep
->lock
);
1004 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1006 gomp_mutex_unlock (&devicep
->lock
);
1011 for (i
= 0; i
< mapnum
; i
++)
1013 int kind
= get_kind (short_mapkind
, kinds
, i
);
1014 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1015 if (hostaddrs
[i
] == NULL
1016 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1018 tgt
->list
[i
].key
= NULL
;
1019 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1022 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1023 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1025 tgt
->list
[i
].key
= NULL
;
1028 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1029 on a separate construct prior to using use_device_{addr,ptr}.
1030 In OpenMP 5.0, map directives need to be ordered by the
1031 middle-end before the use_device_* clauses. If
1032 !not_found_cnt, all mappings requested (if any) are already
1033 mapped, so use_device_{addr,ptr} can be resolved right away.
1034 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1035 now but would succeed after performing the mappings in the
1036 following loop. We can't defer this always to the second
1037 loop, because it is not even invoked when !not_found_cnt
1038 after the first loop. */
1039 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1040 cur_node
.host_end
= cur_node
.host_start
;
1041 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1044 cur_node
.host_start
-= n
->host_start
;
1046 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1047 + cur_node
.host_start
);
1049 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1051 gomp_mutex_unlock (&devicep
->lock
);
1052 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1054 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1055 /* If not present, continue using the host address. */
1058 __builtin_unreachable ();
1059 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1062 tgt
->list
[i
].offset
= 0;
1065 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1067 size_t first
= i
+ 1;
1068 size_t last
= i
+ sizes
[i
];
1069 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1070 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1072 tgt
->list
[i
].key
= NULL
;
1073 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1074 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1077 size_t align
= (size_t) 1 << (kind
>> rshift
);
1078 if (tgt_align
< align
)
1080 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1081 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1082 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1083 not_found_cnt
+= last
- i
;
1084 for (i
= first
; i
<= last
; i
++)
1086 tgt
->list
[i
].key
= NULL
;
1088 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1090 gomp_coalesce_buf_add (&cbuf
,
1091 tgt_size
- cur_node
.host_end
1092 + (uintptr_t) hostaddrs
[i
],
1098 for (i
= first
; i
<= last
; i
++)
1099 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1100 sizes
, kinds
, NULL
, refcount_set
);
1104 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1106 tgt
->list
[i
].key
= NULL
;
1107 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1108 has_firstprivate
= true;
1111 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1112 || ((kind
& typemask
)
1113 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1115 tgt
->list
[i
].key
= NULL
;
1116 has_firstprivate
= true;
1119 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1120 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1121 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1123 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1124 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1126 tgt
->list
[i
].key
= NULL
;
1128 size_t align
= (size_t) 1 << (kind
>> rshift
);
1129 if (tgt_align
< align
)
1131 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1133 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1134 cur_node
.host_end
- cur_node
.host_start
);
1135 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1136 has_firstprivate
= true;
1140 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1142 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1145 tgt
->list
[i
].key
= NULL
;
1146 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1151 n
= splay_tree_lookup (mem_map
, &cur_node
);
1152 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1154 int always_to_cnt
= 0;
1155 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1157 bool has_nullptr
= false;
1159 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1160 if (n
->tgt
->list
[j
].key
== n
)
1162 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1165 if (n
->tgt
->list_count
== 0)
1167 /* 'declare target'; assume has_nullptr; it could also be
1168 statically assigned pointer, but that it should be to
1169 the equivalent variable on the host. */
1170 assert (n
->refcount
== REFCOUNT_INFINITY
);
1174 assert (j
< n
->tgt
->list_count
);
1175 /* Re-map the data if there is an 'always' modifier or if it a
1176 null pointer was there and non a nonnull has been found; that
1177 permits transparent re-mapping for Fortran array descriptors
1178 which were previously mapped unallocated. */
1179 for (j
= i
+ 1; j
< mapnum
; j
++)
1181 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1182 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1184 || !GOMP_MAP_POINTER_P (ptr_kind
)
1185 || *(void **) hostaddrs
[j
] == NULL
))
1187 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1188 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1189 > cur_node
.host_end
))
1193 has_always_ptrset
= true;
1198 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1199 kind
& typemask
, always_to_cnt
> 0, implicit
,
1200 NULL
, refcount_set
);
1205 tgt
->list
[i
].key
= NULL
;
1207 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1209 /* Not present, hence, skip entry - including its MAP_POINTER,
1211 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1213 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1214 == GOMP_MAP_POINTER
))
1217 tgt
->list
[i
].key
= NULL
;
1218 tgt
->list
[i
].offset
= 0;
1222 size_t align
= (size_t) 1 << (kind
>> rshift
);
1224 if (tgt_align
< align
)
1226 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1228 && gomp_to_device_kind_p (kind
& typemask
))
1229 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1230 cur_node
.host_end
- cur_node
.host_start
);
1231 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1232 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1236 for (j
= i
+ 1; j
< mapnum
; j
++)
1237 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1238 kinds
, j
)) & typemask
))
1239 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1241 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1242 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1243 > cur_node
.host_end
))
1247 tgt
->list
[j
].key
= NULL
;
1258 gomp_mutex_unlock (&devicep
->lock
);
1259 gomp_fatal ("unexpected aggregation");
1261 tgt
->to_free
= devaddrs
[0];
1262 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1263 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1265 else if (not_found_cnt
|| (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1267 /* Allocate tgt_align aligned tgt_size block of memory. */
1268 /* FIXME: Perhaps change interface to allocate properly aligned
1270 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1271 tgt_size
+ tgt_align
- 1);
1274 gomp_mutex_unlock (&devicep
->lock
);
1275 gomp_fatal ("device memory allocation fail");
1278 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1279 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1280 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1282 if (cbuf
.use_cnt
== 1)
1284 if (cbuf
.chunk_cnt
> 0)
1287 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1297 tgt
->to_free
= NULL
;
1303 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1304 tgt_size
= mapnum
* sizeof (void *);
1307 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1310 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1311 splay_tree_node array
= tgt
->array
;
1312 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1313 uintptr_t field_tgt_base
= 0;
1314 splay_tree_key field_tgt_structelem_first
= NULL
;
1316 for (i
= 0; i
< mapnum
; i
++)
1317 if (has_always_ptrset
1319 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1320 == GOMP_MAP_TO_PSET
)
1322 splay_tree_key k
= tgt
->list
[i
].key
;
1323 bool has_nullptr
= false;
1325 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1326 if (k
->tgt
->list
[j
].key
== k
)
1328 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1331 if (k
->tgt
->list_count
== 0)
1334 assert (j
< k
->tgt
->list_count
);
1336 tgt
->list
[i
].has_null_ptr_assoc
= false;
1337 for (j
= i
+ 1; j
< mapnum
; j
++)
1339 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1340 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1342 || !GOMP_MAP_POINTER_P (ptr_kind
)
1343 || *(void **) hostaddrs
[j
] == NULL
))
1345 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1346 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1351 if (*(void **) hostaddrs
[j
] == NULL
)
1352 tgt
->list
[i
].has_null_ptr_assoc
= true;
1353 tgt
->list
[j
].key
= k
;
1354 tgt
->list
[j
].copy_from
= false;
1355 tgt
->list
[j
].always_copy_from
= false;
1356 tgt
->list
[j
].is_attach
= false;
1357 gomp_increment_refcount (k
, refcount_set
);
1358 gomp_map_pointer (k
->tgt
, aq
,
1359 (uintptr_t) *(void **) hostaddrs
[j
],
1360 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1362 sizes
[j
], cbufp
, false);
1367 else if (tgt
->list
[i
].key
== NULL
)
1369 int kind
= get_kind (short_mapkind
, kinds
, i
);
1370 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1371 if (hostaddrs
[i
] == NULL
)
1373 switch (kind
& typemask
)
1375 size_t align
, len
, first
, last
;
1377 case GOMP_MAP_FIRSTPRIVATE
:
1378 align
= (size_t) 1 << (kind
>> rshift
);
1379 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1380 tgt
->list
[i
].offset
= tgt_size
;
1382 gomp_copy_host2dev (devicep
, aq
,
1383 (void *) (tgt
->tgt_start
+ tgt_size
),
1384 (void *) hostaddrs
[i
], len
, false, cbufp
);
1385 /* Save device address in hostaddr to permit latter availablity
1386 when doing a deep-firstprivate with pointer attach. */
1387 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1390 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1391 firstprivate to hostaddrs[i+1], which is assumed to contain a
1395 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1397 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1398 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1400 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1401 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1402 this probably needs revision for 'aq' usage. */
1404 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1405 sizeof (void *), false, cbufp
);
1409 case GOMP_MAP_FIRSTPRIVATE_INT
:
1410 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1412 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1413 /* The OpenACC 'host_data' construct only allows 'use_device'
1414 "mapping" clauses, so in the first loop, 'not_found_cnt'
1415 must always have been zero, so all OpenACC 'use_device'
1416 clauses have already been handled. (We can only easily test
1417 'use_device' with 'if_present' clause here.) */
1418 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1419 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1420 code conceptually simple, similar to the first loop. */
1421 case GOMP_MAP_USE_DEVICE_PTR
:
1422 if (tgt
->list
[i
].offset
== 0)
1424 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1425 cur_node
.host_end
= cur_node
.host_start
;
1426 n
= gomp_map_lookup (mem_map
, &cur_node
);
1429 cur_node
.host_start
-= n
->host_start
;
1431 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1432 + cur_node
.host_start
);
1434 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1436 gomp_mutex_unlock (&devicep
->lock
);
1437 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1439 else if ((kind
& typemask
)
1440 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1441 /* If not present, continue using the host address. */
1444 __builtin_unreachable ();
1445 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1448 case GOMP_MAP_STRUCT
:
1450 last
= i
+ sizes
[i
];
1451 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1452 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1454 if (tgt
->list
[first
].key
!= NULL
)
1456 n
= splay_tree_lookup (mem_map
, &cur_node
);
1459 size_t align
= (size_t) 1 << (kind
>> rshift
);
1460 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1461 - (uintptr_t) hostaddrs
[i
];
1462 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1463 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1464 - (uintptr_t) hostaddrs
[i
];
1465 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1466 field_tgt_offset
= tgt_size
;
1467 field_tgt_clear
= last
;
1468 field_tgt_structelem_first
= NULL
;
1469 tgt_size
+= cur_node
.host_end
1470 - (uintptr_t) hostaddrs
[first
];
1473 for (i
= first
; i
<= last
; i
++)
1474 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1475 sizes
, kinds
, cbufp
, refcount_set
);
1478 case GOMP_MAP_ALWAYS_POINTER
:
1479 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1480 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1481 n
= splay_tree_lookup (mem_map
, &cur_node
);
1483 || n
->host_start
> cur_node
.host_start
1484 || n
->host_end
< cur_node
.host_end
)
1486 gomp_mutex_unlock (&devicep
->lock
);
1487 gomp_fatal ("always pointer not mapped");
1490 && ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1491 != GOMP_MAP_ALWAYS_POINTER
))
1492 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1493 if (cur_node
.tgt_offset
)
1494 cur_node
.tgt_offset
-= sizes
[i
];
1495 gomp_copy_host2dev (devicep
, aq
,
1496 (void *) (n
->tgt
->tgt_start
1498 + cur_node
.host_start
1500 (void *) &cur_node
.tgt_offset
,
1501 sizeof (void *), true, cbufp
);
1502 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1503 + cur_node
.host_start
- n
->host_start
;
1505 case GOMP_MAP_IF_PRESENT
:
1506 /* Not present - otherwise handled above. Skip over its
1507 MAP_POINTER as well. */
1509 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1510 == GOMP_MAP_POINTER
))
1513 case GOMP_MAP_ATTACH
:
1514 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1516 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1517 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1518 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1521 tgt
->list
[i
].key
= n
;
1522 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1523 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1524 tgt
->list
[i
].copy_from
= false;
1525 tgt
->list
[i
].always_copy_from
= false;
1526 tgt
->list
[i
].is_attach
= true;
1527 /* OpenACC 'attach'/'detach' doesn't affect
1528 structured/dynamic reference counts ('n->refcount',
1529 'n->dynamic_refcount'). */
1532 = ((kind
& typemask
)
1533 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1534 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1535 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1538 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1540 gomp_mutex_unlock (&devicep
->lock
);
1541 gomp_fatal ("outer struct not mapped for attach");
1548 splay_tree_key k
= &array
->key
;
1549 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1550 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1551 k
->host_end
= k
->host_start
+ sizes
[i
];
1553 k
->host_end
= k
->host_start
+ sizeof (void *);
1554 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1555 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1556 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1557 kind
& typemask
, false, implicit
, cbufp
,
1562 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1564 /* Replace target address of the pointer with target address
1565 of mapped object in the splay tree. */
1566 splay_tree_remove (mem_map
, n
);
1568 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1569 k
->aux
->link_key
= n
;
1571 size_t align
= (size_t) 1 << (kind
>> rshift
);
1572 tgt
->list
[i
].key
= k
;
1575 k
->dynamic_refcount
= 0;
1576 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1578 k
->tgt_offset
= k
->host_start
- field_tgt_base
1582 k
->refcount
= REFCOUNT_STRUCTELEM
;
1583 if (field_tgt_structelem_first
== NULL
)
1585 /* Set to first structure element of sequence. */
1586 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1587 field_tgt_structelem_first
= k
;
1590 /* Point to refcount of leading element, but do not
1592 k
->structelem_refcount_ptr
1593 = &field_tgt_structelem_first
->structelem_refcount
;
1595 if (i
== field_tgt_clear
)
1597 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1598 field_tgt_structelem_first
= NULL
;
1601 if (i
== field_tgt_clear
)
1602 field_tgt_clear
= FIELD_TGT_EMPTY
;
1606 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1607 k
->tgt_offset
= tgt_size
;
1608 tgt_size
+= k
->host_end
- k
->host_start
;
1610 /* First increment, from 0 to 1. gomp_increment_refcount
1611 encapsulates the different increment cases, so use this
1612 instead of directly setting 1 during initialization. */
1613 gomp_increment_refcount (k
, refcount_set
);
1615 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1616 tgt
->list
[i
].always_copy_from
1617 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1618 tgt
->list
[i
].is_attach
= false;
1619 tgt
->list
[i
].offset
= 0;
1620 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1623 array
->right
= NULL
;
1624 splay_tree_insert (mem_map
, array
);
1625 switch (kind
& typemask
)
1627 case GOMP_MAP_ALLOC
:
1629 case GOMP_MAP_FORCE_ALLOC
:
1630 case GOMP_MAP_FORCE_FROM
:
1631 case GOMP_MAP_ALWAYS_FROM
:
1634 case GOMP_MAP_TOFROM
:
1635 case GOMP_MAP_FORCE_TO
:
1636 case GOMP_MAP_FORCE_TOFROM
:
1637 case GOMP_MAP_ALWAYS_TO
:
1638 case GOMP_MAP_ALWAYS_TOFROM
:
1639 gomp_copy_host2dev (devicep
, aq
,
1640 (void *) (tgt
->tgt_start
1642 (void *) k
->host_start
,
1643 k
->host_end
- k
->host_start
,
1646 case GOMP_MAP_POINTER
:
1647 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1649 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1650 k
->tgt_offset
, sizes
[i
], cbufp
,
1652 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1654 case GOMP_MAP_TO_PSET
:
1655 gomp_copy_host2dev (devicep
, aq
,
1656 (void *) (tgt
->tgt_start
1658 (void *) k
->host_start
,
1659 k
->host_end
- k
->host_start
,
1661 tgt
->list
[i
].has_null_ptr_assoc
= false;
1663 for (j
= i
+ 1; j
< mapnum
; j
++)
1665 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1667 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1668 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1670 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1671 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1676 tgt
->list
[j
].key
= k
;
1677 tgt
->list
[j
].copy_from
= false;
1678 tgt
->list
[j
].always_copy_from
= false;
1679 tgt
->list
[j
].is_attach
= false;
1680 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1681 /* For OpenMP, the use of refcount_sets causes
1682 errors if we set k->refcount = 1 above but also
1683 increment it again here, for decrementing will
1684 not properly match, since we decrement only once
1685 for each key's refcount. Therefore avoid this
1686 increment for OpenMP constructs. */
1688 gomp_increment_refcount (k
, refcount_set
);
1689 gomp_map_pointer (tgt
, aq
,
1690 (uintptr_t) *(void **) hostaddrs
[j
],
1692 + ((uintptr_t) hostaddrs
[j
]
1694 sizes
[j
], cbufp
, false);
1699 case GOMP_MAP_FORCE_PRESENT
:
1701 /* We already looked up the memory region above and it
1703 size_t size
= k
->host_end
- k
->host_start
;
1704 gomp_mutex_unlock (&devicep
->lock
);
1705 #ifdef HAVE_INTTYPES_H
1706 gomp_fatal ("present clause: !acc_is_present (%p, "
1707 "%"PRIu64
" (0x%"PRIx64
"))",
1708 (void *) k
->host_start
,
1709 (uint64_t) size
, (uint64_t) size
);
1711 gomp_fatal ("present clause: !acc_is_present (%p, "
1712 "%lu (0x%lx))", (void *) k
->host_start
,
1713 (unsigned long) size
, (unsigned long) size
);
1717 case GOMP_MAP_FORCE_DEVICEPTR
:
1718 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1719 gomp_copy_host2dev (devicep
, aq
,
1720 (void *) (tgt
->tgt_start
1722 (void *) k
->host_start
,
1723 sizeof (void *), false, cbufp
);
1726 gomp_mutex_unlock (&devicep
->lock
);
1727 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1731 if (k
->aux
&& k
->aux
->link_key
)
1733 /* Set link pointer on target to the device address of the
1735 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1736 /* We intentionally do not use coalescing here, as it's not
1737 data allocated by the current call to this function. */
1738 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1739 &tgt_addr
, sizeof (void *), true, NULL
);
1746 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1748 for (i
= 0; i
< mapnum
; i
++)
1750 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1751 gomp_copy_host2dev (devicep
, aq
,
1752 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1753 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1761 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1762 gomp_copy_host2dev (devicep
, aq
,
1763 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1764 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1765 - cbuf
.chunks
[0].start
),
1766 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1769 /* Free once the transfer has completed. */
1770 devicep
->openacc
.async
.queue_callback_func (aq
, free
, cbuf
.buf
);
1777 /* If the variable from "omp target enter data" map-list was already mapped,
1778 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1780 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1786 gomp_mutex_unlock (&devicep
->lock
);
1790 static struct target_mem_desc
*
1791 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1792 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1793 bool short_mapkind
, htab_t
*refcount_set
,
1794 enum gomp_map_vars_kind pragma_kind
)
1796 /* This management of a local refcount_set is for convenience of callers
1797 who do not share a refcount_set over multiple map/unmap uses. */
1798 htab_t local_refcount_set
= NULL
;
1799 if (refcount_set
== NULL
)
1801 local_refcount_set
= htab_create (mapnum
);
1802 refcount_set
= &local_refcount_set
;
1805 struct target_mem_desc
*tgt
;
1806 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1807 sizes
, kinds
, short_mapkind
, refcount_set
,
1809 if (local_refcount_set
)
1810 htab_free (local_refcount_set
);
1815 attribute_hidden
struct target_mem_desc
*
1816 goacc_map_vars (struct gomp_device_descr
*devicep
,
1817 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1818 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1819 void *kinds
, bool short_mapkind
,
1820 enum gomp_map_vars_kind pragma_kind
)
1822 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1823 sizes
, kinds
, short_mapkind
, NULL
,
1824 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1828 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1830 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1832 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1839 gomp_unref_tgt (void *ptr
)
1841 bool is_tgt_unmapped
= false;
1843 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1845 if (tgt
->refcount
> 1)
1849 gomp_unmap_tgt (tgt
);
1850 is_tgt_unmapped
= true;
1853 return is_tgt_unmapped
;
/* Result-discarding wrapper around gomp_unref_tgt, matching the signature
   required for async queue callbacks.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1863 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1865 splay_tree_remove (sp
, k
);
1868 if (k
->aux
->link_key
)
1869 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1870 if (k
->aux
->attach_count
)
1871 free (k
->aux
->attach_count
);
1877 static inline __attribute__((always_inline
)) bool
1878 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1879 struct goacc_asyncqueue
*aq
)
1881 bool is_tgt_unmapped
= false;
1883 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
1885 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1886 /* Infer the splay_tree_key of the first structelem key using the
1887 pointer to the first structleme_refcount. */
1888 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1889 - offsetof (struct splay_tree_key_s
,
1890 structelem_refcount
));
1891 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1893 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1894 with the splay_tree_keys embedded inside. */
1895 splay_tree_node node
=
1896 (splay_tree_node
) ((char *) k
1897 - offsetof (struct splay_tree_node_s
, key
));
1900 /* Starting from the _FIRST key, and continue for all following
1902 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1903 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
1910 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1913 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1916 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1917 return is_tgt_unmapped
;
1920 attribute_hidden
bool
1921 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1923 return gomp_remove_var_internal (devicep
, k
, NULL
);
1926 /* Remove a variable asynchronously. This actually removes the variable
1927 mapping immediately, but retains the linked target_mem_desc until the
1928 asynchronous operation has completed (as it may still refer to target
1929 memory). The device lock must be held before entry, and remains locked on
1932 attribute_hidden
void
1933 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1934 struct goacc_asyncqueue
*aq
)
1936 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1939 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1940 variables back from device to host: if it is false, it is assumed that this
1941 has been done already. */
1943 static inline __attribute__((always_inline
)) void
1944 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1945 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
1947 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1949 if (tgt
->list_count
== 0)
1955 gomp_mutex_lock (&devicep
->lock
);
1956 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1958 gomp_mutex_unlock (&devicep
->lock
);
1966 /* We must perform detachments before any copies back to the host. */
1967 for (i
= 0; i
< tgt
->list_count
; i
++)
1969 splay_tree_key k
= tgt
->list
[i
].key
;
1971 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1972 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1973 + tgt
->list
[i
].offset
,
1977 for (i
= 0; i
< tgt
->list_count
; i
++)
1979 splay_tree_key k
= tgt
->list
[i
].key
;
1983 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1984 counts ('n->refcount', 'n->dynamic_refcount'). */
1985 if (tgt
->list
[i
].is_attach
)
1988 bool do_copy
, do_remove
;
1989 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
1991 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1992 || tgt
->list
[i
].always_copy_from
)
1993 gomp_copy_dev2host (devicep
, aq
,
1994 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1995 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1996 + tgt
->list
[i
].offset
),
1997 tgt
->list
[i
].length
);
2000 struct target_mem_desc
*k_tgt
= k
->tgt
;
2001 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
2002 /* It would be bad if TGT got unmapped while we're still iterating
2003 over its LIST_COUNT, and also expect to use it in the following
2005 assert (!is_tgt_unmapped
2011 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2014 gomp_unref_tgt ((void *) tgt
);
2016 gomp_mutex_unlock (&devicep
->lock
);
2020 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2021 htab_t
*refcount_set
)
2023 /* This management of a local refcount_set is for convenience of callers
2024 who do not share a refcount_set over multiple map/unmap uses. */
2025 htab_t local_refcount_set
= NULL
;
2026 if (refcount_set
== NULL
)
2028 local_refcount_set
= htab_create (tgt
->list_count
);
2029 refcount_set
= &local_refcount_set
;
2032 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2034 if (local_refcount_set
)
2035 htab_free (local_refcount_set
);
2038 attribute_hidden
void
2039 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2040 struct goacc_asyncqueue
*aq
)
2042 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
2046 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2047 size_t *sizes
, void *kinds
, bool short_mapkind
)
2050 struct splay_tree_key_s cur_node
;
2051 const int typemask
= short_mapkind
? 0xff : 0x7;
2059 gomp_mutex_lock (&devicep
->lock
);
2060 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2062 gomp_mutex_unlock (&devicep
->lock
);
2066 for (i
= 0; i
< mapnum
; i
++)
2069 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2070 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2071 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2074 int kind
= get_kind (short_mapkind
, kinds
, i
);
2075 if (n
->host_start
> cur_node
.host_start
2076 || n
->host_end
< cur_node
.host_end
)
2078 gomp_mutex_unlock (&devicep
->lock
);
2079 gomp_fatal ("Trying to update [%p..%p) object when "
2080 "only [%p..%p) is mapped",
2081 (void *) cur_node
.host_start
,
2082 (void *) cur_node
.host_end
,
2083 (void *) n
->host_start
,
2084 (void *) n
->host_end
);
2087 if (n
->aux
&& n
->aux
->attach_count
)
2089 uintptr_t addr
= cur_node
.host_start
;
2090 while (addr
< cur_node
.host_end
)
2092 /* We have to be careful not to overwrite still attached
2093 pointers during host<->device updates. */
2094 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2095 if (n
->aux
->attach_count
[i
] == 0)
2097 void *devaddr
= (void *) (n
->tgt
->tgt_start
2099 + addr
- n
->host_start
);
2100 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2101 gomp_copy_host2dev (devicep
, NULL
,
2102 devaddr
, (void *) addr
,
2103 sizeof (void *), false, NULL
);
2104 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2105 gomp_copy_dev2host (devicep
, NULL
,
2106 (void *) addr
, devaddr
,
2109 addr
+= sizeof (void *);
2114 void *hostaddr
= (void *) cur_node
.host_start
;
2115 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2116 + cur_node
.host_start
2118 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2120 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2121 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2123 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2124 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
2128 gomp_mutex_unlock (&devicep
->lock
);
2131 static struct gomp_offload_icv_list
*
2132 gomp_get_offload_icv_item (int dev_num
)
2134 struct gomp_offload_icv_list
*l
= gomp_offload_icv_list
;
2135 while (l
!= NULL
&& l
->device_num
!= dev_num
)
2141 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2142 depending on the device num and the variable hierarchy
2143 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2144 device and thus no item with that device number is contained in
2145 gomp_offload_icv_list, then a new item is created and added to the list. */
2147 static struct gomp_offload_icvs
*
2148 get_gomp_offload_icvs (int dev_num
)
2150 struct gomp_icv_list
*dev
2151 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV
);
2152 struct gomp_icv_list
*all
2153 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL
);
2154 struct gomp_icv_list
*dev_x
= gomp_get_initial_icv_item (dev_num
);
2155 struct gomp_offload_icv_list
*offload_icvs
2156 = gomp_get_offload_icv_item (dev_num
);
2158 if (offload_icvs
!= NULL
)
2159 return &offload_icvs
->icvs
;
2161 struct gomp_offload_icv_list
*new
2162 = (struct gomp_offload_icv_list
*) gomp_malloc (sizeof (struct gomp_offload_icv_list
));
2164 new->device_num
= dev_num
;
2165 new->icvs
.device_num
= dev_num
;
2166 new->next
= gomp_offload_icv_list
;
2168 if (dev_x
!= NULL
&& gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_NTEAMS
))
2169 new->icvs
.nteams
= dev_x
->icvs
.nteams_var
;
2170 else if (dev
!= NULL
&& gomp_get_icv_flag (dev
->flags
, GOMP_ICV_NTEAMS
))
2171 new->icvs
.nteams
= dev
->icvs
.nteams_var
;
2172 else if (all
!= NULL
&& gomp_get_icv_flag (all
->flags
, GOMP_ICV_NTEAMS
))
2173 new->icvs
.nteams
= all
->icvs
.nteams_var
;
2175 new->icvs
.nteams
= gomp_default_icv_values
.nteams_var
;
2178 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2179 new->icvs
.teams_thread_limit
= dev_x
->icvs
.teams_thread_limit_var
;
2180 else if (dev
!= NULL
2181 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2182 new->icvs
.teams_thread_limit
= dev
->icvs
.teams_thread_limit_var
;
2183 else if (all
!= NULL
2184 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2185 new->icvs
.teams_thread_limit
= all
->icvs
.teams_thread_limit_var
;
2187 new->icvs
.teams_thread_limit
2188 = gomp_default_icv_values
.teams_thread_limit_var
;
2191 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2192 new->icvs
.default_device
= dev_x
->icvs
.default_device_var
;
2193 else if (dev
!= NULL
2194 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2195 new->icvs
.default_device
= dev
->icvs
.default_device_var
;
2196 else if (all
!= NULL
2197 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2198 new->icvs
.default_device
= all
->icvs
.default_device_var
;
2200 new->icvs
.default_device
= gomp_default_icv_values
.default_device_var
;
2202 gomp_offload_icv_list
= new;
2206 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2207 And insert to splay tree the mapping between addresses from HOST_TABLE and
2208 from loaded target image. We rely in the host and device compiler
2209 emitting variable and functions in the same order. */
2212 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2213 const void *host_table
, const void *target_data
,
2214 bool is_register_lock
)
2216 void **host_func_table
= ((void ***) host_table
)[0];
2217 void **host_funcs_end
= ((void ***) host_table
)[1];
2218 void **host_var_table
= ((void ***) host_table
)[2];
2219 void **host_vars_end
= ((void ***) host_table
)[3];
2221 /* The func table contains only addresses, the var table contains addresses
2222 and corresponding sizes. */
2223 int num_funcs
= host_funcs_end
- host_func_table
;
2224 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2226 /* Load image to device and get target addresses for the image. */
2227 struct addr_pair
*target_table
= NULL
;
2228 uint64_t *rev_target_fn_table
= NULL
;
2229 int i
, num_target_entries
;
2231 /* With reverse offload, insert also target-host addresses. */
2232 bool rev_lookup
= omp_requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
;
2235 = devicep
->load_image_func (devicep
->target_id
, version
,
2236 target_data
, &target_table
,
2237 rev_lookup
? &rev_target_fn_table
: NULL
);
2239 if (num_target_entries
!= num_funcs
+ num_vars
2240 /* "+1" due to the additional ICV struct. */
2241 && num_target_entries
!= num_funcs
+ num_vars
+ 1)
2243 gomp_mutex_unlock (&devicep
->lock
);
2244 if (is_register_lock
)
2245 gomp_mutex_unlock (®ister_lock
);
2246 gomp_fatal ("Cannot map target functions or variables"
2247 " (expected %u, have %u)", num_funcs
+ num_vars
,
2248 num_target_entries
);
2251 /* Insert host-target address mapping into splay tree. */
2252 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2253 /* "+1" due to the additional ICV struct. */
2254 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
+ 1)
2255 * sizeof (*tgt
->array
));
2256 if (rev_target_fn_table
)
2257 tgt
->rev_array
= gomp_malloc (num_funcs
* sizeof (*tgt
->rev_array
));
2259 tgt
->rev_array
= NULL
;
2260 tgt
->refcount
= REFCOUNT_INFINITY
;
2263 tgt
->to_free
= NULL
;
2265 tgt
->list_count
= 0;
2266 tgt
->device_descr
= devicep
;
2267 splay_tree_node array
= tgt
->array
;
2268 reverse_splay_tree_node rev_array
= tgt
->rev_array
;
2270 for (i
= 0; i
< num_funcs
; i
++)
2272 splay_tree_key k
= &array
->key
;
2273 k
->host_start
= (uintptr_t) host_func_table
[i
];
2274 k
->host_end
= k
->host_start
+ 1;
2276 k
->tgt_offset
= target_table
[i
].start
;
2277 k
->refcount
= REFCOUNT_INFINITY
;
2278 k
->dynamic_refcount
= 0;
2281 array
->right
= NULL
;
2282 splay_tree_insert (&devicep
->mem_map
, array
);
2283 if (rev_target_fn_table
)
2285 reverse_splay_tree_key k2
= &rev_array
->key
;
2286 k2
->dev
= rev_target_fn_table
[i
];
2288 rev_array
->left
= NULL
;
2289 rev_array
->right
= NULL
;
2291 reverse_splay_tree_insert (&devicep
->mem_map_rev
, rev_array
);
2297 /* Most significant bit of the size in host and target tables marks
2298 "omp declare target link" variables. */
2299 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2300 const uintptr_t size_mask
= ~link_bit
;
2302 for (i
= 0; i
< num_vars
; i
++)
2304 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2305 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2306 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2308 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2310 gomp_mutex_unlock (&devicep
->lock
);
2311 if (is_register_lock
)
2312 gomp_mutex_unlock (®ister_lock
);
2313 gomp_fatal ("Cannot map target variables (size mismatch)");
2316 splay_tree_key k
= &array
->key
;
2317 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2319 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2321 k
->tgt_offset
= target_var
->start
;
2322 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2323 k
->dynamic_refcount
= 0;
2326 array
->right
= NULL
;
2327 splay_tree_insert (&devicep
->mem_map
, array
);
2331 /* Last entry is for a ICVs variable.
2332 Tolerate case where plugin does not return those entries. */
2333 if (num_funcs
+ num_vars
< num_target_entries
)
2335 struct addr_pair
*var
= &target_table
[num_funcs
+ num_vars
];
2337 /* Start address will be non-zero for the ICVs variable if
2338 the variable was found in this image. */
2339 if (var
->start
!= 0)
2341 /* The index of the devicep within devices[] is regarded as its
2342 'device number', which is different from the per-device type
2343 devicep->target_id. */
2344 int dev_num
= (int) (devicep
- &devices
[0]);
2345 struct gomp_offload_icvs
*icvs
= get_gomp_offload_icvs (dev_num
);
2346 size_t var_size
= var
->end
- var
->start
;
2347 if (var_size
!= sizeof (struct gomp_offload_icvs
))
2349 gomp_mutex_unlock (&devicep
->lock
);
2350 if (is_register_lock
)
2351 gomp_mutex_unlock (®ister_lock
);
2352 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2355 /* Copy the ICVs variable to place on device memory, hereby
2356 actually designating its device number into effect. */
2357 gomp_copy_host2dev (devicep
, NULL
, (void *) var
->start
, icvs
,
2358 var_size
, false, NULL
);
2359 splay_tree_key k
= &array
->key
;
2360 k
->host_start
= (uintptr_t) icvs
;
2362 k
->host_start
+ (size_mask
& sizeof (struct gomp_offload_icvs
));
2364 k
->tgt_offset
= var
->start
;
2365 k
->refcount
= REFCOUNT_INFINITY
;
2366 k
->dynamic_refcount
= 0;
2369 array
->right
= NULL
;
2370 splay_tree_insert (&devicep
->mem_map
, array
);
2375 free (target_table
);
2378 /* Unload the mappings described by target_data from device DEVICE_P.
2379 The device must be locked. */
2382 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2384 const void *host_table
, const void *target_data
)
2386 void **host_func_table
= ((void ***) host_table
)[0];
2387 void **host_funcs_end
= ((void ***) host_table
)[1];
2388 void **host_var_table
= ((void ***) host_table
)[2];
2389 void **host_vars_end
= ((void ***) host_table
)[3];
2391 /* The func table contains only addresses, the var table contains addresses
2392 and corresponding sizes. */
2393 int num_funcs
= host_funcs_end
- host_func_table
;
2394 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2396 struct splay_tree_key_s k
;
2397 splay_tree_key node
= NULL
;
2399 /* Find mapping at start of node array */
2400 if (num_funcs
|| num_vars
)
2402 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2403 : (uintptr_t) host_var_table
[0]);
2404 k
.host_end
= k
.host_start
+ 1;
2405 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2408 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2410 gomp_mutex_unlock (&devicep
->lock
);
2411 gomp_fatal ("image unload fail");
2413 if (devicep
->mem_map_rev
.root
)
2415 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2417 assert (node
&& node
->tgt
&& node
->tgt
->rev_array
);
2418 assert (devicep
->mem_map_rev
.root
->key
.k
->tgt
== node
->tgt
);
2419 free (node
->tgt
->rev_array
);
2420 devicep
->mem_map_rev
.root
= NULL
;
2423 /* Remove mappings from splay tree. */
2425 for (i
= 0; i
< num_funcs
; i
++)
2427 k
.host_start
= (uintptr_t) host_func_table
[i
];
2428 k
.host_end
= k
.host_start
+ 1;
2429 splay_tree_remove (&devicep
->mem_map
, &k
);
2432 /* Most significant bit of the size in host and target tables marks
2433 "omp declare target link" variables. */
2434 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2435 const uintptr_t size_mask
= ~link_bit
;
2436 bool is_tgt_unmapped
= false;
2438 for (i
= 0; i
< num_vars
; i
++)
2440 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2442 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2444 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2445 splay_tree_remove (&devicep
->mem_map
, &k
);
2448 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2449 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2453 if (node
&& !is_tgt_unmapped
)
2461 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2463 char *end
= buf
+ size
, *p
= buf
;
2464 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2465 p
+= snprintf (p
, end
- p
, "unified_address");
2466 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2467 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2468 (p
== buf
? "" : ", "));
2469 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2470 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2471 (p
== buf
? "" : ", "));
2474 /* This function should be called from every offload image while loading.
2475 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2476 the target, and DATA. */
2479 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2480 int target_type
, const void *data
)
2484 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2485 gomp_fatal ("Library too old for offload (version %u < %u)",
2486 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2489 const void *target_data
;
2490 if (GOMP_VERSION_LIB (version
) > 1)
2492 omp_req
= (int) (size_t) ((void **) data
)[0];
2493 target_data
= &((void **) data
)[1];
2501 gomp_mutex_lock (®ister_lock
);
2503 if (omp_req
&& omp_requires_mask
&& omp_requires_mask
!= omp_req
)
2505 char buf1
[sizeof ("unified_address, unified_shared_memory, "
2506 "reverse_offload")];
2507 char buf2
[sizeof ("unified_address, unified_shared_memory, "
2508 "reverse_offload")];
2509 gomp_requires_to_name (buf2
, sizeof (buf2
),
2510 omp_req
!= GOMP_REQUIRES_TARGET_USED
2511 ? omp_req
: omp_requires_mask
);
2512 if (omp_req
!= GOMP_REQUIRES_TARGET_USED
2513 && omp_requires_mask
!= GOMP_REQUIRES_TARGET_USED
)
2515 gomp_requires_to_name (buf1
, sizeof (buf1
), omp_requires_mask
);
2516 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2517 "in multiple compilation units: '%s' vs. '%s'",
2521 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2522 "some compilation units", buf2
);
2524 omp_requires_mask
= omp_req
;
2526 /* Load image to all initialized devices. */
2527 for (i
= 0; i
< num_devices
; i
++)
2529 struct gomp_device_descr
*devicep
= &devices
[i
];
2530 gomp_mutex_lock (&devicep
->lock
);
2531 if (devicep
->type
== target_type
2532 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2533 gomp_load_image_to_device (devicep
, version
,
2534 host_table
, target_data
, true);
2535 gomp_mutex_unlock (&devicep
->lock
);
2538 /* Insert image to array of pending images. */
2540 = gomp_realloc_unlock (offload_images
,
2541 (num_offload_images
+ 1)
2542 * sizeof (struct offload_image_descr
));
2543 offload_images
[num_offload_images
].version
= version
;
2544 offload_images
[num_offload_images
].type
= target_type
;
2545 offload_images
[num_offload_images
].host_table
= host_table
;
2546 offload_images
[num_offload_images
].target_data
= target_data
;
2548 num_offload_images
++;
2549 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: registers a version-0 offload image by delegating to
   the versioned registration routine.  */

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2561 /* This function should be called from every offload image while unloading.
2562 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2563 the target, and DATA. */
2566 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2567 int target_type
, const void *data
)
2571 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2572 gomp_fatal ("Library too old for offload (version %u < %u)",
2573 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2575 const void *target_data
;
2576 if (GOMP_VERSION_LIB (version
) > 1)
2577 target_data
= &((void **) data
)[1];
2581 gomp_mutex_lock (®ister_lock
);
2583 /* Unload image from all initialized devices. */
2584 for (i
= 0; i
< num_devices
; i
++)
2586 struct gomp_device_descr
*devicep
= &devices
[i
];
2587 gomp_mutex_lock (&devicep
->lock
);
2588 if (devicep
->type
== target_type
2589 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2590 gomp_unload_image_from_device (devicep
, version
,
2591 host_table
, target_data
);
2592 gomp_mutex_unlock (&devicep
->lock
);
2595 /* Remove image from array of pending images. */
2596 for (i
= 0; i
< num_offload_images
; i
++)
2597 if (offload_images
[i
].target_data
== target_data
)
2599 offload_images
[i
] = offload_images
[--num_offload_images
];
2603 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: unregisters a version-0 offload image by delegating to
   the versioned unregistration routine.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2615 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2616 must be locked on entry, and remains locked on return. */
2618 attribute_hidden
void
2619 gomp_init_device (struct gomp_device_descr
*devicep
)
2622 if (!devicep
->init_device_func (devicep
->target_id
))
2624 gomp_mutex_unlock (&devicep
->lock
);
2625 gomp_fatal ("device initialization failed");
2628 /* Load to device all images registered by the moment. */
2629 for (i
= 0; i
< num_offload_images
; i
++)
2631 struct offload_image_descr
*image
= &offload_images
[i
];
2632 if (image
->type
== devicep
->type
)
2633 gomp_load_image_to_device (devicep
, image
->version
,
2634 image
->host_table
, image
->target_data
,
2638 /* Initialize OpenACC asynchronous queues. */
2639 goacc_init_asyncqueues (devicep
);
2641 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2644 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2645 must be locked on entry, and remains locked on return. */
2647 attribute_hidden
bool
2648 gomp_fini_device (struct gomp_device_descr
*devicep
)
2650 bool ret
= goacc_fini_asyncqueues (devicep
);
2651 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2652 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2656 attribute_hidden
void
2657 gomp_unload_device (struct gomp_device_descr
*devicep
)
2659 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2663 /* Unload from device all images registered at the moment. */
2664 for (i
= 0; i
< num_offload_images
; i
++)
2666 struct offload_image_descr
*image
= &offload_images
[i
];
2667 if (image
->type
== devicep
->type
)
2668 gomp_unload_image_from_device (devicep
, image
->version
,
2670 image
->target_data
);
2675 /* Host fallback for GOMP_target{,_ext} routines. */
2678 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2679 struct gomp_device_descr
*devicep
, void **args
)
2681 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2683 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2685 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2686 "be used for offloading");
2689 memset (thr
, '\0', sizeof (*thr
));
2690 if (gomp_places_list
)
2692 thr
->place
= old_thr
.place
;
2693 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2698 intptr_t id
= (intptr_t) *args
++, val
;
2699 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2700 val
= (intptr_t) *args
++;
2702 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2703 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2705 id
&= GOMP_TARGET_ARG_ID_MASK
;
2706 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
2708 val
= val
> INT_MAX
? INT_MAX
: val
;
2710 gomp_icv (true)->thread_limit_var
= val
;
2715 gomp_free_thread (thr
);
2719 /* Calculate alignment and size requirements of a private copy of data shared
2720 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2723 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2724 unsigned short *kinds
, size_t *tgt_align
,
2728 for (i
= 0; i
< mapnum
; i
++)
2729 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2731 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2732 if (*tgt_align
< align
)
2734 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2735 *tgt_size
+= sizes
[i
];
2739 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2742 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2743 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2746 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2748 tgt
+= tgt_align
- al
;
2751 for (i
= 0; i
< mapnum
; i
++)
2752 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
2754 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2755 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2756 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2757 hostaddrs
[i
] = tgt
+ tgt_size
;
2758 tgt_size
= tgt_size
+ sizes
[i
];
2759 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
2761 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
2767 /* Helper function of GOMP_target{,_ext} routines. */
2770 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2771 void (*host_fn
) (void *))
2773 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2774 return (void *) host_fn
;
2777 gomp_mutex_lock (&devicep
->lock
);
2778 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2780 gomp_mutex_unlock (&devicep
->lock
);
2784 struct splay_tree_key_s k
;
2785 k
.host_start
= (uintptr_t) host_fn
;
2786 k
.host_end
= k
.host_start
+ 1;
2787 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2788 gomp_mutex_unlock (&devicep
->lock
);
2792 return (void *) tgt_fn
->tgt_offset
;
2796 /* Called when encountering a target directive. If DEVICE
2797 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2798 GOMP_DEVICE_HOST_FALLBACK (or any value
2799 larger than last available hw device), use host fallback.
2800 FN is address of host code, UNUSED is part of the current ABI, but
2801 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2802 with MAPNUM entries, with addresses of the host objects,
2803 sizes of the host objects (resp. for pointer kind pointer bias
2804 and assumed sizeof (void *) size) and kinds. */
2807 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2808 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2809 unsigned char *kinds
)
2811 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2815 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2816 /* All shared memory devices should use the GOMP_target_ext function. */
2817 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2818 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2819 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
2821 htab_t refcount_set
= htab_create (mapnum
);
2822 struct target_mem_desc
*tgt_vars
2823 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2824 &refcount_set
, GOMP_MAP_VARS_TARGET
);
2825 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2827 htab_clear (refcount_set
);
2828 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2829 htab_free (refcount_set
);
2832 static inline unsigned int
2833 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2835 /* If we cannot run asynchronously, simply ignore nowait. */
2836 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2837 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2843 gomp_copy_back_icvs (struct gomp_device_descr
*devicep
, int device
)
2845 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2849 void *host_ptr
= &item
->icvs
;
2850 void *dev_ptr
= omp_get_mapped_ptr (host_ptr
, device
);
2851 if (dev_ptr
!= NULL
)
2852 gomp_copy_dev2host (devicep
, NULL
, host_ptr
, dev_ptr
,
2853 sizeof (struct gomp_offload_icvs
));
2856 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2857 and several arguments have been added:
2858 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2859 DEPEND is array of dependencies, see GOMP_task for details.
   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element contains the actual value.  The
   last element of the array is a single NULL.  Among the device independent
   arguments can be for example
2867 NUM_TEAMS and THREAD_LIMIT.
2869 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2870 that value, or 1 if teams construct is not present, or 0, if
2871 teams construct does not have num_teams clause and so the choice is
2872 implementation defined, and -1 if it can't be determined on the host
2873 what value will GOMP_teams have on the device.
2874 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2875 body with that value, or 0, if teams construct does not have thread_limit
2876 clause or the teams construct is not present, or -1 if it can't be
2877 determined on the host what value will GOMP_teams have on the device. */
2880 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2881 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2882 unsigned int flags
, void **depend
, void **args
)
2884 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2885 size_t tgt_align
= 0, tgt_size
= 0;
2886 bool fpc_done
= false;
2888 /* Obtain the original TEAMS and THREADS values from ARGS. */
2889 intptr_t orig_teams
= 1, orig_threads
= 0;
2890 size_t num_args
= 0, len
= 1, teams_len
= 1, threads_len
= 1;
2891 void **tmpargs
= args
;
2894 intptr_t id
= (intptr_t) *tmpargs
++, val
;
2895 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2897 val
= (intptr_t) *tmpargs
++;
2902 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2906 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2908 val
= val
> INT_MAX
? INT_MAX
: val
;
2909 if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
)
2914 else if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
)
2921 intptr_t new_teams
= orig_teams
, new_threads
= orig_threads
;
2922 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2923 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2924 value could not be determined. No change.
2925 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2926 Set device-specific value.
2927 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2929 if (orig_teams
== -2)
2931 else if (orig_teams
== 0)
2933 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2935 new_teams
= item
->icvs
.nteams
;
2937 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2938 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2939 e.g. a THREAD_LIMIT clause. */
2940 if (orig_teams
> -2 && orig_threads
== 0)
2942 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2944 new_threads
= item
->icvs
.teams_thread_limit
;
2947 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2949 void **new_args
= args
;
2950 if (orig_teams
!= new_teams
|| orig_threads
!= new_threads
)
2952 size_t tms_len
= (orig_teams
== new_teams
2954 : (new_teams
> -(1 << 15) && new_teams
< (1 << 15)
2956 size_t ths_len
= (orig_threads
== new_threads
2958 : (new_threads
> -(1 << 15) && new_threads
< (1 << 15)
2960 /* One additional item after the last arg must be NULL. */
2961 size_t new_args_cnt
= num_args
- teams_len
- threads_len
+ tms_len
2963 new_args
= (void **) gomp_alloca (new_args_cnt
* sizeof (void*));
2966 void **tmp_new_args
= new_args
;
2967 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
2968 too if they have not been changed and skipped otherwise. */
2971 intptr_t id
= (intptr_t) *tmpargs
;
2972 if (((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
2973 && orig_teams
!= new_teams
)
2974 || ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
2975 && orig_threads
!= new_threads
))
2978 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2983 *tmp_new_args
++ = *tmpargs
++;
2984 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2985 *tmp_new_args
++ = *tmpargs
++;
2989 /* Add the new TEAMS arg to the new args list if it has been changed. */
2990 if (orig_teams
!= new_teams
)
2992 intptr_t new_val
= new_teams
;
2995 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
2996 | GOMP_TARGET_ARG_NUM_TEAMS
;
2997 *tmp_new_args
++ = (void *) new_val
;
3001 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3002 | GOMP_TARGET_ARG_NUM_TEAMS
);
3003 *tmp_new_args
++ = (void *) new_val
;
3007 /* Add the new THREADS arg to the new args list if it has been changed. */
3008 if (orig_threads
!= new_threads
)
3010 intptr_t new_val
= new_threads
;
3013 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3014 | GOMP_TARGET_ARG_THREAD_LIMIT
;
3015 *tmp_new_args
++ = (void *) new_val
;
3019 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3020 | GOMP_TARGET_ARG_THREAD_LIMIT
);
3021 *tmp_new_args
++ = (void *) new_val
;
3025 *tmp_new_args
= NULL
;
3028 flags
= clear_unsupported_flags (devicep
, flags
);
3030 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
3032 struct gomp_thread
*thr
= gomp_thread ();
3033 /* Create a team if we don't have any around, as nowait
3034 target tasks make sense to run asynchronously even when
3035 outside of any parallel. */
3036 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
3038 struct gomp_team
*team
= gomp_new_team (1);
3039 struct gomp_task
*task
= thr
->task
;
3040 struct gomp_task
**implicit_task
= &task
;
3041 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
3042 team
->prev_ts
= thr
->ts
;
3043 thr
->ts
.team
= team
;
3044 thr
->ts
.team_id
= 0;
3045 thr
->ts
.work_share
= &team
->work_shares
[0];
3046 thr
->ts
.last_work_share
= NULL
;
3047 #ifdef HAVE_SYNC_BUILTINS
3048 thr
->ts
.single_count
= 0;
3050 thr
->ts
.static_trip
= 0;
3051 thr
->task
= &team
->implicit_task
[0];
3052 gomp_init_task (thr
->task
, NULL
, icv
);
3053 while (*implicit_task
3054 && (*implicit_task
)->kind
!= GOMP_TASK_IMPLICIT
)
3055 implicit_task
= &(*implicit_task
)->parent
;
3058 thr
->task
= *implicit_task
;
3060 free (*implicit_task
);
3061 thr
->task
= &team
->implicit_task
[0];
3064 pthread_setspecific (gomp_thread_destructor
, thr
);
3065 if (implicit_task
!= &task
)
3067 *implicit_task
= thr
->task
;
3072 && !thr
->task
->final_task
)
3074 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
3075 sizes
, kinds
, flags
, depend
, new_args
,
3076 GOMP_TARGET_TASK_BEFORE_MAP
);
3081 /* If there are depend clauses, but nowait is not present
3082 (or we are in a final task), block the parent task until the
3083 dependencies are resolved and then just continue with the rest
3084 of the function as if it is a merged task. */
3087 struct gomp_thread
*thr
= gomp_thread ();
3088 if (thr
->task
&& thr
->task
->depend_hash
)
3090 /* If we might need to wait, copy firstprivate now. */
3091 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3092 &tgt_align
, &tgt_size
);
3095 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3096 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3097 tgt_align
, tgt_size
);
3100 gomp_task_maybe_wait_for_dependencies (depend
);
3106 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3107 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
3108 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3112 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3113 &tgt_align
, &tgt_size
);
3116 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3117 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3118 tgt_align
, tgt_size
);
3121 gomp_target_fallback (fn
, hostaddrs
, devicep
, new_args
);
3125 struct target_mem_desc
*tgt_vars
;
3126 htab_t refcount_set
= NULL
;
3128 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3132 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3133 &tgt_align
, &tgt_size
);
3136 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3137 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3138 tgt_align
, tgt_size
);
3145 refcount_set
= htab_create (mapnum
);
3146 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
3147 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
3149 devicep
->run_func (devicep
->target_id
, fn_addr
,
3150 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
3154 htab_clear (refcount_set
);
3155 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3158 htab_free (refcount_set
);
3160 /* Copy back ICVs from device to host.
3161 HOST_PTR is expected to exist since it was added in
3162 gomp_load_image_to_device if not already available. */
3163 gomp_copy_back_icvs (devicep
, device
);
3168 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3169 keeping track of all variable handling - assuming that reverse offload occurs
3170 ony very rarely. Downside is that the reverse search is slow. */
3172 struct gomp_splay_tree_rev_lookup_data
{
3173 uintptr_t tgt_start
;
3179 gomp_splay_tree_rev_lookup (splay_tree_key key
, void *d
)
3181 struct gomp_splay_tree_rev_lookup_data
*data
;
3182 data
= (struct gomp_splay_tree_rev_lookup_data
*)d
;
3183 uintptr_t tgt_start
= key
->tgt
->tgt_start
+ key
->tgt_offset
;
3185 if (tgt_start
> data
->tgt_start
|| key
->tgt
->list_count
== 0)
3189 for (j
= 0; j
< key
->tgt
->list_count
; j
++)
3190 if (key
->tgt
->list
[j
].key
== key
)
3192 assert (j
< key
->tgt
->list_count
);
3193 uintptr_t tgt_end
= tgt_start
+ key
->tgt
->list
[j
].length
;
3195 if ((tgt_start
== data
->tgt_start
&& tgt_end
== data
->tgt_end
)
3196 || (tgt_end
> data
->tgt_start
&& tgt_start
< data
->tgt_end
))
3204 static inline splay_tree_key
3205 gomp_map_rev_lookup (splay_tree mem_map
, uint64_t tgt_start
, uint64_t tgt_end
,
3208 struct gomp_splay_tree_rev_lookup_data data
;
3210 data
.tgt_start
= tgt_start
;
3211 data
.tgt_end
= tgt_end
;
3213 if (tgt_start
!= tgt_end
)
3215 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3220 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3221 if (data
.key
!= NULL
|| zero_len
)
3226 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3233 bool present
, aligned
;
3237 /* Search just mapped reverse-offload data; returns index if found,
3241 gomp_map_cdata_lookup_int (struct cpy_data
*d
, uint64_t *devaddrs
,
3242 unsigned short *kinds
, uint64_t *sizes
, size_t n
,
3243 uint64_t tgt_start
, uint64_t tgt_end
)
3245 const bool short_mapkind
= true;
3246 const int typemask
= short_mapkind
? 0xff : 0x7;
3248 for (i
= 0; i
< n
; i
++)
3250 bool is_struct
= ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3251 == GOMP_MAP_STRUCT
);
3254 dev_end
= d
[i
].devaddr
+ sizes
[i
];
3257 if (i
+ sizes
[i
] < n
)
3258 dev_end
= d
[i
+ sizes
[i
]].devaddr
+ sizes
[i
+ sizes
[i
]];
3260 dev_end
= devaddrs
[i
+ sizes
[i
]] + sizes
[i
+ sizes
[i
]];
3262 if ((d
[i
].devaddr
== tgt_start
&& dev_end
== tgt_end
)
3263 || (dev_end
> tgt_start
&& d
[i
].devaddr
< tgt_end
))
3272 gomp_map_cdata_lookup (struct cpy_data
*d
, uint64_t *devaddrs
,
3273 unsigned short *kinds
, uint64_t *sizes
,
3274 size_t n
, uint64_t tgt_start
, uint64_t tgt_end
,
3278 if (tgt_start
!= tgt_end
)
3279 return gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3280 tgt_start
, tgt_end
);
3282 i
= gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3283 tgt_start
, tgt_end
);
3284 if (i
< n
|| zero_len
)
3289 return gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3290 tgt_start
, tgt_end
);
3293 /* Handle reverse offload. This is called by the device plugins for a
3294 reverse offload; it is not called if the outer target runs on the host.
3295 The mapping is simplified device-affecting constructs (except for target
3296 with device(ancestor:1)) must not be encountered; in particular not
3297 target (enter/exit) data. */
3300 gomp_target_rev (uint64_t fn_ptr
, uint64_t mapnum
, uint64_t devaddrs_ptr
,
3301 uint64_t sizes_ptr
, uint64_t kinds_ptr
, int dev_num
,
3302 struct goacc_asyncqueue
*aq
)
3304 /* Return early if there is no offload code. */
3305 if (sizeof (OFFLOAD_PLUGINS
) == sizeof (""))
3307 /* Currently, this fails because of calculate_firstprivate_requirements
3308 below; it could be fixed but additional code needs to be updated to
3309 handle 32bit hosts - thus, it is not worthwhile. */
3310 if (sizeof (void *) != sizeof (uint64_t))
3311 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3313 struct cpy_data
*cdata
= NULL
;
3316 unsigned short *kinds
;
3317 const bool short_mapkind
= true;
3318 const int typemask
= short_mapkind
? 0xff : 0x7;
3319 struct gomp_device_descr
*devicep
= resolve_device (dev_num
, false);
3321 reverse_splay_tree_key n
;
3322 struct reverse_splay_tree_key_s k
;
3325 gomp_mutex_lock (&devicep
->lock
);
3326 n
= gomp_map_lookup_rev (&devicep
->mem_map_rev
, &k
);
3327 gomp_mutex_unlock (&devicep
->lock
);
3330 gomp_fatal ("Cannot find reverse-offload function");
3331 void (*host_fn
)() = (void (*)()) n
->k
->host_start
;
3333 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) || mapnum
== 0)
3335 devaddrs
= (uint64_t *) (uintptr_t) devaddrs_ptr
;
3336 sizes
= (uint64_t *) (uintptr_t) sizes_ptr
;
3337 kinds
= (unsigned short *) (uintptr_t) kinds_ptr
;
3341 devaddrs
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3342 sizes
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3343 kinds
= (unsigned short *) gomp_malloc (mapnum
* sizeof (unsigned short));
3344 gomp_copy_dev2host (devicep
, aq
, devaddrs
,
3345 (const void *) (uintptr_t) devaddrs_ptr
,
3346 mapnum
* sizeof (uint64_t));
3347 gomp_copy_dev2host (devicep
, aq
, sizes
,
3348 (const void *) (uintptr_t) sizes_ptr
,
3349 mapnum
* sizeof (uint64_t));
3350 gomp_copy_dev2host (devicep
, aq
, kinds
,
3351 (const void *) (uintptr_t) kinds_ptr
,
3352 mapnum
* sizeof (unsigned short));
3353 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3354 exit (EXIT_FAILURE
);
3357 size_t tgt_align
= 0, tgt_size
= 0;
3359 /* If actually executed on 32bit systems, the casts lead to wrong code;
3360 but 32bit with offloading is not supported; see top of this function. */
3361 calculate_firstprivate_requirements (mapnum
, (void *) (uintptr_t) sizes
,
3362 (void *) (uintptr_t) kinds
,
3363 &tgt_align
, &tgt_size
);
3367 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3368 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3370 tgt
+= tgt_align
- al
;
3372 for (uint64_t i
= 0; i
< mapnum
; i
++)
3373 if (get_kind (short_mapkind
, kinds
, i
) == GOMP_MAP_FIRSTPRIVATE
3374 && devaddrs
[i
] != 0)
3376 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3377 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3378 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3379 memcpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3383 gomp_copy_dev2host (devicep
, aq
, tgt
+ tgt_size
,
3384 (void *) (uintptr_t) devaddrs
[i
],
3386 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3387 exit (EXIT_FAILURE
);
3389 devaddrs
[i
] = (uint64_t) (uintptr_t) tgt
+ tgt_size
;
3390 tgt_size
= tgt_size
+ sizes
[i
];
3391 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3393 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3394 == GOMP_MAP_ATTACH
))
3396 *(uint64_t*) (uintptr_t) (devaddrs
[i
+1] + sizes
[i
+1])
3397 = (uint64_t) devaddrs
[i
];
3403 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3405 size_t j
, struct_cpy
= 0;
3407 cdata
= gomp_alloca (sizeof (*cdata
) * mapnum
);
3408 memset (cdata
, '\0', sizeof (*cdata
) * mapnum
);
3409 gomp_mutex_lock (&devicep
->lock
);
3410 for (uint64_t i
= 0; i
< mapnum
; i
++)
3412 if (devaddrs
[i
] == 0)
3415 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3418 case GOMP_MAP_FIRSTPRIVATE
:
3419 case GOMP_MAP_FIRSTPRIVATE_INT
:
3422 case GOMP_MAP_DELETE
:
3423 case GOMP_MAP_RELEASE
:
3424 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3425 /* Assume it is present; look it up - but ignore otherwise. */
3426 case GOMP_MAP_ALLOC
:
3428 case GOMP_MAP_FORCE_ALLOC
:
3429 case GOMP_MAP_FORCE_FROM
:
3430 case GOMP_MAP_ALWAYS_FROM
:
3432 case GOMP_MAP_TOFROM
:
3433 case GOMP_MAP_FORCE_TO
:
3434 case GOMP_MAP_FORCE_TOFROM
:
3435 case GOMP_MAP_ALWAYS_TO
:
3436 case GOMP_MAP_ALWAYS_TOFROM
:
3437 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3438 cdata
[i
].devaddr
= devaddrs
[i
];
3439 bool zero_len
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3440 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
);
3441 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3443 devaddrs
[i
] + sizes
[i
], zero_len
);
3447 cdata
[i
].present
= true;
3448 devaddrs
[i
] = devaddrs
[j
] + devaddrs
[i
] - cdata
[j
].devaddr
;
3452 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3454 devaddrs
[i
] + sizes
[i
], zero_len
);
3455 cdata
[i
].present
= n2
!= NULL
;
3457 if (!cdata
[i
].present
3458 && kind
!= GOMP_MAP_DELETE
3459 && kind
!= GOMP_MAP_RELEASE
3460 && kind
!= GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
3462 cdata
[i
].aligned
= true;
3463 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3465 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3468 else if (n2
!= NULL
)
3469 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3470 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3471 if (((!cdata
[i
].present
|| struct_cpy
)
3472 && (kind
== GOMP_MAP_TO
|| kind
== GOMP_MAP_TOFROM
))
3473 || kind
== GOMP_MAP_FORCE_TO
3474 || kind
== GOMP_MAP_FORCE_TOFROM
3475 || kind
== GOMP_MAP_ALWAYS_TO
3476 || kind
== GOMP_MAP_ALWAYS_TOFROM
)
3478 gomp_copy_dev2host (devicep
, aq
,
3479 (void *) (uintptr_t) devaddrs
[i
],
3480 (void *) (uintptr_t) cdata
[i
].devaddr
,
3482 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3484 gomp_mutex_unlock (&devicep
->lock
);
3485 exit (EXIT_FAILURE
);
3491 case GOMP_MAP_ATTACH
:
3492 case GOMP_MAP_POINTER
:
3493 case GOMP_MAP_ALWAYS_POINTER
:
3494 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3495 devaddrs
[i
] + sizes
[i
],
3496 devaddrs
[i
] + sizes
[i
]
3497 + sizeof (void*), false);
3498 cdata
[i
].present
= n2
!= NULL
;
3499 cdata
[i
].devaddr
= devaddrs
[i
];
3501 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3502 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3505 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3506 devaddrs
[i
] + sizes
[i
],
3507 devaddrs
[i
] + sizes
[i
]
3508 + sizeof (void*), false);
3511 cdata
[i
].present
= true;
3512 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3513 - cdata
[j
].devaddr
);
3516 if (!cdata
[i
].present
)
3517 devaddrs
[i
] = (uintptr_t) gomp_malloc (sizeof (void*));
3518 /* Assume that when present, the pointer is already correct. */
3520 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[i
])
3523 case GOMP_MAP_TO_PSET
:
3524 /* Assume that when present, the pointers are fine and no 'to:'
3526 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3527 devaddrs
[i
], devaddrs
[i
] + sizes
[i
],
3529 cdata
[i
].present
= n2
!= NULL
;
3530 cdata
[i
].devaddr
= devaddrs
[i
];
3532 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3533 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3536 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3538 devaddrs
[i
] + sizes
[i
], false);
3541 cdata
[i
].present
= true;
3542 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3543 - cdata
[j
].devaddr
);
3546 if (!cdata
[i
].present
)
3548 cdata
[i
].aligned
= true;
3549 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3551 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3553 gomp_copy_dev2host (devicep
, aq
,
3554 (void *) (uintptr_t) devaddrs
[i
],
3555 (void *) (uintptr_t) cdata
[i
].devaddr
,
3557 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3559 gomp_mutex_unlock (&devicep
->lock
);
3560 exit (EXIT_FAILURE
);
3563 for (j
= i
+ 1; j
< mapnum
; j
++)
3565 kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
3566 if (!GOMP_MAP_ALWAYS_POINTER_P (kind
)
3567 && !GOMP_MAP_POINTER_P (kind
))
3569 if (devaddrs
[j
] < devaddrs
[i
])
3571 if (cdata
[i
].present
)
3573 if (devaddrs
[j
] == 0)
3575 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[j
]) = 0;
3580 /* Dereference devaddrs[j] to get the device addr. */
3581 assert (devaddrs
[j
] - sizes
[j
] == cdata
[i
].devaddr
);
3582 devaddrs
[j
] = *(uint64_t *) (uintptr_t) (devaddrs
[i
]
3584 cdata
[j
].present
= true;
3585 cdata
[j
].devaddr
= devaddrs
[j
];
3586 if (devaddrs
[j
] == 0)
3588 k
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, j
,
3590 devaddrs
[j
] + sizeof (void*),
3593 devaddrs
[j
] = (devaddrs
[k
] + devaddrs
[j
]
3594 - cdata
[k
].devaddr
);
3597 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3599 devaddrs
[j
] + sizeof (void*),
3603 gomp_mutex_unlock (&devicep
->lock
);
3604 gomp_fatal ("Pointer target wasn't mapped");
3606 devaddrs
[j
] = (n2
->host_start
+ cdata
[j
].devaddr
3607 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3609 *(void **) (uintptr_t) (devaddrs
[i
] + sizes
[j
])
3610 = (void *) (uintptr_t) devaddrs
[j
];
3614 case GOMP_MAP_STRUCT
:
3615 n2
= gomp_map_rev_lookup (&devicep
->mem_map
, devaddrs
[i
+1],
3616 devaddrs
[i
+ sizes
[i
]]
3617 + sizes
[i
+ sizes
[i
]], false);
3618 cdata
[i
].present
= n2
!= NULL
;
3619 cdata
[i
].devaddr
= devaddrs
[i
];
3620 struct_cpy
= cdata
[i
].present
? 0 : sizes
[i
];
3623 size_t sz
= (size_t) (devaddrs
[i
+ sizes
[i
]]
3625 + sizes
[i
+ sizes
[i
]]);
3626 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3627 cdata
[i
].aligned
= true;
3628 devaddrs
[i
] = (uintptr_t) gomp_aligned_alloc (align
, sz
);
3629 devaddrs
[i
] -= devaddrs
[i
+1] - cdata
[i
].devaddr
;
3632 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3633 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3636 gomp_mutex_unlock (&devicep
->lock
);
3637 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds
[i
]);
3640 gomp_mutex_unlock (&devicep
->lock
);
3645 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3647 uint64_t struct_cpy
= 0;
3648 bool clean_struct
= false;
3649 for (uint64_t i
= 0; i
< mapnum
; i
++)
3651 if (cdata
[i
].devaddr
== 0)
3653 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3654 bool copy
= !cdata
[i
].present
|| struct_cpy
;
3657 case GOMP_MAP_FORCE_FROM
:
3658 case GOMP_MAP_FORCE_TOFROM
:
3659 case GOMP_MAP_ALWAYS_FROM
:
3660 case GOMP_MAP_ALWAYS_TOFROM
:
3664 case GOMP_MAP_TOFROM
:
3667 gomp_copy_host2dev (devicep
, aq
,
3668 (void *) (uintptr_t) cdata
[i
].devaddr
,
3669 (void *) (uintptr_t) devaddrs
[i
],
3670 sizes
[i
], false, NULL
);
3671 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3672 exit (EXIT_FAILURE
);
3682 if (kind
== GOMP_MAP_STRUCT
&& !cdata
[i
].present
)
3684 clean_struct
= true;
3685 struct_cpy
= sizes
[i
];
3687 else if (!cdata
[i
].present
&& cdata
[i
].aligned
)
3688 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3689 else if (!cdata
[i
].present
)
3690 free ((void *) (uintptr_t) devaddrs
[i
]);
3693 for (uint64_t i
= 0; i
< mapnum
; i
++)
3694 if (!cdata
[i
].present
3695 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3696 == GOMP_MAP_STRUCT
))
3698 devaddrs
[i
] += cdata
[i
+1].devaddr
- cdata
[i
].devaddr
;
3699 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3708 /* Host fallback for GOMP_target_data{,_ext} routines. */
3711 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
3713 struct gomp_task_icv
*icv
= gomp_icv (false);
3715 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3717 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3718 "be used for offloading");
3720 if (icv
->target_data
)
3722 /* Even when doing a host fallback, if there are any active
3723 #pragma omp target data constructs, need to remember the
3724 new #pragma omp target data, otherwise GOMP_target_end_data
3725 would get out of sync. */
3726 struct target_mem_desc
*tgt
3727 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
3728 NULL
, GOMP_MAP_VARS_DATA
);
3729 tgt
->prev
= icv
->target_data
;
3730 icv
->target_data
= tgt
;
3735 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
3736 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3738 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3741 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3742 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3743 return gomp_target_data_fallback (devicep
);
3745 struct target_mem_desc
*tgt
3746 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3747 NULL
, GOMP_MAP_VARS_DATA
);
3748 struct gomp_task_icv
*icv
= gomp_icv (true);
3749 tgt
->prev
= icv
->target_data
;
3750 icv
->target_data
= tgt
;
3754 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
3755 size_t *sizes
, unsigned short *kinds
)
3757 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3760 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3761 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3762 return gomp_target_data_fallback (devicep
);
3764 struct target_mem_desc
*tgt
3765 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
3766 NULL
, GOMP_MAP_VARS_DATA
);
3767 struct gomp_task_icv
*icv
= gomp_icv (true);
3768 tgt
->prev
= icv
->target_data
;
3769 icv
->target_data
= tgt
;
3773 GOMP_target_end_data (void)
3775 struct gomp_task_icv
*icv
= gomp_icv (false);
3776 if (icv
->target_data
)
3778 struct target_mem_desc
*tgt
= icv
->target_data
;
3779 icv
->target_data
= tgt
->prev
;
3780 gomp_unmap_vars (tgt
, true, NULL
);
3785 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
3786 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3788 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3791 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3792 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3795 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
3799 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
3800 size_t *sizes
, unsigned short *kinds
,
3801 unsigned int flags
, void **depend
)
3803 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3805 /* If there are depend clauses, but nowait is not present,
3806 block the parent task until the dependencies are resolved
3807 and then just continue with the rest of the function as if it
3808 is a merged task. Until we are able to schedule task during
3809 variable mapping or unmapping, ignore nowait if depend clauses
3813 struct gomp_thread
*thr
= gomp_thread ();
3814 if (thr
->task
&& thr
->task
->depend_hash
)
3816 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3818 && !thr
->task
->final_task
)
3820 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3821 mapnum
, hostaddrs
, sizes
, kinds
,
3822 flags
| GOMP_TARGET_FLAG_UPDATE
,
3823 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
3828 struct gomp_team
*team
= thr
->ts
.team
;
3829 /* If parallel or taskgroup has been cancelled, don't start new
3831 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3833 if (gomp_team_barrier_cancelled (&team
->barrier
))
3835 if (thr
->task
->taskgroup
)
3837 if (thr
->task
->taskgroup
->cancelled
)
3839 if (thr
->task
->taskgroup
->workshare
3840 && thr
->task
->taskgroup
->prev
3841 && thr
->task
->taskgroup
->prev
->cancelled
)
3846 gomp_task_maybe_wait_for_dependencies (depend
);
3852 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3853 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3856 struct gomp_thread
*thr
= gomp_thread ();
3857 struct gomp_team
*team
= thr
->ts
.team
;
3858 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3859 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3861 if (gomp_team_barrier_cancelled (&team
->barrier
))
3863 if (thr
->task
->taskgroup
)
3865 if (thr
->task
->taskgroup
->cancelled
)
3867 if (thr
->task
->taskgroup
->workshare
3868 && thr
->task
->taskgroup
->prev
3869 && thr
->task
->taskgroup
->prev
->cancelled
)
3874 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
3878 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
3879 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3880 htab_t
*refcount_set
)
3882 const int typemask
= 0xff;
3884 gomp_mutex_lock (&devicep
->lock
);
3885 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
3887 gomp_mutex_unlock (&devicep
->lock
);
3891 for (i
= 0; i
< mapnum
; i
++)
3892 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
3894 struct splay_tree_key_s cur_node
;
3895 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3896 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
3897 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3900 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
3905 splay_tree_key remove_vars
[mapnum
];
3907 for (i
= 0; i
< mapnum
; i
++)
3909 struct splay_tree_key_s cur_node
;
3910 unsigned char kind
= kinds
[i
] & typemask
;
3914 case GOMP_MAP_ALWAYS_FROM
:
3915 case GOMP_MAP_DELETE
:
3916 case GOMP_MAP_RELEASE
:
3917 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3918 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3919 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3920 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
3921 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3922 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
3923 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
3924 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3928 bool delete_p
= (kind
== GOMP_MAP_DELETE
3929 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
3930 bool do_copy
, do_remove
;
3931 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
3934 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
3935 || kind
== GOMP_MAP_ALWAYS_FROM
)
3937 if (k
->aux
&& k
->aux
->attach_count
)
3939 /* We have to be careful not to overwrite still attached
3940 pointers during the copyback to host. */
3941 uintptr_t addr
= k
->host_start
;
3942 while (addr
< k
->host_end
)
3944 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
3945 if (k
->aux
->attach_count
[i
] == 0)
3946 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
3947 (void *) (k
->tgt
->tgt_start
3949 + addr
- k
->host_start
),
3951 addr
+= sizeof (void *);
3955 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
3956 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
3957 + cur_node
.host_start
3959 cur_node
.host_end
- cur_node
.host_start
);
3962 /* Structure elements lists are removed altogether at once, which
3963 may cause immediate deallocation of the target_mem_desc, causing
3964 errors if we still have following element siblings to copy back.
3965 While we're at it, it also seems more disciplined to simply
3966 queue all removals together for processing below.
3968 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3969 not have this problem, since they maintain an additional
3970 tgt->refcount = 1 reference to the target_mem_desc to start with.
3973 remove_vars
[nrmvars
++] = k
;
3976 case GOMP_MAP_DETACH
:
3979 gomp_mutex_unlock (&devicep
->lock
);
3980 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3985 for (int i
= 0; i
< nrmvars
; i
++)
3986 gomp_remove_var (devicep
, remove_vars
[i
]);
3988 gomp_mutex_unlock (&devicep
->lock
);
3992 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
3993 size_t *sizes
, unsigned short *kinds
,
3994 unsigned int flags
, void **depend
)
3996 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3998 /* If there are depend clauses, but nowait is not present,
3999 block the parent task until the dependencies are resolved
4000 and then just continue with the rest of the function as if it
4001 is a merged task. Until we are able to schedule task during
4002 variable mapping or unmapping, ignore nowait if depend clauses
4006 struct gomp_thread
*thr
= gomp_thread ();
4007 if (thr
->task
&& thr
->task
->depend_hash
)
4009 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
4011 && !thr
->task
->final_task
)
4013 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
4014 mapnum
, hostaddrs
, sizes
, kinds
,
4015 flags
, depend
, NULL
,
4016 GOMP_TARGET_TASK_DATA
))
4021 struct gomp_team
*team
= thr
->ts
.team
;
4022 /* If parallel or taskgroup has been cancelled, don't start new
4024 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4026 if (gomp_team_barrier_cancelled (&team
->barrier
))
4028 if (thr
->task
->taskgroup
)
4030 if (thr
->task
->taskgroup
->cancelled
)
4032 if (thr
->task
->taskgroup
->workshare
4033 && thr
->task
->taskgroup
->prev
4034 && thr
->task
->taskgroup
->prev
->cancelled
)
4039 gomp_task_maybe_wait_for_dependencies (depend
);
4045 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4046 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4049 struct gomp_thread
*thr
= gomp_thread ();
4050 struct gomp_team
*team
= thr
->ts
.team
;
4051 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4052 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4054 if (gomp_team_barrier_cancelled (&team
->barrier
))
4056 if (thr
->task
->taskgroup
)
4058 if (thr
->task
->taskgroup
->cancelled
)
4060 if (thr
->task
->taskgroup
->workshare
4061 && thr
->task
->taskgroup
->prev
4062 && thr
->task
->taskgroup
->prev
->cancelled
)
4067 htab_t refcount_set
= htab_create (mapnum
);
4069 /* The variables are mapped separately such that they can be released
4072 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4073 for (i
= 0; i
< mapnum
; i
++)
4074 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4076 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
4077 &kinds
[i
], true, &refcount_set
,
4078 GOMP_MAP_VARS_ENTER_DATA
);
4081 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
4083 for (j
= i
+ 1; j
< mapnum
; j
++)
4084 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
4085 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
4087 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
4088 &kinds
[i
], true, &refcount_set
,
4089 GOMP_MAP_VARS_ENTER_DATA
);
4092 else if (i
+ 1 < mapnum
4093 && ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
4094 || ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4095 && (kinds
[i
] & 0xff) != GOMP_MAP_ALWAYS_POINTER
)))
4097 /* An attach operation must be processed together with the mapped
4098 base-pointer list item. */
4099 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4100 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4104 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4105 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4107 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
4108 htab_free (refcount_set
);
4112 gomp_target_task_fn (void *data
)
4114 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
4115 struct gomp_device_descr
*devicep
= ttask
->devicep
;
4117 if (ttask
->fn
!= NULL
)
4121 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4122 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
4123 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
4125 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
4126 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
4131 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
4134 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
4138 void *actual_arguments
;
4139 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4142 actual_arguments
= ttask
->hostaddrs
;
4146 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
4147 NULL
, ttask
->sizes
, ttask
->kinds
, true,
4148 NULL
, GOMP_MAP_VARS_TARGET
);
4149 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
4151 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
4153 assert (devicep
->async_run_func
);
4154 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
4155 ttask
->args
, (void *) ttask
);
4158 else if (devicep
== NULL
4159 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4160 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4164 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
4165 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4166 ttask
->kinds
, true);
4169 htab_t refcount_set
= htab_create (ttask
->mapnum
);
4170 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4171 for (i
= 0; i
< ttask
->mapnum
; i
++)
4172 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4174 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
4175 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
4176 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4177 i
+= ttask
->sizes
[i
];
4180 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
4181 &ttask
->kinds
[i
], true, &refcount_set
,
4182 GOMP_MAP_VARS_ENTER_DATA
);
4184 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4185 ttask
->kinds
, &refcount_set
);
4186 htab_free (refcount_set
);
4192 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
4196 struct gomp_task_icv
*icv
= gomp_icv (true);
4197 icv
->thread_limit_var
4198 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4204 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
4205 unsigned int thread_limit
, bool first
)
4207 struct gomp_thread
*thr
= gomp_thread ();
4212 struct gomp_task_icv
*icv
= gomp_icv (true);
4213 icv
->thread_limit_var
4214 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4216 (void) num_teams_high
;
4217 if (num_teams_low
== 0)
4219 thr
->num_teams
= num_teams_low
- 1;
4222 else if (thr
->team_num
== thr
->num_teams
)
4230 omp_target_alloc (size_t size
, int device_num
)
4232 if (device_num
== omp_initial_device
4233 || device_num
== gomp_get_num_devices ())
4234 return malloc (size
);
4236 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4237 if (devicep
== NULL
)
4240 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4241 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4242 return malloc (size
);
4244 gomp_mutex_lock (&devicep
->lock
);
4245 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
4246 gomp_mutex_unlock (&devicep
->lock
);
4251 omp_target_free (void *device_ptr
, int device_num
)
4253 if (device_num
== omp_initial_device
4254 || device_num
== gomp_get_num_devices ())
4260 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4261 if (devicep
== NULL
|| device_ptr
== NULL
)
4264 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4265 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4271 gomp_mutex_lock (&devicep
->lock
);
4272 gomp_free_device_memory (devicep
, device_ptr
);
4273 gomp_mutex_unlock (&devicep
->lock
);
4277 omp_target_is_present (const void *ptr
, int device_num
)
4279 if (device_num
== omp_initial_device
4280 || device_num
== gomp_get_num_devices ())
4283 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4284 if (devicep
== NULL
)
4290 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4291 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4294 gomp_mutex_lock (&devicep
->lock
);
4295 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4296 struct splay_tree_key_s cur_node
;
4298 cur_node
.host_start
= (uintptr_t) ptr
;
4299 cur_node
.host_end
= cur_node
.host_start
;
4300 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4301 int ret
= n
!= NULL
;
4302 gomp_mutex_unlock (&devicep
->lock
);
4307 omp_target_memcpy_check (int dst_device_num
, int src_device_num
,
4308 struct gomp_device_descr
**dst_devicep
,
4309 struct gomp_device_descr
**src_devicep
)
4311 if (dst_device_num
!= gomp_get_num_devices ()
4312 /* Above gomp_get_num_devices has to be called unconditionally. */
4313 && dst_device_num
!= omp_initial_device
)
4315 *dst_devicep
= resolve_device (dst_device_num
, false);
4316 if (*dst_devicep
== NULL
)
4319 if (!((*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4320 || (*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4321 *dst_devicep
= NULL
;
4324 if (src_device_num
!= num_devices_openmp
4325 && src_device_num
!= omp_initial_device
)
4327 *src_devicep
= resolve_device (src_device_num
, false);
4328 if (*src_devicep
== NULL
)
4331 if (!((*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4332 || (*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4333 *src_devicep
= NULL
;
4340 omp_target_memcpy_copy (void *dst
, const void *src
, size_t length
,
4341 size_t dst_offset
, size_t src_offset
,
4342 struct gomp_device_descr
*dst_devicep
,
4343 struct gomp_device_descr
*src_devicep
)
4346 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
4348 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
4351 if (src_devicep
== NULL
)
4353 gomp_mutex_lock (&dst_devicep
->lock
);
4354 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4355 (char *) dst
+ dst_offset
,
4356 (char *) src
+ src_offset
, length
);
4357 gomp_mutex_unlock (&dst_devicep
->lock
);
4358 return (ret
? 0 : EINVAL
);
4360 if (dst_devicep
== NULL
)
4362 gomp_mutex_lock (&src_devicep
->lock
);
4363 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4364 (char *) dst
+ dst_offset
,
4365 (char *) src
+ src_offset
, length
);
4366 gomp_mutex_unlock (&src_devicep
->lock
);
4367 return (ret
? 0 : EINVAL
);
4369 if (src_devicep
== dst_devicep
)
4371 gomp_mutex_lock (&src_devicep
->lock
);
4372 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4373 (char *) dst
+ dst_offset
,
4374 (char *) src
+ src_offset
, length
);
4375 gomp_mutex_unlock (&src_devicep
->lock
);
4376 return (ret
? 0 : EINVAL
);
/* omp_target_memcpy: copy LENGTH bytes between (possibly device) buffers
   at the given offsets.  Returns 0 on success or an errno-style code.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);

  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);

  return ret;
}
/* Argument bundle passed to the helper task used by
   omp_target_memcpy_async.  Fields mirror omp_target_memcpy_copy's
   parameters (reconstructed from the helper's accesses).  */
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;
4410 omp_target_memcpy_async_helper (void *args
)
4412 omp_target_memcpy_data
*a
= args
;
4413 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
4414 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
4415 gomp_fatal ("omp_target_memcpy failed");
4419 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
4420 size_t dst_offset
, size_t src_offset
,
4421 int dst_device_num
, int src_device_num
,
4422 int depobj_count
, omp_depend_t
*depobj_list
)
4424 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4425 unsigned int flags
= 0;
4426 void *depend
[depobj_count
+ 5];
4428 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4429 &dst_devicep
, &src_devicep
);
4431 omp_target_memcpy_data s
= {
4435 .dst_offset
= dst_offset
,
4436 .src_offset
= src_offset
,
4437 .dst_devicep
= dst_devicep
,
4438 .src_devicep
= src_devicep
4444 if (depobj_count
> 0 && depobj_list
!= NULL
)
4446 flags
|= GOMP_TASK_FLAG_DEPEND
;
4448 depend
[1] = (void *) (uintptr_t) depobj_count
;
4449 depend
[2] = depend
[3] = depend
[4] = 0;
4450 for (i
= 0; i
< depobj_count
; ++i
)
4451 depend
[i
+ 5] = &depobj_list
[i
];
4454 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
4455 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4461 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
4462 int num_dims
, const size_t *volume
,
4463 const size_t *dst_offsets
,
4464 const size_t *src_offsets
,
4465 const size_t *dst_dimensions
,
4466 const size_t *src_dimensions
,
4467 struct gomp_device_descr
*dst_devicep
,
4468 struct gomp_device_descr
*src_devicep
)
4470 size_t dst_slice
= element_size
;
4471 size_t src_slice
= element_size
;
4472 size_t j
, dst_off
, src_off
, length
;
4477 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
4478 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
4479 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
4481 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
4483 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
4487 else if (src_devicep
== NULL
)
4488 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4489 (char *) dst
+ dst_off
,
4490 (const char *) src
+ src_off
,
4492 else if (dst_devicep
== NULL
)
4493 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4494 (char *) dst
+ dst_off
,
4495 (const char *) src
+ src_off
,
4497 else if (src_devicep
== dst_devicep
)
4498 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4499 (char *) dst
+ dst_off
,
4500 (const char *) src
+ src_off
,
4504 return ret
? 0 : EINVAL
;
4507 /* FIXME: it would be nice to have some plugin function to handle
4508 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
4509 be handled in the generic recursion below, and for host-host it
4510 should be used even for any num_dims >= 2. */
4512 for (i
= 1; i
< num_dims
; i
++)
4513 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
4514 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
4516 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
4517 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
4519 for (j
= 0; j
< volume
[0]; j
++)
4521 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
4522 (const char *) src
+ src_off
,
4523 element_size
, num_dims
- 1,
4524 volume
+ 1, dst_offsets
+ 1,
4525 src_offsets
+ 1, dst_dimensions
+ 1,
4526 src_dimensions
+ 1, dst_devicep
,
4530 dst_off
+= dst_slice
;
4531 src_off
+= src_slice
;
4537 omp_target_memcpy_rect_check (void *dst
, const void *src
, int dst_device_num
,
4539 struct gomp_device_descr
**dst_devicep
,
4540 struct gomp_device_descr
**src_devicep
)
4545 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4546 dst_devicep
, src_devicep
);
4550 if (*src_devicep
!= NULL
&& *dst_devicep
!= NULL
&& *src_devicep
!= *dst_devicep
)
4557 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
4558 size_t element_size
, int num_dims
,
4559 const size_t *volume
, const size_t *dst_offsets
,
4560 const size_t *src_offsets
,
4561 const size_t *dst_dimensions
,
4562 const size_t *src_dimensions
,
4563 struct gomp_device_descr
*dst_devicep
,
4564 struct gomp_device_descr
*src_devicep
)
4567 gomp_mutex_lock (&src_devicep
->lock
);
4568 else if (dst_devicep
)
4569 gomp_mutex_lock (&dst_devicep
->lock
);
4570 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
4571 volume
, dst_offsets
, src_offsets
,
4572 dst_dimensions
, src_dimensions
,
4573 dst_devicep
, src_devicep
);
4575 gomp_mutex_unlock (&src_devicep
->lock
);
4576 else if (dst_devicep
)
4577 gomp_mutex_unlock (&dst_devicep
->lock
);
/* OpenMP API: synchronous rectangular (multi-dimensional) memory copy
   between host and/or device memory.  Validates the arguments, then runs
   the locked copy.  Returns 0 on success or an error value (also INT_MAX
   for the NULL/NULL dimension query).  */
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
/* Argument bundle handed to omp_target_memcpy_rect_async_helper via
   GOMP_task; mirrors the parameter list of omp_target_memcpy_rect.  */
typedef struct
{
  void *dst;
  const void *src;
  size_t element_size;
  int num_dims;
  const size_t *volume;
  const size_t *dst_offsets;
  const size_t *src_offsets;
  const size_t *dst_dimensions;
  const size_t *src_dimensions;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_rect_data;
4624 omp_target_memcpy_rect_async_helper (void *args
)
4626 omp_target_memcpy_rect_data
*a
= args
;
4627 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
4628 a
->num_dims
, a
->volume
, a
->dst_offsets
,
4629 a
->src_offsets
, a
->dst_dimensions
,
4630 a
->src_dimensions
, a
->dst_devicep
,
4633 gomp_fatal ("omp_target_memcpy_rect failed");
4637 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
4638 int num_dims
, const size_t *volume
,
4639 const size_t *dst_offsets
,
4640 const size_t *src_offsets
,
4641 const size_t *dst_dimensions
,
4642 const size_t *src_dimensions
,
4643 int dst_device_num
, int src_device_num
,
4644 int depobj_count
, omp_depend_t
*depobj_list
)
4646 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4648 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4649 src_device_num
, &dst_devicep
,
4651 void *depend
[depobj_count
+ 5];
4654 omp_target_memcpy_rect_data s
= {
4657 .element_size
= element_size
,
4658 .num_dims
= num_dims
,
4660 .dst_offsets
= dst_offsets
,
4661 .src_offsets
= src_offsets
,
4662 .dst_dimensions
= dst_dimensions
,
4663 .src_dimensions
= src_dimensions
,
4664 .dst_devicep
= dst_devicep
,
4665 .src_devicep
= src_devicep
4671 if (depobj_count
> 0 && depobj_list
!= NULL
)
4673 flags
|= GOMP_TASK_FLAG_DEPEND
;
4675 depend
[1] = (void *) (uintptr_t) depobj_count
;
4676 depend
[2] = depend
[3] = depend
[4] = 0;
4677 for (i
= 0; i
< depobj_count
; ++i
)
4678 depend
[i
+ 5] = &depobj_list
[i
];
4681 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
4682 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4688 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
4689 size_t size
, size_t device_offset
, int device_num
)
4691 if (device_num
== omp_initial_device
4692 || device_num
== gomp_get_num_devices ())
4695 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4696 if (devicep
== NULL
)
4699 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4700 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4703 gomp_mutex_lock (&devicep
->lock
);
4705 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4706 struct splay_tree_key_s cur_node
;
4709 cur_node
.host_start
= (uintptr_t) host_ptr
;
4710 cur_node
.host_end
= cur_node
.host_start
+ size
;
4711 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4714 if (n
->tgt
->tgt_start
+ n
->tgt_offset
4715 == (uintptr_t) device_ptr
+ device_offset
4716 && n
->host_start
<= cur_node
.host_start
4717 && n
->host_end
>= cur_node
.host_end
)
4722 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
4723 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
4727 tgt
->to_free
= NULL
;
4729 tgt
->list_count
= 0;
4730 tgt
->device_descr
= devicep
;
4731 splay_tree_node array
= tgt
->array
;
4732 splay_tree_key k
= &array
->key
;
4733 k
->host_start
= cur_node
.host_start
;
4734 k
->host_end
= cur_node
.host_end
;
4736 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
4737 k
->refcount
= REFCOUNT_INFINITY
;
4738 k
->dynamic_refcount
= 0;
4741 array
->right
= NULL
;
4742 splay_tree_insert (&devicep
->mem_map
, array
);
4745 gomp_mutex_unlock (&devicep
->lock
);
4750 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
4752 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4753 if (devicep
== NULL
)
4756 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4759 gomp_mutex_lock (&devicep
->lock
);
4761 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4762 struct splay_tree_key_s cur_node
;
4765 cur_node
.host_start
= (uintptr_t) ptr
;
4766 cur_node
.host_end
= cur_node
.host_start
;
4767 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4769 && n
->host_start
== cur_node
.host_start
4770 && n
->refcount
== REFCOUNT_INFINITY
4771 && n
->tgt
->tgt_start
== 0
4772 && n
->tgt
->to_free
== NULL
4773 && n
->tgt
->refcount
== 1
4774 && n
->tgt
->list_count
== 0)
4776 splay_tree_remove (&devicep
->mem_map
, n
);
4777 gomp_unmap_tgt (n
->tgt
);
4781 gomp_mutex_unlock (&devicep
->lock
);
4786 omp_get_mapped_ptr (const void *ptr
, int device_num
)
4788 if (device_num
== omp_initial_device
4789 || device_num
== omp_get_initial_device ())
4790 return (void *) ptr
;
4792 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4793 if (devicep
== NULL
)
4796 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4797 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4798 return (void *) ptr
;
4800 gomp_mutex_lock (&devicep
->lock
);
4802 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4803 struct splay_tree_key_s cur_node
;
4806 cur_node
.host_start
= (uintptr_t) ptr
;
4807 cur_node
.host_end
= cur_node
.host_start
;
4808 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4812 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
4813 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
4816 gomp_mutex_unlock (&devicep
->lock
);
4822 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
4824 if (device_num
== omp_initial_device
4825 || device_num
== gomp_get_num_devices ())
4828 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4829 if (devicep
== NULL
)
4832 /* TODO: Unified shared memory must be handled when available. */
4834 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
4838 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
4841 if (device_num
== omp_initial_device
4842 || device_num
== gomp_get_num_devices ())
4843 return gomp_pause_host ();
4845 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4846 if (devicep
== NULL
)
4849 /* Do nothing for target devices for now. */
4854 omp_pause_resource_all (omp_pause_resource_t kind
)
4857 if (gomp_pause_host ())
4859 /* Do nothing for target devices for now. */
4863 ialias (omp_pause_resource
)
4864 ialias (omp_pause_resource_all
)
4866 #ifdef PLUGIN_SUPPORT
4868 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
4870 The handles of the found functions are stored in the corresponding fields
4871 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4874 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
4875 const char *plugin_name
)
4877 const char *err
= NULL
, *last_missing
= NULL
;
4879 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
4881 #if OFFLOAD_DEFAULTED
4887 /* Check if all required functions are available in the plugin and store
4888 their handlers. None of the symbols can legitimately be NULL,
4889 so we don't need to check dlerror all the time. */
4891 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4893 /* Similar, but missing functions are not an error. Return false if
4894 failed, true otherwise. */
4895 #define DLSYM_OPT(f, n) \
4896 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4897 || (last_missing = #n, 0))
4900 if (device
->version_func () != GOMP_VERSION
)
4902 err
= "plugin version mismatch";
4909 DLSYM (get_num_devices
);
4910 DLSYM (init_device
);
4911 DLSYM (fini_device
);
4913 DLSYM (unload_image
);
4918 device
->capabilities
= device
->get_caps_func ();
4919 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4922 DLSYM_OPT (async_run
, async_run
);
4923 DLSYM_OPT (can_run
, can_run
);
4926 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4928 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
4929 || !DLSYM_OPT (openacc
.create_thread_data
,
4930 openacc_create_thread_data
)
4931 || !DLSYM_OPT (openacc
.destroy_thread_data
,
4932 openacc_destroy_thread_data
)
4933 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
4934 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
4935 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
4936 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
4937 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
4938 || !DLSYM_OPT (openacc
.async
.queue_callback
,
4939 openacc_async_queue_callback
)
4940 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
4941 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
4942 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
4943 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
4945 /* Require all the OpenACC handlers if we have
4946 GOMP_OFFLOAD_CAP_OPENACC_200. */
4947 err
= "plugin missing OpenACC handler function";
4952 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
4953 openacc_cuda_get_current_device
);
4954 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
4955 openacc_cuda_get_current_context
);
4956 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
4957 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
4958 if (cuda
&& cuda
!= 4)
4960 /* Make sure all the CUDA functions are there if any of them are. */
4961 err
= "plugin missing OpenACC CUDA handler function";
4973 gomp_error ("while loading %s: %s", plugin_name
, err
);
4975 gomp_error ("missing function was %s", last_missing
);
4977 dlclose (plugin_handle
);
4982 /* This function finalizes all initialized devices. */
4985 gomp_target_fini (void)
4988 for (i
= 0; i
< num_devices
; i
++)
4991 struct gomp_device_descr
*devicep
= &devices
[i
];
4992 gomp_mutex_lock (&devicep
->lock
);
4993 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
4994 ret
= gomp_fini_device (devicep
);
4995 gomp_mutex_unlock (&devicep
->lock
);
4997 gomp_fatal ("device finalization failed");
5001 /* This function initializes the runtime for offloading.
5002 It parses the list of offload plugins, and tries to load these.
5003 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5004 will be set, and the array DEVICES initialized, containing descriptors for
5005 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
5009 gomp_target_init (void)
5011 const char *prefix
="libgomp-plugin-";
5012 const char *suffix
= SONAME_SUFFIX (1);
5013 const char *cur
, *next
;
5015 int i
, new_num_devs
;
5016 int num_devs
= 0, num_devs_openmp
;
5017 struct gomp_device_descr
*devs
= NULL
;
5019 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
5022 cur
= OFFLOAD_PLUGINS
;
5026 struct gomp_device_descr current_device
;
5027 size_t prefix_len
, suffix_len
, cur_len
;
5029 next
= strchr (cur
, ',');
5031 prefix_len
= strlen (prefix
);
5032 cur_len
= next
? next
- cur
: strlen (cur
);
5033 suffix_len
= strlen (suffix
);
5035 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
5042 memcpy (plugin_name
, prefix
, prefix_len
);
5043 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
5044 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
5046 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
5048 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
5049 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
5050 if (gomp_debug_var
> 0 && new_num_devs
< 0)
5053 int type
= current_device
.get_type_func ();
5054 for (int img
= 0; img
< num_offload_images
; img
++)
5055 if (type
== offload_images
[img
].type
)
5059 char buf
[sizeof ("unified_address, unified_shared_memory, "
5060 "reverse_offload")];
5061 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
5062 char *name
= (char *) malloc (cur_len
+ 1);
5063 memcpy (name
, cur
, cur_len
);
5064 name
[cur_len
] = '\0';
5066 "%s devices present but 'omp requires %s' "
5067 "cannot be fulfilled\n", name
, buf
);
5071 else if (new_num_devs
>= 1)
5073 /* Augment DEVICES and NUM_DEVICES. */
5075 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
5076 * sizeof (struct gomp_device_descr
));
5084 current_device
.name
= current_device
.get_name_func ();
5085 /* current_device.capabilities has already been set. */
5086 current_device
.type
= current_device
.get_type_func ();
5087 current_device
.mem_map
.root
= NULL
;
5088 current_device
.mem_map_rev
.root
= NULL
;
5089 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
5090 for (i
= 0; i
< new_num_devs
; i
++)
5092 current_device
.target_id
= i
;
5093 devs
[num_devs
] = current_device
;
5094 gomp_mutex_init (&devs
[num_devs
].lock
);
5105 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5106 NUM_DEVICES_OPENMP. */
5107 struct gomp_device_descr
*devs_s
5108 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
5115 num_devs_openmp
= 0;
5116 for (i
= 0; i
< num_devs
; i
++)
5117 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5118 devs_s
[num_devs_openmp
++] = devs
[i
];
5119 int num_devs_after_openmp
= num_devs_openmp
;
5120 for (i
= 0; i
< num_devs
; i
++)
5121 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5122 devs_s
[num_devs_after_openmp
++] = devs
[i
];
5126 for (i
= 0; i
< num_devs
; i
++)
5128 /* The 'devices' array can be moved (by the realloc call) until we have
5129 found all the plugins, so registering with the OpenACC runtime (which
5130 takes a copy of the pointer argument) must be delayed until now. */
5131 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5132 goacc_register (&devs
[i
]);
5135 num_devices
= num_devs
;
5136 num_devices_openmp
= num_devs_openmp
;
5138 if (atexit (gomp_target_fini
) != 0)
5139 gomp_fatal ("atexit failed");
5142 #else /* PLUGIN_SUPPORT */
5143 /* If dlfcn.h is unavailable we always fallback to host execution.
5144 GOMP_target* routines are just stubs for this case. */
5146 gomp_target_init (void)
5149 #endif /* PLUGIN_SUPPORT */