/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "oacc-plugin.h"
#include "gomp-constants.h"
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <stdio.h>  /* For snprintf.  */
#include "plugin-suffix.h"
/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#include "splay-tree.h"

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr
{
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of clauses from the OpenMP 'requires' directive.  */
static int omp_requires_mask;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
				 : omp_initial_device))
	return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && num_devices == 0)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but only the host device is available");
      else if (device_id == omp_invalid_device)
	gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");
      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != num_devices)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");
      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
static void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
static void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
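/* Illustration (added for exposition; not part of the original source): with
   the two thresholds above, regions whose gap is below MAX_COALESCE_BUF_GAP
   are merged into one chunk and hence one host-to-device transfer, e.g.

     gomp_coalesce_buf_add (&cbuf, 0, 1024);     // chunk [0, 1024)
     gomp_coalesce_buf_add (&cbuf, 3072, 1024);  // gap 2048 < 4096, so the
                                                 // chunk grows to [0, 4096)

   while a single region larger than MAX_COALESCE_BUF_SIZE (32 KiB) is
   rejected up front and later copied to the device directly.  */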
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Overlapping regions.  */
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
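/* Usage sketch (illustrative, not original code): a chunk holding a single
   region is discarded again before a new chunk is started, because staging
   exactly one region through the intermediate buffer would add a memcpy
   without saving any host-to-device transfer:

     gomp_coalesce_buf_add (&cbuf, 0, 512);      // chunk [0, 512), use_cnt 1
     gomp_coalesce_buf_add (&cbuf, 16384, 512);  // gap exceeds the limit and
                                                 // use_cnt was 1, so the old
                                                 // chunk is dropped and a new
                                                 // chunk [16384, 16896) opens.  */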
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   true.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }

		  /* In an asynchronous context, verify that CBUF isn't used
		     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
		  if (__builtin_expect (aq != NULL, 0))
		    assert (ephemeral);

		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  As we've
	     not been able to use CBUF, make a copy of the data into a
	     temporary buffer.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
		      "dev", d, "host", h, sz);
}
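/* Usage note (illustrative, not from the original source): an asynchronous
   copy whose source is a stack temporary must pass EPHEMERAL so the data is
   staged into a heap buffer that outlives the caller's frame, e.g.

     int v = 42;
     gomp_copy_host2dev (devicep, aq, d, &v, sizeof v,
                         true /+ ephemeral +/, NULL);

   With ephemeral == false and a non-NULL AQ, the queued transfer would read
   V only when the queue executes, by which time V may be gone.  */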
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d,
		      sz);
}
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
}
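/* Example of the OpenMP 5.0 at-most-once rule (illustrative): if a single
   construct maps the same storage twice, e.g. 'map(to: x) map(tofrom: x)',
   both entries resolve to the same refcount pointer; the first call inserts
   it into *REFCOUNT_SET and increments, the second finds it in the set and
   is a no-op, so the region's refcount rises by exactly 1 per construct.  */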
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
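/* Illustrative decision table for the out-values (derived from the logic
   above): the first encounter that drops the refcount to zero sets both
   *DO_COPY and *DO_REMOVE; a later sibling decrement within the same
   construct (refcount already zero, pointer already in *REFCOUNT_SET) sets
   only *DO_COPY, so each structure element can still be copied back while
   the key is removed from the splay tree just once.  */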
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag, bool implicit,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
	  && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
			  && newn->host_start <= oldn->host_start
			  && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
	   /* Otherwise, new contained inside old is considered valid.  */
	   || (oldn->host_start <= newn->host_start
	       && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
	 address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
	{
	  /* We have to be careful not to overwrite still attached pointers
	     during the copyback to host.  */
	  uintptr_t addr = newn->host_start;
	  while (addr < newn->host_end)
	    {
	      size_t i = (addr - oldn->host_start) / sizeof (void *);
	      if (oldn->aux->attach_count[i] == 0)
		gomp_copy_host2dev (devicep, aq,
				    (void *) (oldn->tgt->tgt_start
					      + oldn->tgt_offset
					      + addr - oldn->host_start),
				    (void *) addr,
				    sizeof (void *), false, cbuf);
	      addr += sizeof (void *);
	    }
	}
      else
	gomp_copy_host2dev (devicep, aq,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = 0;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}
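/* Worked example of the bias handling (illustrative): for an array section
   whose host pointer P holds the array base but whose mapped data starts
   BIAS bytes later, the lookup is done at *P + BIAS (inside the mapped
   region), and BIAS is subtracted from the resulting device address before
   it is written, so the device copy of P again points BIAS bytes before the
   mapped data, mirroring the host layout.  */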
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      cur_node.host_end--;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end++;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       allow attaching to NULL pointers when the target region is not
	       mapped.  */
	    data = 0;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
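/* Illustration of the attach bookkeeping (not original code): for

     struct S { int *p; ... } s;   // s mapped as an enclosing struct

   attaching s.p uses idx = (attach_to - n->host_start) / sizeof (void *) as
   index into n->aux->attach_count, i.e. one counter per pointer-sized slot
   of the struct; only the 0 -> 1 transition writes the translated device
   pointer, further attaches merely bump the counter.  */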
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
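/* Summary of the sentinel offsets used above (illustrative): a NULL key with
   OFFSET_INLINED means the host value itself is passed through (e.g.
   firstprivate integers), OFFSET_POINTER yields 0 here because the pointer
   is assigned separately, and OFFSET_STRUCT derives the address from the
   following list entry's mapping of the first structure field.  */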
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind,
							  kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH
	       || ((kind & typemask)
		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_INLINED;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     a statically assigned pointer, but then it should point
		     to the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier or if a
		 null pointer was there and now a non-null one has been
		 found; that permits transparent re-mapping for Fortran array
		 descriptors which were previously mapped unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, implicit,
				  NULL, refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_INLINED;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
					  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;
	    size_t j;
	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
						       - k->host_start),
				      sizes[j], cbufp, false);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    bool implicit = get_implicit (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		/* Save device address in hostaddr to permit later
		   availability when doing a deep-firstprivate with pointer
		   attach.  */
		hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
		tgt_size += len;

		/* If followed by GOMP_MAP_ATTACH, pointer assign this
		   firstprivate to hostaddrs[i+1], which is assumed to contain
		   a device address.  */
		if (i + 1 < mapnum
		    && (GOMP_MAP_ATTACH
			== (typemask & get_kind (short_mapkind, kinds, i+1))))
		  {
		    uintptr_t target = (uintptr_t) hostaddrs[i];
		    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
		    /* Per
		       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
		       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
		       this probably needs revision for 'aq' usage.  */
		    gomp_copy_host2dev (devicep, aq, devptr, &target,
					sizeof (void *), false, cbufp);
		    ++i;
		  }
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily test
		   'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp, refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if (i > 0
		    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
			!= GOMP_MAP_ALWAYS_POINTER))
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start
					    - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      bool zlas
			= ((kind & typemask)
			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i], sizes[i],
					   cbufp, zlas);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, false, implicit, cbufp,
				      refcount_set);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		k->refcount = 0;
		k->dynamic_refcount = 0;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (openmp_p)
		      {
			k->refcount = REFCOUNT_STRUCTELEM;
			if (field_tgt_structelem_first == NULL)
			  {
			    /* Set to first structure element of sequence.  */
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
			    field_tgt_structelem_first = k;
			  }
			else
			  /* Point to refcount of leading element, but do not
			     increment again.  */
			  k->structelem_refcount_ptr
			    = &field_tgt_structelem_first->structelem_refcount;

			if (i == field_tgt_clear)
			  {
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
			    field_tgt_structelem_first = NULL;
			  }
		      }
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		/* First increment, from 0 to 1.  gomp_increment_refcount
		   encapsulates the different increment cases, so use this
		   instead of directly setting 1 during initialization.  */
		gomp_increment_refcount (k, refcount_set);

		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
		    gomp_map_pointer
		      (tgt, aq, (uintptr_t) *(void **) k->host_start,
		       k->tgt_offset, sizes[i], cbufp,
		       ((kind & typemask)
			== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    tgt->list[i].has_null_ptr_assoc = false;

		    for (j = i + 1; j < mapnum; j++)
		      {
			int ptr_kind = (get_kind (short_mapkind, kinds, j)
					& typemask);
			if (!GOMP_MAP_POINTER_P (ptr_kind)
			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
			  break;
			else if ((uintptr_t) hostaddrs[j] < k->host_start
				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				     > k->host_end))
			  break;
			else
			  {
			    tgt->list[j].key = k;
			    tgt->list[j].copy_from = false;
			    tgt->list[j].always_copy_from = false;
			    tgt->list[j].is_attach = false;
			    tgt->list[i].has_null_ptr_assoc
			      |= !(*(void **) hostaddrs[j]);
			    /* For OpenMP, the use of refcount_sets causes
			       errors if we set k->refcount = 1 above but also
			       increment it again here, for decrementing will
			       not properly match, since we decrement only once
			       for each key's refcount.  Therefore avoid this
			       increment for OpenMP constructs.  */
			    if (!openmp_p)
			      gomp_increment_refcount (k, refcount_set);
			    gomp_map_pointer (tgt, aq,
					      (uintptr_t) *(void **) hostaddrs[j],
					      k->tgt_offset
					      + ((uintptr_t) hostaddrs[j]
						 - k->host_start),
					      sizes[j], cbufp, false);
			  }
		      }
		    i = j - 1;
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		  case GOMP_MAP_ALWAYS_PRESENT_TO:
		  case GOMP_MAP_ALWAYS_PRESENT_FROM:
		  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: not present on the device "
				  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
				  "dev: %d)", (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size,
				  devicep->target_id);
#else
		      gomp_fatal ("present clause: not present on the device "
				  "(addr: %p, size: %lu (0x%lx), dev: %d)",
				  (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size,
				  devicep->target_id);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), false, cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), true, NULL);
		  }
		array++;
	      }
	  }
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      true, cbufp);
	}
    }

  if (cbufp)
    {
      long c;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start,
			    false, NULL);
      if (aq)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
      else
	free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, htab_t *refcount_set,
	       enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}
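/* Usage sketch (illustrative, not from the original source): a caller
   pairing a single map with a single unmap can pass NULL and let both calls
   build their own set:

     struct target_mem_desc *tgt
       = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                        true, NULL, GOMP_MAP_VARS_TARGET);
     ...
     gomp_unmap_vars (tgt, true, NULL);

   Callers that map and unmap across several calls on behalf of one construct
   must instead share one refcount_set, so each refcount is adjusted at most
   once per construct.  */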
attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}
/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}
attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
	     void **hostaddrs, size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    if (n->aux && n->aux->attach_count)
	      {
		uintptr_t addr = cur_node.host_start;
		while (addr < cur_node.host_end)
		  {
		    /* We have to be careful not to overwrite still attached
		       pointers during host<->device updates.  */
		    size_t i = (addr - cur_node.host_start) / sizeof (void *);
		    if (n->aux->attach_count[i] == 0)
		      {
			void *devaddr = (void *) (n->tgt->tgt_start
						  + n->tgt_offset
						  + addr - n->host_start);
			if (GOMP_MAP_COPY_TO_P (kind & typemask))
			  gomp_copy_host2dev (devicep, NULL,
					      devaddr, (void *) addr,
					      sizeof (void *), false, NULL);
			if (GOMP_MAP_COPY_FROM_P (kind & typemask))
			  gomp_copy_dev2host (devicep, NULL,
					      (void *) addr, devaddr,
					      sizeof (void *));
		      }
		    addr += sizeof (void *);
		  }
	      }
	    else
	      {
		void *hostaddr = (void *) cur_node.host_start;
		void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
					  + cur_node.host_start
					  - n->host_start);
		size_t size = cur_node.host_end - cur_node.host_start;

		if (GOMP_MAP_COPY_TO_P (kind & typemask))
		  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				      false, NULL);
		if (GOMP_MAP_COPY_FROM_P (kind & typemask))
		  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	      }
	  }
	else
	  {
	    int kind = get_kind (short_mapkind, kinds, i);

	    if (GOMP_MAP_PRESENT_P (kind))
	      {
		/* We already looked up the memory region above and it
		   was missing.  */
		gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		gomp_fatal ("present clause: not present on the device "
			    "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
			    "dev: %d)", (void *) hostaddrs[i],
			    (uint64_t) sizes[i], (uint64_t) sizes[i],
			    devicep->target_id);
#else
		gomp_fatal ("present clause: not present on the device "
			    "(addr: %p, size: %lu (0x%lx), dev: %d)",
			    (void *) hostaddrs[i], (unsigned long) sizes[i],
			    (unsigned long) sizes[i], devicep->target_id);
#endif
	      }
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
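/* For illustration: a directive such as

     #pragma omp target update from(x) to(y)

   reaches gomp_update via GOMP_target_update{,_ext} with one map entry per
   list item; GOMP_MAP_COPY_FROM_P matches the 'from(x)' entry and
   GOMP_MAP_COPY_TO_P the 'to(y)' entry, selecting the copy direction for
   each mapped region.  */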
static struct gomp_offload_icv_list *
gomp_get_offload_icv_item (int dev_num)
{
  struct gomp_offload_icv_list *l = gomp_offload_icv_list;
  while (l != NULL && l->device_num != dev_num)
    l = l->next;

  return l;
}
/* Helper function for 'gomp_load_image_to_device'.  Returns the ICV values
   depending on the device num and the variable hierarchy
   (_DEV_42, _DEV, _ALL).  If no ICV was initially configured for the given
   device and thus no item with that device number is contained in
   gomp_offload_icv_list, then a new item is created and added to the list.  */

static struct gomp_offload_icvs *
get_gomp_offload_icvs (int dev_num)
{
  struct gomp_icv_list *dev
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
  struct gomp_icv_list *all
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
  struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
  struct gomp_offload_icv_list *offload_icvs
    = gomp_get_offload_icv_item (dev_num);

  if (offload_icvs != NULL)
    return &offload_icvs->icvs;

  struct gomp_offload_icv_list *new
    = (struct gomp_offload_icv_list *)
      gomp_malloc (sizeof (struct gomp_offload_icv_list));

  new->device_num = dev_num;
  new->icvs.device_num = dev_num;
  new->next = gomp_offload_icv_list;

  if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev_x->icvs.nteams_var;
  else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev->icvs.nteams_var;
  else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = all->icvs.nteams_var;
  else
    new->icvs.nteams = gomp_default_icv_values.nteams_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
  else if (dev != NULL
	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
  else if (all != NULL
	   && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
  else
    new->icvs.teams_thread_limit
      = gomp_default_icv_values.teams_thread_limit_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev_x->icvs.default_device_var;
  else if (dev != NULL
	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev->icvs.default_device_var;
  else if (all != NULL
	   && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = all->icvs.default_device_var;
  else
    new->icvs.default_device = gomp_default_icv_values.default_device_var;

  gomp_offload_icv_list = new;
  return &new->icvs;
}
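/* For illustration: with the environment settings

     OMP_NUM_TEAMS_DEV_0=3  OMP_NUM_TEAMS_DEV=5

   the hierarchy above yields nteams == 3 for device 0 (the _DEV_0 item wins)
   and nteams == 5 for every other device; only when none of the _DEV_<n>,
   _DEV or _ALL variants was configured does the default ICV value apply.  */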
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compilers
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];
  void **host_ind_func_table = NULL;
  void **host_ind_funcs_end = NULL;

  if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
    {
      host_ind_func_table = ((void ***) host_table)[4];
      host_ind_funcs_end = ((void ***) host_table)[5];
    }

  /* The func and ind_func tables contain only addresses, the var table
     contains addresses and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;
  int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  uint64_t *rev_target_fn_table = NULL;
  int i, num_target_entries;

  /* With reverse offload, insert also target-host addresses.  */
  bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table,
				rev_lookup ? &rev_target_fn_table : NULL,
				num_ind_funcs
				? (uint64_t *) host_ind_func_table : NULL);

  if (num_target_entries != num_funcs + num_vars
      /* "+1" due to the additional ICV struct.  */
      && num_target_entries != num_funcs + num_vars + 1)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  /* "+1" due to the additional ICV struct.  */
  tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
			    * sizeof (*tgt->array));
  if (rev_target_fn_table)
    tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
  else
    tgt->rev_array = NULL;
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;
  reverse_splay_tree_node rev_array = tgt->rev_array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      if (rev_target_fn_table)
	{
	  reverse_splay_tree_key k2 = &rev_array->key;
	  k2->dev = rev_target_fn_table[i];
	  k2->k = k;
	  rev_array->left = NULL;
	  rev_array->right = NULL;
	  if (k2->dev != 0)
	    reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
	  rev_array++;
	}
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var
	  && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Last entry is for a ICVs variable.
     Tolerate case where plugin does not return those entries.  */
  if (num_funcs + num_vars < num_target_entries)
    {
      struct addr_pair *var = &target_table[num_funcs + num_vars];

      /* Start address will be non-zero for the ICVs variable if
	 the variable was found in this image.  */
      if (var->start != 0)
	{
	  /* The index of the devicep within devices[] is regarded as its
	     'device number', which is different from the per-device type
	     devicep->target_id.  */
	  int dev_num = (int) (devicep - &devices[0]);
	  struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
	  size_t var_size = var->end - var->start;
	  if (var_size != sizeof (struct gomp_offload_icvs))
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      if (is_register_lock)
		gomp_mutex_unlock (&register_lock);
	      gomp_fatal ("offload plugin managed 'icv struct' not of "
			  "expected format");
	    }
	  /* Copy the ICVs variable to place on device memory, hereby
	     actually designating its device number into effect.  */
	  gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
			      var_size, false, NULL);
	  splay_tree_key k = &array->key;
	  k->host_start = (uintptr_t) icvs;
	  k->host_end
	    = k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
	  k->tgt = tgt;
	  k->tgt_offset = var->start;
	  k->refcount = REFCOUNT_INFINITY;
	  k->dynamic_refcount = 0;
	  k->aux = NULL;
	  array->left = NULL;
	  array->right = NULL;
	  splay_tree_insert (&devicep->mem_map, array);
	  array++;
	}
    }

  free (target_table);
}
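/* For illustration, the layout decoded above: HOST_TABLE points at an array
   of table-boundary pointers, roughly

     void *host_table[] = {
       __offload_func_table, __offload_funcs_end,          // [0], [1]
       __offload_var_table, __offload_vars_end,            // [2], [3]
       __offload_ind_func_table, __offload_ind_funcs_end   // [4], [5]
     };

   with the var table holding (address, size) pairs, hence the division by
   two above.  The '__offload_*' names here merely illustrate the
   compiler/linker-generated tables and are not definitions from this
   file.  */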
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }
  if (devicep->mem_map_rev.root)
    {
      /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
	 real allocation.  */
      assert (node && node->tgt && node->tgt->rev_array);
      assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
      free (node->tgt->rev_array);
      devicep->mem_map_rev.root = NULL;
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
static void
gomp_requires_to_name (char *buf, size_t size, int requires_mask)
{
  char *end = buf + size, *p = buf;
  if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
    p += snprintf (p, end - p, "unified_address");
  if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
    p += snprintf (p, end - p, "%sunified_shared_memory",
		   (p == buf ? "" : ", "));
  if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
    p += snprintf (p, end - p, "%sreverse_offload",
		   (p == buf ? "" : ", "));
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  int omp_req;
  const void *target_data;
  if (GOMP_VERSION_LIB (version) > 1)
    {
      omp_req = (int) (size_t) ((void **) data)[0];
      target_data = &((void **) data)[1];
    }
  else
    {
      omp_req = 0;
      target_data = data;
    }

  gomp_mutex_lock (&register_lock);

  if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
    {
      char buf1[sizeof ("unified_address, unified_shared_memory, "
			"reverse_offload")];
      char buf2[sizeof ("unified_address, unified_shared_memory, "
			"reverse_offload")];
      gomp_requires_to_name (buf2, sizeof (buf2),
			     omp_req != GOMP_REQUIRES_TARGET_USED
			     ? omp_req : omp_requires_mask);
      if (omp_req != GOMP_REQUIRES_TARGET_USED
	  && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
	{
	  gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
	  gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
		      "in multiple compilation units: '%s' vs. '%s'",
		      buf1, buf2);
	}
      else
	gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
		    "some compilation units", buf2);
    }
  omp_requires_mask = omp_req;

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
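/* For illustration: the consistency check above fires when e.g. one
   translation unit containing device code was compiled with

     #pragma omp requires unified_shared_memory

   and another such unit without it; the two images then register with
   different omp_requires masks and the second registration aborts through
   one of the gomp_fatal calls above.  */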
/* Legacy entry point.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  const void *target_data;
  if (GOMP_VERSION_LIB (version) > 1)
    target_data = &((void **) data)[1];
  else
    target_data = data;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
/* Legacy entry point.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  if (args)
    while (*args)
      {
	intptr_t id = (intptr_t) *args++, val;
	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	  val = (intptr_t) *args++;
	else
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	  continue;
	id &= GOMP_TARGET_ARG_ID_MASK;
	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
	  continue;
	val = val > INT_MAX ? INT_MAX : val;
	if (val)
	  gomp_icv (true)->thread_limit_var = val;
	break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
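/* For illustration, a worked example: with two firstprivate entries of
   size 4 (align 4) and size 8 (align 8), the loop above computes
   *tgt_align == 8 and *tgt_size == 16 (4 bytes, then rounded up to 8, plus
   8); the extra 'tgt_align - 1' slack the callers allocate then guarantees
   the block can be realigned to 8 bytes.  */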
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
	if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
	  {
	    *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1])
	      = (uintptr_t) hostaddrs[i];
	    ++i;
	  }
      }
}
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);

  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     (void *) tgt_vars->tgt_start, NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
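/* For illustration: a plain

     #pragma omp target map(tofrom: x)
     { x++; }

   compiled against the older ABI lowers to a GOMP_target call with
   mapnum == 1, 'kinds' holding GOMP_MAP_TOFROM and 'fn' pointing to the
   outlined body; newer compilers emit GOMP_target_ext below instead.  */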
static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}
/* Copy back ICVs from device to host.  */

static void
gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
{
  struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
  if (item == NULL)
    return;

  void *host_ptr = &item->icvs;
  void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
  if (dev_ptr != NULL)
    gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
			sizeof (struct gomp_offload_icvs));
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element contains the actual value.  The
   last element of the array is a single NULL.  Among the device independent
   ones can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  /* Obtain the original TEAMS and THREADS values from ARGS.  */
  intptr_t orig_teams = 1, orig_threads = 0;
  size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
  void **tmpargs = args;
  while (*tmpargs)
    {
      intptr_t id = (intptr_t) *tmpargs++, val;
      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	{
	  val = (intptr_t) *tmpargs++;
	  len = 2;
	}
      else
	{
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	  len = 1;
	}
      num_args += len;
      if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	continue;
      val = val > INT_MAX ? INT_MAX : val;
      if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
	{
	  orig_teams = val;
	  teams_len = len;
	}
      else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
	{
	  orig_threads = val;
	  threads_len = len;
	}
    }

  intptr_t new_teams = orig_teams, new_threads = orig_threads;
  /* ORIG_TEAMS == -2: No explicit teams construct specified.  Set to 1.
     ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
     value could not be determined.  No change.
     ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
     Set device-specific value.
     ORIG_TEAMS > 0: Value was already set through e.g. a NUM_TEAMS clause.
     No change.  */
  if (orig_teams == -2)
    new_teams = 1;
  else if (orig_teams == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
	new_teams = item->icvs.nteams;
    }
  /* The device-specific teams-thread-limit is only set if (a) an explicit
     TEAMS region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not
     already set by e.g. a THREAD_LIMIT clause.  */
  if (orig_teams > -2 && orig_threads == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
	new_threads = item->icvs.teams_thread_limit;
    }

  /* Copy and change the arguments list only if TEAMS or THREADS need to be
     updated.  */
  void **new_args = args;
  if (orig_teams != new_teams || orig_threads != new_threads)
    {
      size_t tms_len = (orig_teams == new_teams
			? teams_len
			: (new_teams > -(1 << 15) && new_teams < (1 << 15)
			   ? 1 : 2));
      size_t ths_len = (orig_threads == new_threads
			? threads_len
			: (new_threads > -(1 << 15) && new_threads < (1 << 15)
			   ? 1 : 2));
      /* One additional item after the last arg must be NULL.  */
      size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
			    + ths_len + 1;
      new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));

      tmpargs = args;
      void **tmp_new_args = new_args;
      /* Copy all args except TEAMS and THREADS.  TEAMS and THREADS are copied
	 too if they have not been changed and skipped otherwise.  */
      while (*tmpargs)
	{
	  intptr_t id = (intptr_t) *tmpargs;
	  if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
	       && orig_teams != new_teams)
	      || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
		  && orig_threads != new_threads))
	    {
	      tmpargs++;
	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
		tmpargs++;
	    }
	  else
	    {
	      *tmp_new_args++ = *tmpargs++;
	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
		*tmp_new_args++ = *tmpargs++;
	    }
	}

      /* Add the new TEAMS arg to the new args list if it has been changed.  */
      if (orig_teams != new_teams)
	{
	  intptr_t new_val = new_teams;
	  if (tms_len == 1)
	    {
	      new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
			| GOMP_TARGET_ARG_NUM_TEAMS;
	      *tmp_new_args++ = (void *) new_val;
	    }
	  else
	    {
	      *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
					  | GOMP_TARGET_ARG_NUM_TEAMS);
	      *tmp_new_args++ = (void *) new_val;
	    }
	}

      /* Add the new THREADS arg to the new args list if it has been
	 changed.  */
      if (orig_threads != new_threads)
	{
	  intptr_t new_val = new_threads;
	  if (ths_len == 1)
	    {
	      new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
			| GOMP_TARGET_ARG_THREAD_LIMIT;
	      *tmp_new_args++ = (void *) new_val;
	    }
	  else
	    {
	      *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
					  | GOMP_TARGET_ARG_THREAD_LIMIT);
	      *tmp_new_args++ = (void *) new_val;
	    }
	}

      *tmp_new_args = NULL;
    }

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task **implicit_task = &task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  while (*implicit_task
		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
	    implicit_task = &(*implicit_task)->parent;
	  if (*implicit_task)
	    {
	      thr->task = *implicit_task;
	      gomp_end_task ();
	      free (*implicit_task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	  if (implicit_task != &task)
	    {
	      *implicit_task = thr->task;
	      thr->task = task;
	    }
	}
      if (devicep != NULL
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, new_args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep, new_args);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
				true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     new_args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);

  /* Copy back ICVs from device to host.
     HOST_PTR is expected to exist since it was added in
     gomp_load_image_to_device if not already available.  */
  gomp_copy_back_icvs (devicep, device);
}
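/* For illustration: a compact ARGS entry packs the value into the id word,
   e.g. for num_teams(4) targeting all devices roughly

     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
	       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS)

   while a value that does not fit in the shifted field is passed as two
   elements, GOMP_TARGET_ARG_SUBSEQUENT_PARAM | GOMP_TARGET_ARG_NUM_TEAMS
   followed by the raw value; the parsing loop above handles both forms.  */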
/* Reverse lookup (device addr -> host addr) for reverse offload.  We avoid
   keeping track of all variable handling - assuming that reverse offload
   occurs only very rarely.  Downside is that the reverse search is slow.  */

struct gomp_splay_tree_rev_lookup_data
{
  uintptr_t tgt_start;
  uintptr_t tgt_end;
  splay_tree_key key;
};

static int
gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
{
  struct gomp_splay_tree_rev_lookup_data *data;
  data = (struct gomp_splay_tree_rev_lookup_data *)d;
  uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;

  if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
    return 0;

  size_t j;
  for (j = 0; j < key->tgt->list_count; j++)
    if (key->tgt->list[j].key == key)
      break;
  assert (j < key->tgt->list_count);
  uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;

  if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
      || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
    {
      data->key = key;
      return 1;
    }
  return 0;
}
static inline splay_tree_key
gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
		     bool zero_len)
{
  struct gomp_splay_tree_rev_lookup_data data;
  data.key = NULL;
  data.tgt_start = tgt_start;
  data.tgt_end = tgt_end;

  if (tgt_start != tgt_end)
    {
      splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
      return data.key;
    }

  data.tgt_end++;
  splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
  if (data.key != NULL || zero_len)
    return data.key;
  data.tgt_end--;

  data.tgt_start--;
  splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
  return data.key;
}

struct cpy_data
{
  uint64_t devaddr;
  bool present, aligned;
};
/* Search just mapped reverse-offload data; returns index if found,
   otherwise N.  */

static inline size_t
gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
			   unsigned short *kinds, uint64_t *sizes, size_t n,
			   uint64_t tgt_start, uint64_t tgt_end)
{
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  size_t i;
  for (i = 0; i < n; i++)
    {
      bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
			== GOMP_MAP_STRUCT);
      uint64_t dev_end;
      if (!is_struct)
	dev_end = d[i].devaddr + sizes[i];
      else
	{
	  if (i + sizes[i] < n)
	    dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
	  else
	    dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
	}
      if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
	  || (dev_end > tgt_start && d[i].devaddr < tgt_end))
	break;
      if (is_struct)
	i += sizes[i];
    }
  return i;
}

static inline size_t
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
		       bool zero_len)
{
  size_t i;
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				      tgt_start, tgt_end);
  tgt_end++;
  i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				 tgt_start, tgt_end);
  if (i < n || zero_len)
    return i;
  tgt_end--;

  tgt_start--;
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				    tgt_start, tgt_end);
}
/* Handle reverse offload.  This is called by the device plugins for a
   reverse offload; it is not called if the outer target runs on the host.
   The mapping is simplified: device-affecting constructs (except for target
   with device(ancestor:1)) must not be encountered; in particular not
   target (enter/exit) data.  */

void
gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
		 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
		 struct goacc_asyncqueue *aq)
{
  /* Return early if there is no offload code.  */
  if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
    return;
  /* Currently, this fails because of calculate_firstprivate_requirements
     below; it could be fixed but additional code needs to be updated to
     handle 32bit hosts - thus, it is not worthwhile.  */
  if (sizeof (void *) != sizeof (uint64_t))
    gomp_fatal ("Reverse offload of 32bit hosts not supported.");

  struct cpy_data *cdata = NULL;
  uint64_t *devaddrs;
  uint64_t *sizes;
  unsigned short *kinds;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct gomp_device_descr *devicep = resolve_device (dev_num, false);

  reverse_splay_tree_key n;
  struct reverse_splay_tree_key_s k;
  k.dev = fn_ptr;

  gomp_mutex_lock (&devicep->lock);
  n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
  gomp_mutex_unlock (&devicep->lock);

  if (n == NULL)
    gomp_fatal ("Cannot find reverse-offload function");
  void (*host_fn)() = (void (*)()) n->k->host_start;

  if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
    {
      devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
      sizes = (uint64_t *) (uintptr_t) sizes_ptr;
      kinds = (unsigned short *) (uintptr_t) kinds_ptr;
    }
  else
    {
      devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      kinds = (unsigned short *) gomp_malloc (mapnum
					      * sizeof (unsigned short));
      gomp_copy_dev2host (devicep, aq, devaddrs,
			  (const void *) (uintptr_t) devaddrs_ptr,
			  mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, sizes,
			  (const void *) (uintptr_t) sizes_ptr,
			  mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, kinds,
			  (const void *) (uintptr_t) kinds_ptr,
			  mapnum * sizeof (unsigned short));
      if (aq && !devicep->openacc.async.synchronize_func (aq))
	exit (EXIT_FAILURE);
    }

  size_t tgt_align = 0, tgt_size = 0;

  /* If actually executed on 32bit systems, the casts lead to wrong code;
     but 32bit with offloading is not supported; see top of this function.  */
  calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
				       (void *) (uintptr_t) kinds,
				       &tgt_align, &tgt_size);

  if (tgt_align)
    {
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (uint64_t i = 0; i < mapnum; i++)
	if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
	    && devaddrs[i] != 0)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	      memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
		      (size_t) sizes[i]);
	    else
	      {
		gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
				    (void *) (uintptr_t) devaddrs[i],
				    (size_t) sizes[i]);
		if (aq && !devicep->openacc.async.synchronize_func (aq))
		  exit (EXIT_FAILURE);
	      }
	    devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	    if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
		&& i + 1 < mapnum
		&& ((get_kind (short_mapkind, kinds, i) & typemask)
		    == GOMP_MAP_ATTACH))
	      {
		*(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
		  = (uint64_t) devaddrs[i];
		++i;
	      }
	  }
    }

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      size_t j, struct_cpy = 0;
      splay_tree_key n2;
      cdata = gomp_alloca (sizeof (*cdata) * mapnum);
      memset (cdata, '\0', sizeof (*cdata) * mapnum);
      gomp_mutex_lock (&devicep->lock);
      for (uint64_t i = 0; i < mapnum; i++)
	{
	  if (devaddrs[i] == 0)
	    continue;
	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
	  switch (kind)
	    {
	    case GOMP_MAP_FIRSTPRIVATE:
	    case GOMP_MAP_FIRSTPRIVATE_INT:
	      continue;

	    case GOMP_MAP_DELETE:
	    case GOMP_MAP_RELEASE:
	    case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	      /* Assume it is present; look it up - but ignore unless the
		 present clause is there.  */
	    case GOMP_MAP_ALLOC:
	    case GOMP_MAP_FROM:
	    case GOMP_MAP_FORCE_ALLOC:
	    case GOMP_MAP_FORCE_FROM:
	    case GOMP_MAP_ALWAYS_FROM:
	    case GOMP_MAP_TO:
	    case GOMP_MAP_TOFROM:
	    case GOMP_MAP_FORCE_TO:
	    case GOMP_MAP_FORCE_TOFROM:
	    case GOMP_MAP_ALWAYS_TO:
	    case GOMP_MAP_ALWAYS_TOFROM:
	    case GOMP_MAP_FORCE_PRESENT:
	    case GOMP_MAP_ALWAYS_PRESENT_FROM:
	    case GOMP_MAP_ALWAYS_PRESENT_TO:
	    case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
	    case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	      cdata[i].devaddr = devaddrs[i];
	      bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			       || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
	      j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					 devaddrs[i],
					 devaddrs[i] + sizes[i], zero_len);
	      if (j < i)
		{
		  n2 = NULL;
		  cdata[i].present = true;
		  devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
		}
	      else
		{
		  n2 = gomp_map_rev_lookup (&devicep->mem_map,
					    devaddrs[i],
					    devaddrs[i] + sizes[i], zero_len);
		  cdata[i].present = n2 != NULL;
		}
	      if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
		{
		  gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		  gomp_fatal ("present clause: no corresponding data on "
			      "parent device at %p with size %"PRIu64,
			      (void *) (uintptr_t) devaddrs[i],
			      (uint64_t) sizes[i]);
#else
		  gomp_fatal ("present clause: no corresponding data on "
			      "parent device at %p with size %lu",
			      (void *) (uintptr_t) devaddrs[i],
			      (unsigned long) sizes[i]);
#endif
		}
	      else if (!cdata[i].present
		       && kind != GOMP_MAP_DELETE
		       && kind != GOMP_MAP_RELEASE
		       && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
		{
		  cdata[i].aligned = true;
		  size_t align = (size_t) 1 << (kinds[i] >> 8);
		  devaddrs[i]
		    = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
								 sizes[i]);
		}
	      else if (n2 != NULL)
		devaddrs[i] = (n2->host_start + cdata[i].devaddr
			       - (n2->tgt->tgt_start + n2->tgt_offset));
	      if (((!cdata[i].present || struct_cpy)
		   && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
		  || kind == GOMP_MAP_FORCE_TO
		  || kind == GOMP_MAP_FORCE_TOFROM
		  || GOMP_MAP_ALWAYS_TO_P (kind))
		{
		  gomp_copy_dev2host (devicep, aq,
				      (void *) (uintptr_t) devaddrs[i],
				      (void *) (uintptr_t) cdata[i].devaddr,
				      sizes[i]);
		  if (aq && !devicep->openacc.async.synchronize_func (aq))
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      exit (EXIT_FAILURE);
		    }
		}
	      if (struct_cpy)
		struct_cpy--;
	      break;
	    case GOMP_MAP_ATTACH:
	    case GOMP_MAP_POINTER:
	    case GOMP_MAP_ALWAYS_POINTER:
	      n2 = gomp_map_rev_lookup (&devicep->mem_map,
					devaddrs[i] + sizes[i],
					devaddrs[i] + sizes[i]
					+ sizeof (void*), false);
	      cdata[i].present = n2 != NULL;
	      cdata[i].devaddr = devaddrs[i];
	      if (n2)
		devaddrs[i] = (n2->host_start + cdata[i].devaddr
			       - (n2->tgt->tgt_start + n2->tgt_offset));
	      else
		{
		  j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					     devaddrs[i] + sizes[i],
					     devaddrs[i] + sizes[i]
					     + sizeof (void*), false);
		  if (j < i)
		    {
		      cdata[i].present = true;
		      devaddrs[i] = (devaddrs[j] + devaddrs[i]
				     - cdata[j].devaddr);
		    }
		}
	      if (!cdata[i].present)
		devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
	      /* Assume that when present, the pointer is already correct.  */
	      if (!n2)
		*(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
		  = devaddrs[i-1];
	      break;
	    case GOMP_MAP_TO_PSET:
	      /* Assume that when present, the pointers are fine and no 'to:'
		 is required.  */
	      n2 = gomp_map_rev_lookup (&devicep->mem_map,
					devaddrs[i], devaddrs[i] + sizes[i],
					false);
	      cdata[i].present = n2 != NULL;
	      cdata[i].devaddr = devaddrs[i];
	      if (n2)
		devaddrs[i] = (n2->host_start + cdata[i].devaddr
			       - (n2->tgt->tgt_start + n2->tgt_offset));
	      else
		{
		  j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					     devaddrs[i],
					     devaddrs[i] + sizes[i], false);
		  if (j < i)
		    {
		      cdata[i].present = true;
		      devaddrs[i] = (devaddrs[j] + devaddrs[i]
				     - cdata[j].devaddr);
		    }
		}
	      if (!cdata[i].present)
		{
		  cdata[i].aligned = true;
		  size_t align = (size_t) 1 << (kinds[i] >> 8);
		  devaddrs[i]
		    = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
								 sizes[i]);
		  gomp_copy_dev2host (devicep, aq,
				      (void *) (uintptr_t) devaddrs[i],
				      (void *) (uintptr_t) cdata[i].devaddr,
				      sizes[i]);
		  if (aq && !devicep->openacc.async.synchronize_func (aq))
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      exit (EXIT_FAILURE);
		    }
		}
	      for (j = i + 1; j < mapnum; j++)
		{
		  kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
		      && !GOMP_MAP_POINTER_P (kind))
		    break;
		  if (devaddrs[j] < devaddrs[i])
		    break;
		  if (cdata[i].present)
		    continue;
		  if (devaddrs[j] == 0)
		    {
		      *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
		      continue;
		    }
		  size_t k;
		  n2 = NULL;
		  /* Dereference devaddrs[j] to get the device addr.  */
		  assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
		  devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
							   + sizes[j]);
		  cdata[j].present = true;
		  cdata[j].devaddr = devaddrs[j];
		  if (devaddrs[j] == 0)
		    continue;
		  k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
					     devaddrs[j],
					     devaddrs[j] + sizeof (void*),
					     false);
		  if (k < j)
		    devaddrs[j] = (devaddrs[k] + devaddrs[j]
				   - cdata[k].devaddr);
		  else
		    {
		      n2 = gomp_map_rev_lookup (&devicep->mem_map,
						devaddrs[j],
						devaddrs[j] + sizeof (void*),
						false);
		      if (n2 == NULL)
			{
			  gomp_mutex_unlock (&devicep->lock);
			  gomp_fatal ("Pointer target wasn't mapped");
			}
		      devaddrs[j] = (n2->host_start + cdata[j].devaddr
				     - (n2->tgt->tgt_start + n2->tgt_offset));
		    }
		  *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
		    = (void *) (uintptr_t) devaddrs[j];
		}
	      break;
	    case GOMP_MAP_STRUCT:
	      n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
					devaddrs[i + sizes[i]]
					+ sizes[i + sizes[i]], false);
	      cdata[i].present = n2 != NULL;
	      cdata[i].devaddr = devaddrs[i];
	      struct_cpy = cdata[i].present ? 0 : sizes[i];
	      if (!n2)
		{
		  size_t sz = (size_t) (devaddrs[i + sizes[i]]
					- devaddrs[i+1]
					+ sizes[i + sizes[i]]);
		  size_t align = (size_t) 1 << (kinds[i] >> 8);
		  cdata[i].aligned = true;
		  devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
		  devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
		}
	      else
		devaddrs[i] = (n2->host_start + cdata[i].devaddr
			       - (n2->tgt->tgt_start + n2->tgt_offset));
	      break;
	    default:
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
	    }
	}
      gomp_mutex_unlock (&devicep->lock);
    }

  host_fn (devaddrs);

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      uint64_t struct_cpy = 0;
      bool clean_struct = false;
      for (uint64_t i = 0; i < mapnum; i++)
	{
	  if (cdata[i].devaddr == 0)
	    continue;
	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
	  bool copy = !cdata[i].present || struct_cpy;
	  switch (kind)
	    {
	    case GOMP_MAP_FORCE_FROM:
	    case GOMP_MAP_FORCE_TOFROM:
	    case GOMP_MAP_ALWAYS_FROM:
	    case GOMP_MAP_ALWAYS_TOFROM:
	    case GOMP_MAP_ALWAYS_PRESENT_FROM:
	    case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
	      copy = true;
	      /* FALLTHRU */
	    case GOMP_MAP_FROM:
	    case GOMP_MAP_TOFROM:
	      if (copy)
		{
		  gomp_copy_host2dev (devicep, aq,
				      (void *) (uintptr_t) cdata[i].devaddr,
				      (void *) (uintptr_t) devaddrs[i],
				      sizes[i], false, NULL);
		  if (aq && !devicep->openacc.async.synchronize_func (aq))
		    exit (EXIT_FAILURE);
		}
	    default:
	      break;
	    }
	  if (struct_cpy)
	    struct_cpy--;
	  if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
	    {
	      clean_struct = true;
	      struct_cpy = sizes[i];
	    }
	  else if (!cdata[i].present && cdata[i].aligned)
	    gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
	  else if (!cdata[i].present)
	    free ((void *) (uintptr_t) devaddrs[i]);
	}
      if (clean_struct)
	for (uint64_t i = 0; i < mapnum; i++)
	  if (!cdata[i].present
	      && ((get_kind (short_mapkind, kinds, i) & typemask)
		  == GOMP_MAP_STRUCT))
	    {
	      devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
	      gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
	    }

      free (devaddrs);
      free (sizes);
      free (kinds);
    }
}
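/* For illustration, the user-level construct that ends up here:

     #pragma omp target
     {
       ...
       #pragma omp target device (ancestor: 1) map(always, tofrom: x)
	 x = host_side_computation (x);
     }

   The inner construct makes the device plugin call gomp_target_rev, which
   reverse-maps the device addresses, runs the host function and copies any
   from-mapped data back to the device; 'host_side_computation' is just a
   placeholder name.  */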
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
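/* For illustration: a structured data region

     #pragma omp target data map(to: a[0:n])
     { ... }

   opens with a GOMP_target_data{,_ext} call that pushes the new
   target_mem_desc onto the per-task icv->target_data chain; the matching
   GOMP_target_end_data below pops and unmaps it.  */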
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    {
	      if (k->aux && k->aux->attach_count)
		{
		  /* We have to be careful not to overwrite still attached
		     pointers during the copyback to host.  */
		  uintptr_t addr = k->host_start;
		  while (addr < k->host_end)
		    {
		      size_t i = (addr - k->host_start) / sizeof (void *);
		      if (k->aux->attach_count[i] == 0)
			gomp_copy_dev2host (devicep, NULL, (void *) addr,
					    (void *) (k->tgt->tgt_start
						      + k->tgt_offset
						      + addr - k->host_start),
					    sizeof (void *));
		      addr += sizeof (void *);
		    }
		}
	      else
		gomp_copy_dev2host (devicep, NULL,
				    (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	    }

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since they maintain an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start
	     with.  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j)
					       & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum
	       && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
		   || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
		       && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
				ttask->args);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true, NULL);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      NULL, GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
	for (i = 0; i < ttask->mapnum; i++)
	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	    {
	      gomp_map_vars (devicep, ttask->sizes[i] + 1,
			     &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
			     &ttask->kinds[i], true, &refcount_set,
			     GOMP_MAP_VARS_ENTER_DATA);
	      i += ttask->sizes[i];
	    }
	  else
	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
			   &ttask->sizes[i], &ttask->kinds[i], true,
			   &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
      else
	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
			ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
bool
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
	     unsigned int thread_limit, bool first)
{
  struct gomp_thread *thr = gomp_thread ();
  if (first)
    {
      if (thread_limit)
	{
	  struct gomp_task_icv *icv = gomp_icv (true);
	  icv->thread_limit_var
	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
	}
      (void) num_teams_high;
      if (num_teams_low == 0)
	num_teams_low = 1;
      thr->num_teams = num_teams_low - 1;
      thr->team_num = 0;
    }
  else if (thr->team_num == thr->num_teams)
    return false;
  else
    ++thr->team_num;
  return true;
}
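/* Illustrative sketch, not part of libgomp: host fallback of a 'teams'
   region is expected to loop over GOMP_teams4, along the lines of

     if (GOMP_teams4 (lo, hi, tl, true))
       do
	 body ();	-- omp_get_team_num () reads thr->team_num
       while (GOMP_teams4 (lo, hi, tl, false));

   so the first call establishes thr->num_teams and later calls advance
   thr->team_num until it reaches thr->num_teams.  'lo', 'hi', 'tl' and
   'body' are placeholders for compiler-generated values.  */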
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return malloc (size);

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL || device_ptr == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
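/* Illustrative sketch, not part of libgomp: a typical pairing of the two
   allocation routines above, assuming device 0 exists:

     void *p = omp_target_alloc (n * sizeof (double), 0);
     if (p)
       {
	 ... use p, e.g. via an is_device_ptr clause on a target region ...
	 omp_target_free (p, 0);
       }
*/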
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return 1;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return 0;

  if (ptr == NULL)
    return 1;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
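/* Illustrative sketch, not part of libgomp: user code can use the routine
   above to test whether an object is already mapped, e.g.

     int present = omp_target_is_present (&x, dev);

   which is nonzero for the host device itself or when &x is found in the
   device's mem_map splay tree; 'x' and 'dev' are placeholders.  */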
static int
omp_target_memcpy_check (int dst_device_num, int src_device_num,
			 struct gomp_device_descr **dst_devicep,
			 struct gomp_device_descr **src_devicep)
{
  if (dst_device_num != gomp_get_num_devices ()
      /* Above gomp_get_num_devices has to be called unconditionally.  */
      && dst_device_num != omp_initial_device)
    {
      *dst_devicep = resolve_device (dst_device_num, false);
      if (*dst_devicep == NULL)
	return EINVAL;

      if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*dst_devicep = NULL;
    }

  if (src_device_num != num_devices_openmp
      && src_device_num != omp_initial_device)
    {
      *src_devicep = resolve_device (src_device_num, false);
      if (*src_devicep == NULL)
	return EINVAL;

      if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*src_devicep = NULL;
    }

  return 0;
}
static int
omp_target_memcpy_copy (void *dst, const void *src, size_t length,
			size_t dst_offset, size_t src_offset,
			struct gomp_device_descr *dst_devicep,
			struct gomp_device_descr *src_devicep)
{
  bool ret;
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);
  return ret;
}
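/* Illustrative sketch, not part of libgomp: copying n bytes from a host
   buffer into device memory with the routine above, assuming device 0:

     int host_dev = omp_get_initial_device ();
     void *d = omp_target_alloc (n, 0);
     int rc = omp_target_memcpy (d, h, n, 0, 0, 0, host_dev);

   with 'h' a host buffer of at least n bytes; rc is 0 on success and an
   errno value such as EINVAL otherwise.  */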
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;

static void
omp_target_memcpy_async_helper (void *args)
{
  omp_target_memcpy_data *a = args;
  if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
			      a->src_offset, a->dst_devicep, a->src_devicep))
    gomp_fatal ("omp_target_memcpy failed");
}
int
omp_target_memcpy_async (void *dst, const void *src, size_t length,
			 size_t dst_offset, size_t src_offset,
			 int dst_device_num, int src_device_num,
			 int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  void *depend[depobj_count + 5];
  int i;

  int check = omp_target_memcpy_check (dst_device_num, src_device_num,
				       &dst_devicep, &src_devicep);
  if (check)
    return check;

  omp_target_memcpy_data s = {
    .dst = dst,
    .src = src,
    .length = length,
    .dst_offset = dst_offset,
    .src_offset = src_offset,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
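/* Illustrative note, not part of libgomp: the DEPEND vector built above has
   a five-slot header (depend[0] left 0 so that depend[1] carries the number
   of depobj-style entries, depend[2..4] zeroed) followed by the addresses of
   the omp_depend_t objects.  A caller with no dependences would simply do

     int rc = omp_target_memcpy_async (d, h, n, 0, 0, 0,
				       omp_get_initial_device (), 0, NULL);

   with 'd', 'h' and 'n' as in the synchronous example above.  */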
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep,
			       size_t *tmp_size, void **tmp)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	{
	  /* Copy between distinct devices goes via a host bounce buffer
	     that is grown on demand.  */
	  if (*tmp_size == 0)
	    {
	      *tmp_size = length;
	      *tmp = malloc (length);
	      if (*tmp == NULL)
		return ENOMEM;
	    }
	  else if (*tmp_size < length)
	    {
	      *tmp_size = length;
	      free (*tmp);
	      *tmp = malloc (length);
	      if (*tmp == NULL)
		return ENOMEM;
	    }
	  ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
					    (const char *) src + src_off,
					    length);
	  if (ret == 1)
	    ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					      (char *) dst + dst_off, *tmp,
					      length);
	}
      return ret ? 0 : EINVAL;
    }

  /* host->device, device->host and intra device.  */
  if (num_dims == 2
      && ((src_devicep
	   && src_devicep == dst_devicep
	   && src_devicep->memcpy2d_func)
	  || (!src_devicep != !dst_devicep
	      && ((src_devicep && src_devicep->memcpy2d_func)
		  || (dst_devicep && dst_devicep->memcpy2d_func)))))
    {
      size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
      int dst_id = dst_devicep ? dst_devicep->target_id : -1;
      int src_id = src_devicep ? src_devicep->target_id : -1;
      struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;

      if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
	  || __builtin_mul_overflow (dst_dimensions[1], element_size,
				     &dst_sz1)
	  || __builtin_mul_overflow (src_dimensions[1], element_size,
				     &src_sz1)
	  || __builtin_mul_overflow (dst_offsets[1], element_size,
				     &dst_off_sz1)
	  || __builtin_mul_overflow (src_offsets[1], element_size,
				     &src_off_sz1))
	return EINVAL;
      ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
				 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
				 src, src_off_sz1, src_offsets[0], src_sz1);
      if (ret != -1)
	return ret ? 0 : EINVAL;
    }
  else if (num_dims == 3
	   && ((src_devicep
		&& src_devicep == dst_devicep
		&& src_devicep->memcpy3d_func)
	       || (!src_devicep != !dst_devicep
		   && ((src_devicep && src_devicep->memcpy3d_func)
		       || (dst_devicep && dst_devicep->memcpy3d_func)))))
    {
      size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
      int dst_id = dst_devicep ? dst_devicep->target_id : -1;
      int src_id = src_devicep ? src_devicep->target_id : -1;
      struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;

      if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
	  || __builtin_mul_overflow (dst_dimensions[2], element_size,
				     &dst_sz2)
	  || __builtin_mul_overflow (src_dimensions[2], element_size,
				     &src_sz2)
	  || __builtin_mul_overflow (dst_offsets[2], element_size,
				     &dst_off_sz2)
	  || __builtin_mul_overflow (src_offsets[2], element_size,
				     &src_off_sz2))
	return EINVAL;
      ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
				 dst, dst_off_sz2, dst_offsets[1],
				 dst_offsets[0], dst_sz2, dst_dimensions[1],
				 src, src_off_sz2, src_offsets[1],
				 src_offsets[0], src_sz2, src_dimensions[1]);
      if (ret != -1)
	return ret ? 0 : EINVAL;
    }

  /* Generic case: recurse over the outermost dimension.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep, tmp_size, tmp);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
static int
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
			      int src_device_num,
			      struct gomp_device_descr **dst_devicep,
			      struct gomp_device_descr **src_devicep)
{
  if (!dst && !src)
    return INT_MAX;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     dst_devicep, src_devicep);
  if (ret)
    return ret;

  return 0;
}
static int
omp_target_memcpy_rect_copy (void *dst, const void *src,
			     size_t element_size, int num_dims,
			     const size_t *volume, const size_t *dst_offsets,
			     const size_t *src_offsets,
			     const size_t *dst_dimensions,
			     const size_t *src_dimensions,
			     struct gomp_device_descr *dst_devicep,
			     struct gomp_device_descr *src_devicep)
{
  size_t tmp_size = 0;
  void *tmp = NULL;
  bool lock_src, lock_dst;

  lock_src = src_devicep != NULL;
  lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
  if (lock_src)
    gomp_mutex_lock (&src_devicep->lock);
  if (lock_dst)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep,
					   &tmp_size, &tmp);
  if (lock_src)
    gomp_mutex_unlock (&src_devicep->lock);
  if (lock_dst)
    gomp_mutex_unlock (&dst_devicep->lock);
  if (tmp)
    free (tmp);
  return ret;
}
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
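/* Illustrative sketch, not part of libgomp: copying a 2x3 sub-block of a
   5x10 matrix of doubles from host to device 0 with the routine above:

     size_t volume[2]     = { 2, 3 };
     size_t offsets[2]    = { 1, 4 };
     size_t dimensions[2] = { 5, 10 };
     int rc = omp_target_memcpy_rect (d, h, sizeof (double), 2, volume,
				      offsets, offsets, dimensions,
				      dimensions, 0,
				      omp_get_initial_device ());

   'd' and 'h' are placeholders for a device and a host allocation of
   50 doubles each; rc is 0 on success.  */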
typedef struct
{
  void *dst;
  const void *src;
  size_t element_size;
  const size_t *volume;
  const size_t *dst_offsets;
  const size_t *src_offsets;
  const size_t *dst_dimensions;
  const size_t *src_dimensions;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
  int num_dims;
} omp_target_memcpy_rect_data;

static void
omp_target_memcpy_rect_async_helper (void *args)
{
  omp_target_memcpy_rect_data *a = args;
  int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
					 a->num_dims, a->volume,
					 a->dst_offsets, a->src_offsets,
					 a->dst_dimensions, a->src_dimensions,
					 a->dst_devicep, a->src_devicep);
  if (ret)
    gomp_fatal ("omp_target_memcpy_rect failed");
}
int
omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
			      int num_dims, const size_t *volume,
			      const size_t *dst_offsets,
			      const size_t *src_offsets,
			      const size_t *dst_dimensions,
			      const size_t *src_dimensions,
			      int dst_device_num, int src_device_num,
			      int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  void *depend[depobj_count + 5];
  int i;

  if (check)
    return check;

  omp_target_memcpy_rect_data s = {
    .dst = dst,
    .src = src,
    .element_size = element_size,
    .num_dims = num_dims,
    .volume = volume,
    .dst_offsets = dst_offsets,
    .src_offsets = src_offsets,
    .dst_dimensions = dst_dimensions,
    .src_dimensions = src_dimensions,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An existing mapping is acceptable only if it covers the same
	 device address range.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
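/* Illustrative sketch, not part of libgomp: associating manually allocated
   device memory with a host object so that later map clauses reuse it:

     double x[N];
     void *d = omp_target_alloc (sizeof (x), dev);
     if (omp_target_associate_ptr (x, d, sizeof (x), 0, dev) == 0)
       {
	 ... target regions now use d as the device copy of x ...
	 omp_target_disassociate_ptr (x, dev);
       }
     omp_target_free (d, dev);

   'N' and 'dev' are placeholders.  */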
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void *
omp_get_mapped_ptr (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == omp_get_initial_device ())
    return (void *) ptr;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return (void *) ptr;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  void *ret = NULL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);

  if (n)
    {
      uintptr_t offset = cur_node.host_start - n->host_start;
      ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
    }

  gomp_mutex_unlock (&devicep->lock);

  return ret;
}
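/* Illustrative sketch, not part of libgomp: after a successful
   omp_target_associate_ptr (x, d, ...) or an enter-data mapping of x,

     void *dp = omp_get_mapped_ptr (&x, dev);

   is expected to yield the corresponding device address (the mapped base
   plus the intra-object offset), or NULL when nothing is mapped there;
   'x' and 'dev' are placeholders.  */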
int
omp_target_is_accessible (const void *ptr, size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return true;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return false;

  /* TODO: Unified shared memory must be handled when available.  */

  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
}
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return gomp_pause_host ();

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return -1;

  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  DLSYM_OPT (memcpy2d, memcpy2d);
  DLSYM_OPT (memcpy3d, memcpy3d);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
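/* Illustrative note, not part of libgomp's sources: a plugin exports its
   entry points under the "GOMP_OFFLOAD_" prefix looked up above, so e.g.
   the nvptx plugin defines

     int GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask);

   and DLSYM (get_num_devices) binds it to device->get_num_devices_func.
   The authoritative plugin signatures live in libgomp-plugin.h.  */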
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
	    new_num_devs = current_device.get_num_devices_func (omp_req);
	    if (gomp_debug_var > 0 && new_num_devs < 0)
	      {
		bool found = false;
		int type = current_device.get_type_func ();
		for (int img = 0; img < num_offload_images; img++)
		  if (type == offload_images[img].type)
		    found = true;
		if (found)
		  {
		    char buf[sizeof ("unified_address, unified_shared_memory, "
				     "reverse_offload")];
		    gomp_requires_to_name (buf, sizeof (buf), omp_req);
		    char *name = (char *) malloc (cur_len + 1);
		    memcpy (name, cur, cur_len);
		    name[cur_len] = '\0';
		    gomp_debug (1,
				"%s devices present but 'omp requires %s' "
				"cannot be fulfilled\n", name, buf);
		    free (name);
		  }
	      }
	    else if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devs = realloc (devs, (num_devs + new_num_devs)
				      * sizeof (struct gomp_device_descr));
		if (!devs)
		  {
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.mem_map_rev.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }

  if (gomp_global_icv.default_device_var == INT_MIN)
    {
      /* This implies OMP_TARGET_OFFLOAD=mandatory.  */
      struct gomp_icv_list *none;
      none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
      gomp_global_icv.default_device_var = (num_devs_openmp
					    ? 0 : omp_invalid_device);
      none->icvs.default_device_var = gomp_global_icv.default_device_var;
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   The GOMP_target* routines are just stubs in that case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */