/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_c
#include "splay-tree.h"
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr
{
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* OpenMP requires mask.  */
static int omp_requires_mask;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
/* Look up device number DEVICE_ID (remapping the OpenMP ICV special values
   when REMAPPED), initialize the device if needed, and return its
   descriptor, or NULL for host fallback.  */

static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
				 : omp_initial_device))
	return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && num_devices == 0)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but only the host device is available");
      else if (device_id == omp_invalid_device)
	gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != num_devices)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;

  return splay_tree_lookup (mem_map, key);
}

static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
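
/* Illustration only, not part of libgomp: a minimal sketch of looking up a
   zero-length key (host_start == host_end), assuming MEM_MAP is a device's
   memory map.  The helper name is hypothetical; the block is kept out of
   the build on purpose.  */
#if 0
static splay_tree_key
example_0len_lookup (splay_tree mem_map, void *host_ptr)
{
  struct splay_tree_key_s key;
  /* A zero-length range: gomp_map_0len_lookup temporarily widens it by one
     byte so a mapping containing HOST_PTR can still be found.  */
  key.host_start = (uintptr_t) host_ptr;
  key.host_end = key.host_start;
  return gomp_map_0len_lookup (mem_map, &key);
}
#endif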
static void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
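
/* Illustration only, not part of libgomp: a minimal sketch of how the two
   coalescing thresholds interact, assuming CBUF was set up with room for at
   least three chunks.  The helper name is hypothetical; the block is kept
   out of the build on purpose.  */
#if 0
static void
example_coalesce_usage (struct gomp_coalesce_buf *cbuf)
{
  /* Two small regions 1 KiB apart: the second call extends the first
     chunk's end instead of opening a new chunk, because the gap is below
     MAX_COALESCE_BUF_GAP (4 KiB).  */
  gomp_coalesce_buf_add (cbuf, 0, 512);
  gomp_coalesce_buf_add (cbuf, 512 + 1024, 512);
  /* A region larger than MAX_COALESCE_BUF_SIZE (32 KiB) is ignored here
     and will be copied to the device directly instead.  */
  gomp_coalesce_buf_add (cbuf, 64 * 1024, 48 * 1024);
}
#endif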
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }

		  /* In an asynchronous context, verify that CBUF isn't used
		     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
		  if (__builtin_expect (aq != NULL, 0))
		    assert (ephemeral);

		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  As we've
	     not been able to use CBUF, make a copy of the data into a
	     temporary buffer.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
		      "dev", d, "host", h, sz);
}
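
/* Illustration only, not part of libgomp: minimal sketch of a synchronous
   host-to-device copy, assuming DEVICEP's lock is held (as the error path
   above expects) and D points at device memory of sufficient size.  The
   helper name is hypothetical; kept out of the build on purpose.  */
#if 0
static void
example_host2dev (struct gomp_device_descr *devicep, void *d)
{
  int host_data[4] = { 1, 2, 3, 4 };
  /* aq == NULL selects the synchronous path, so EPHEMERAL is irrelevant;
     cbuf == NULL bypasses the coalescing buffer.  */
  gomp_copy_host2dev (devicep, NULL, d, host_data, sizeof host_data,
		      false, NULL);
}
#endif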
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
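
/* Illustration only, not part of libgomp: sketch of how a refcount set
   makes repeated adjustments of one key idempotent within a construct,
   assuming K is a valid mapped key (OpenMP 5.0 semantics).  The helper
   name is hypothetical; kept out of the build on purpose.  */
#if 0
static void
example_refcount_set (splay_tree_key k)
{
  htab_t refcount_set = htab_create (8);
  /* Only the first call increments; the second finds the refcount pointer
     already recorded in the set and is a no-op.  */
  gomp_increment_refcount (k, &refcount_set);
  gomp_increment_refcount (k, &refcount_set);
  htab_free (refcount_set);
}
#endif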
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag, bool implicit,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
	  && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
			  && newn->host_start <= oldn->host_start
			  && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
	   /* Otherwise, new contained inside old is considered valid.  */
	   || (oldn->host_start <= newn->host_start
	       && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
	 address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
	{
	  /* We have to be careful not to overwrite still attached pointers
	     during the copyback to host.  */
	  uintptr_t addr = newn->host_start;
	  while (addr < newn->host_end)
	    {
	      size_t i = (addr - oldn->host_start) / sizeof (void *);
	      if (oldn->aux->attach_count[i] == 0)
		gomp_copy_host2dev (devicep, aq,
				    (void *) (oldn->tgt->tgt_start
					      + oldn->tgt_offset
					      + addr - oldn->host_start),
				    (void *) addr,
				    sizeof (void *), false, cbuf);
	      addr += sizeof (void *);
	    }
	}
      else
	gomp_copy_host2dev (devicep, aq,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
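
/* Illustration only, not part of libgomp: sketch of the two mapkind
   encodings the accessors above hide, using a hypothetical two-entry kinds
   array.  Kept out of the build on purpose.  */
#if 0
static void
example_kinds (void)
{
  /* Short (16-bit) mapkinds may carry GOMP_MAP_IMPLICIT; get_kind strips
     the implicit flag, while get_implicit reports it separately.  */
  unsigned short kinds[2] = { GOMP_MAP_TO, GOMP_MAP_TO | GOMP_MAP_IMPLICIT };
  int k0 = get_kind (true, kinds, 0);	    /* GOMP_MAP_TO */
  bool i1 = get_implicit (true, kinds, 1);  /* true */
  (void) k0; (void) i1;
}
#endif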
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = 0;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is fine.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       allow attaching to NULL pointers when the target region is not
	       mapped.  */
	    data = 0;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
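
/* Illustration only, not part of libgomp: sketch of pairing attach and
   detach on a mapped struct key N, assuming DEVICEP's lock is held and
   PTR_FIELD is the host address of a pointer inside N's range.  The helper
   name is hypothetical; kept out of the build on purpose.  */
#if 0
static void
example_attach_detach (struct gomp_device_descr *devicep, splay_tree_key n,
		       uintptr_t ptr_field)
{
  /* The first attach translates the host pointer and writes the device
     copy; the matching detach restores the device copy to the host value
     once the per-pointer attachment count drops back to zero.  */
  gomp_attach_pointer (devicep, NULL, &devicep->mem_map, n, ptr_field,
		       0 /* bias */, NULL, false);
  gomp_detach_pointer (devicep, NULL, n, ptr_field, false, NULL);
}
#endif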
/* Return the device address corresponding to entry I of TGT, resolving the
   OFFSET_* markers stored by gomp_map_vars_internal below.  */

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
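
/* Illustration only, not part of libgomp: how gomp_map_val resolves entry
   I of a freshly built TGT, assuming the OFFSET_* markers set up by
   gomp_map_vars_internal.  The helper name is hypothetical; kept out of
   the build on purpose.  */
#if 0
static void
example_map_val (struct target_mem_desc *tgt, void **hostaddrs)
{
  /* Mapped entry: key's tgt_start + tgt_offset + per-entry offset.
     OFFSET_INLINED entry: the (possibly rewritten) host address is used.
     OFFSET_POINTER entry: resolves to 0.
     OFFSET_STRUCT entry: derived from the following sibling entry.  */
  uintptr_t dev_addr = gomp_map_val (tgt, hostaddrs, 0);
  (void) dev_addr;
}
#endif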
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH
	       || ((kind & typemask)
		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_INLINED;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     a statically assigned pointer, but then it should point
		     to the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier, or if a
		 null pointer was there and a non-null one has since been
		 found; that permits transparent re-mapping for Fortran array
		 descriptors which were previously mapped unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, implicit,
				  NULL, refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_INLINED;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
					  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation failed");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;
	    size_t j;
	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
						       - k->host_start),
				      sizes[j], cbufp, false);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    bool implicit = get_implicit (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		/* Save device address in hostaddr to permit later
		   availability when doing a deep-firstprivate with pointer
		   attach.  */
		hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
		tgt_size += len;

		/* If followed by GOMP_MAP_ATTACH, pointer assign this
		   firstprivate to hostaddrs[i+1], which is assumed to contain
		   a device address.  */
		if (i + 1 < mapnum
		    && (GOMP_MAP_ATTACH
			== (typemask & get_kind (short_mapkind, kinds, i+1))))
		  {
		    uintptr_t target = (uintptr_t) hostaddrs[i];
		    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
		    /* Per
		       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
		       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
		       this probably needs revision for 'aq' usage.  */
		    assert (!aq);
		    gomp_copy_host2dev (devicep, aq, devptr, &target,
					sizeof (void *), false, cbufp);
		    ++i;
		  }
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily test
		   'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp, refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if (i > 0
		    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
			!= GOMP_MAP_ALWAYS_POINTER))
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      bool zlas
			= ((kind & typemask)
			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i], sizes[i],
					   cbufp, zlas);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, false, implicit, cbufp,
				      refcount_set);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		k->refcount = 0;
		k->dynamic_refcount = 0;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (openmp_p)
		      {
			k->refcount = REFCOUNT_STRUCTELEM;
			if (field_tgt_structelem_first == NULL)
			  {
			    /* Set to first structure element of sequence.  */
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
			    field_tgt_structelem_first = k;
			  }
			else
			  /* Point to refcount of leading element, but do not
			     increment again.  */
			  k->structelem_refcount_ptr
			    = &field_tgt_structelem_first->structelem_refcount;

			if (i == field_tgt_clear)
			  {
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
			    field_tgt_structelem_first = NULL;
			  }
		      }
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		/* First increment, from 0 to 1.  gomp_increment_refcount
		   encapsulates the different increment cases, so use this
		   instead of directly setting 1 during initialization.  */
		gomp_increment_refcount (k, refcount_set);

		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;

		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
		    gomp_map_pointer
		      (tgt, aq, (uintptr_t) *(void **) k->host_start,
		       k->tgt_offset, sizes[i], cbufp,
		       ((kind & typemask)
			== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    tgt->list[i].has_null_ptr_assoc = false;

		    for (j = i + 1; j < mapnum; j++)
		      {
			int ptr_kind = (get_kind (short_mapkind, kinds, j)
					& typemask);
			if (!GOMP_MAP_POINTER_P (ptr_kind)
			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
			  break;
			else if ((uintptr_t) hostaddrs[j] < k->host_start
				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				     > k->host_end))
			  break;
			else
			  {
			    tgt->list[j].key = k;
			    tgt->list[j].copy_from = false;
			    tgt->list[j].always_copy_from = false;
			    tgt->list[j].is_attach = false;
			    tgt->list[i].has_null_ptr_assoc
			      |= !(*(void **) hostaddrs[j]);
			    /* For OpenMP, the use of refcount_sets causes
			       errors if we set k->refcount = 1 above but also
			       increment it again here, for decrementing will
			       not properly match, since we decrement only once
			       for each key's refcount.  Therefore avoid this
			       increment for OpenMP constructs.  */
			    if (!openmp_p)
			      gomp_increment_refcount (k, refcount_set);
			    gomp_map_pointer (tgt, aq,
					      (uintptr_t) *(void **) hostaddrs[j],
					      k->tgt_offset
					      + ((uintptr_t) hostaddrs[j]
						 - k->host_start),
					      sizes[j], cbufp, false);
			  }
		      }
		    i = j - 1;
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		  case GOMP_MAP_ALWAYS_PRESENT_TO:
		  case GOMP_MAP_ALWAYS_PRESENT_FROM:
		  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: not present on the device "
				  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
				  "dev: %d)", (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size,
				  devicep->target_id);
#else
		      gomp_fatal ("present clause: not present on the device "
				  "(addr: %p, size: %lu (0x%lx), dev: %d)",
				  (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size,
				  devicep->target_id);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), false, cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), true, NULL);
		  }
		array++;
	      }
	  }
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      true, cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start,
			    false, NULL);
      if (aq)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
      else
	free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, htab_t *refcount_set,
	       enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}
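
/* Illustration only, not part of libgomp: minimal sketch of mapping a
   single host array with a 'to'-style kind, assuming DEVICEP is an
   initialized OpenMP-capable device.  The helper name is hypothetical;
   kept out of the build on purpose.  */
#if 0
static struct target_mem_desc *
example_map_one (struct gomp_device_descr *devicep, int *host_arr)
{
  void *hostaddrs[1] = { host_arr };
  size_t sizes[1] = { 16 * sizeof (int) };
  unsigned short kinds[1] = { GOMP_MAP_TO };
  /* refcount_set == NULL makes gomp_map_vars manage a local set.  */
  return gomp_map_vars (devicep, 1, hostaddrs, NULL, sizes, kinds,
			true, NULL, GOMP_MAP_VARS_DATA);
}
#endif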
attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
static void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    if (n->aux && n->aux->attach_count)
	      {
		uintptr_t addr = cur_node.host_start;
		while (addr < cur_node.host_end)
		  {
		    /* We have to be careful not to overwrite still attached
		       pointers during host<->device updates.  */
		    size_t i = (addr - cur_node.host_start) / sizeof (void *);
		    if (n->aux->attach_count[i] == 0)
		      {
			void *devaddr = (void *) (n->tgt->tgt_start
						  + n->tgt_offset
						  + addr - n->host_start);
			if (GOMP_MAP_COPY_TO_P (kind & typemask))
			  gomp_copy_host2dev (devicep, NULL,
					      devaddr, (void *) addr,
					      sizeof (void *), false, NULL);
			if (GOMP_MAP_COPY_FROM_P (kind & typemask))
			  gomp_copy_dev2host (devicep, NULL,
					      (void *) addr, devaddr,
					      sizeof (void *));
		      }
		    addr += sizeof (void *);
		  }
	      }
	    else
	      {
		void *hostaddr = (void *) cur_node.host_start;
		void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
					  + cur_node.host_start
					  - n->host_start);
		size_t size = cur_node.host_end - cur_node.host_start;

		if (GOMP_MAP_COPY_TO_P (kind & typemask))
		  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				      false, NULL);
		if (GOMP_MAP_COPY_FROM_P (kind & typemask))
		  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	      }
	  }
	else
	  {
	    int kind = get_kind (short_mapkind, kinds, i);

	    if (GOMP_MAP_PRESENT_P (kind))
	      {
		/* We already looked up the memory region above and it
		   was missing.  */
		gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		gomp_fatal ("present clause: not present on the device "
			    "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
			    "dev: %d)", (void *) hostaddrs[i],
			    (uint64_t) sizes[i], (uint64_t) sizes[i],
			    devicep->target_id);
#else
		gomp_fatal ("present clause: not present on the device "
			    "(addr: %p, size: %lu (0x%lx), dev: %d)",
			    (void *) hostaddrs[i], (unsigned long) sizes[i],
			    (unsigned long) sizes[i], devicep->target_id);
#endif
	      }
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
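/* Illustration (hypothetical values): a 'target update to(x)' with x already
   mapped on the device is lowered by the compiler into a call along these
   lines, where GOMP_MAP_COPY_TO_P (GOMP_MAP_TO) selects the host-to-device
   direction:

     void *hostaddrs[] = { &x };
     size_t sizes[] = { sizeof (x) };
     unsigned short kinds[] = { GOMP_MAP_TO };
     gomp_update (devicep, 1, hostaddrs, sizes, kinds, true);
*/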
static struct gomp_offload_icv_list *
gomp_get_offload_icv_item (int dev_num)
{
  struct gomp_offload_icv_list *l = gomp_offload_icv_list;
  while (l != NULL && l->device_num != dev_num)
    l = l->next;

  return l;
}
/* Helper function for 'gomp_load_image_to_device'.  Returns the ICV values
   depending on the device num and the variable hierarchy
   (_DEV_42, _DEV, _ALL).  If no ICV was initially configured for the given
   device and thus no item with that device number is contained in
   gomp_offload_icv_list, then a new item is created and added to the list.  */

static struct gomp_offload_icvs *
get_gomp_offload_icvs (int dev_num)
{
  struct gomp_icv_list *dev
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
  struct gomp_icv_list *all
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
  struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
  struct gomp_offload_icv_list *offload_icvs
    = gomp_get_offload_icv_item (dev_num);

  if (offload_icvs != NULL)
    return &offload_icvs->icvs;

  struct gomp_offload_icv_list *new
    = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));

  new->device_num = dev_num;
  new->icvs.device_num = dev_num;
  new->next = gomp_offload_icv_list;

  if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev_x->icvs.nteams_var;
  else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev->icvs.nteams_var;
  else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = all->icvs.nteams_var;
  else
    new->icvs.nteams = gomp_default_icv_values.nteams_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
  else if (dev != NULL
	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
  else if (all != NULL
	   && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
  else
    new->icvs.teams_thread_limit
      = gomp_default_icv_values.teams_thread_limit_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev_x->icvs.default_device_var;
  else if (dev != NULL
	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev->icvs.default_device_var;
  else if (all != NULL
	   && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = all->icvs.default_device_var;
  else
    new->icvs.default_device = gomp_default_icv_values.default_device_var;

  gomp_offload_icv_list = new;
  return &new->icvs;
}
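/* Resolution order illustrated (hypothetical environment): with
   OMP_NUM_TEAMS_DEV_0=8, OMP_NUM_TEAMS_DEV=4 and OMP_NUM_TEAMS_ALL=2 set,
   get_gomp_offload_icvs (0) would pick nteams = 8 (the _DEV_0 item wins),
   get_gomp_offload_icvs (1) would pick nteams = 4 (_DEV), and with none of
   them set the gomp_default_icv_values fallback is used.  */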
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compilers
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  uint64_t *rev_target_fn_table = NULL;
  int i, num_target_entries;

  /* With reverse offload, insert also target-host addresses.  */
  bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table,
				rev_lookup ? &rev_target_fn_table : NULL);

  if (num_target_entries != num_funcs + num_vars
      /* "+1" due to the additional ICV struct.  */
      && num_target_entries != num_funcs + num_vars + 1)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  /* "+1" due to the additional ICV struct.  */
  tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
			    * sizeof (*tgt->array));
  if (rev_target_fn_table)
    tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
  else
    tgt->rev_array = NULL;
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;
  reverse_splay_tree_node rev_array = tgt->rev_array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      if (rev_target_fn_table)
	{
	  reverse_splay_tree_key k2 = &rev_array->key;
	  k2->dev = rev_target_fn_table[i];
	  k2->k = k;
	  rev_array->left = NULL;
	  rev_array->right = NULL;
	  reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
	  rev_array++;
	}
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Last entry is for the ICVs variable.
     Tolerate case where plugin does not return those entries.  */
  if (num_funcs + num_vars < num_target_entries)
    {
      struct addr_pair *var = &target_table[num_funcs + num_vars];

      /* Start address will be non-zero for the ICVs variable if
	 the variable was found in this image.  */
      if (var->start != 0)
	{
	  /* The index of the devicep within devices[] is regarded as its
	     'device number', which is different from the per-device type
	     devicep->target_id.  */
	  int dev_num = (int) (devicep - &devices[0]);
	  struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
	  size_t var_size = var->end - var->start;
	  if (var_size != sizeof (struct gomp_offload_icvs))
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      if (is_register_lock)
		gomp_mutex_unlock (&register_lock);
	      gomp_fatal ("offload plugin managed 'icv struct' not of expected "
			  "format");
	    }
	  /* Copy the ICVs variable to place on device memory, hereby
	     actually designating its device number into effect.  */
	  gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
			      var_size, false, NULL);
	  splay_tree_key k = &array->key;
	  k->host_start = (uintptr_t) icvs;
	  k->host_end =
	    k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
	  k->tgt = tgt;
	  k->tgt_offset = var->start;
	  k->refcount = REFCOUNT_INFINITY;
	  k->dynamic_refcount = 0;
	  k->aux = NULL;
	  array->left = NULL;
	  array->right = NULL;
	  splay_tree_insert (&devicep->mem_map, array);
	  array++;
	}
    }

  free (target_table);
}
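/* For reference, the HOST_TABLE layout decoded above is four pointers;
   functions contribute one address each, variables an (address, size) pair
   with the "declare target link" flag in the size's most significant bit.
   An illustrative-only equivalent (not an actual libgomp type):

     struct host_table_layout
     {
       void **funcs_start;   // host_func_table
       void **funcs_end;     // one past the last function address
       void **vars_start;    // (address, size) pairs
       void **vars_end;
     };
*/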
/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }
  if (devicep->mem_map_rev.root)
    {
      /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
	 real allocation.  */
      assert (node && node->tgt && node->tgt->rev_array);
      assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
      free (node->tgt->rev_array);
      devicep->mem_map_rev.root = NULL;
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
static void
gomp_requires_to_name (char *buf, size_t size, int requires_mask)
{
  char *end = buf + size, *p = buf;
  if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
    p += snprintf (p, end - p, "unified_address");
  if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
    p += snprintf (p, end - p, "%sunified_shared_memory",
		   (p == buf ? "" : ", "));
  if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
    p += snprintf (p, end - p, "%sreverse_offload",
		   (p == buf ? "" : ", "));
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  int omp_req;
  const void *target_data;
  if (GOMP_VERSION_LIB (version) > 1)
    {
      omp_req = (int) (size_t) ((void **) data)[0];
      target_data = &((void **) data)[1];
    }
  else
    {
      omp_req = 0;
      target_data = data;
    }

  gomp_mutex_lock (&register_lock);

  if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
    {
      char buf1[sizeof ("unified_address, unified_shared_memory, "
			"reverse_offload")];
      char buf2[sizeof ("unified_address, unified_shared_memory, "
			"reverse_offload")];
      gomp_requires_to_name (buf2, sizeof (buf2),
			     omp_req != GOMP_REQUIRES_TARGET_USED
			     ? omp_req : omp_requires_mask);
      if (omp_req != GOMP_REQUIRES_TARGET_USED
	  && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
	{
	  gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
	  gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
		      "in multiple compilation units: '%s' vs. '%s'",
		      buf1, buf2);
	}
      else
	gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
		    "some compilation units", buf2);
    }
  omp_requires_mask = omp_req;

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
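/* For GOMP_VERSION_LIB (version) > 1, DATA as decoded above is laid out as

     data[0]    the OpenMP 'requires' mask (or GOMP_REQUIRES_TARGET_USED)
     data[1..]  the actual target descriptor, passed on as TARGET_DATA

   whereas older images pass the target descriptor directly.  */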
/* Legacy entry point.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  const void *target_data;
  if (GOMP_VERSION_LIB (version) > 1)
    target_data = &((void **) data)[1];
  else
    target_data = data;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
/* Legacy entry point.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  if (args)
    while (*args)
      {
	intptr_t id = (intptr_t) *args++, val;
	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	  val = (intptr_t) *args++;
	else
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	  continue;
	id &= GOMP_TARGET_ARG_ID_MASK;
	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
	  continue;
	val = val > INT_MAX ? INT_MAX : val;
	if (val)
	  gomp_icv (true)->thread_limit_var = val;
	break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
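/* The ARGS encoding decoded above, shown on a sketched two-entry list
   (hypothetical values): a small value is packed into the id word itself,
   while a large one occupies the following array element.

     void *args[] =
       { (void *) (intptr_t) ((16 << GOMP_TARGET_ARG_VALUE_SHIFT) // in id word
			      | GOMP_TARGET_ARG_DEVICE_ALL
			      | GOMP_TARGET_ARG_THREAD_LIMIT),
	 (void *) (intptr_t) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM	  // follows
			      | GOMP_TARGET_ARG_DEVICE_ALL
			      | GOMP_TARGET_ARG_NUM_TEAMS),
	 (void *) (intptr_t) 1000000,
	 NULL };
*/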
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
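/* Worked example (hypothetical mapping): for two firstprivate entries with
   (align 8, size 12) and (align 16, size 4), the loop yields
   *tgt_align = 16 and *tgt_size = ((0 + 7) & ~7) + 12 = 12, then
   ((12 + 15) & ~15) + 4 = 20, so callers allocate
   tgt_size + tgt_align - 1 = 35 bytes with gomp_alloca and align the
   resulting pointer by hand.  */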
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
	if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
	  {
	    *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1])
	      = (uintptr_t) hostaddrs[i];
	    ++i;
	  }
      }
}
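/* The alignment of each firstprivate copy is carried in the high byte of its
   16-bit map kind: align = 1 << (kinds[i] >> 8), so e.g. a kind word with
   high byte 3 requests 8-byte alignment (hypothetical value).  */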
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);

  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
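/* Roughly, for this legacy ABI the compiler lowers

     #pragma omp target map(tofrom: x)
     { ... }

   into a call of the shape (a sketch, not the exact generated code):

     GOMP_target (GOMP_DEVICE_ICV, omp_fn.0, NULL, 1,
		  hostaddrs, sizes, kinds);

   where omp_fn.0 is the outlined region and GOMP_DEVICE_ICV requests the
   default-device ICV.  */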
static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}
static void
gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
{
  struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
  if (item == NULL)
    return;

  void *host_ptr = &item->icvs;
  void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
  if (dev_ptr != NULL)
    gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
			sizeof (struct gomp_offload_icvs));
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or two
   elements where the first specifies for which device it is intended, the
   type and optionally also the value.  If the value is not present in the
   first one, the whole second element holds the actual value.  The last
   element of the array is a single NULL.  Among the device-independent ones
   are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  /* Obtain the original TEAMS and THREADS values from ARGS.  */
  intptr_t orig_teams = 1, orig_threads = 0;
  size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
  void **tmpargs = args;
  while (*tmpargs)
    {
      intptr_t id = (intptr_t) *tmpargs++, val;
      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	{
	  val = (intptr_t) *tmpargs++;
	  len = 2;
	}
      else
	{
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	  len = 1;
	}
      num_args += len;
      if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	continue;
      val = val > INT_MAX ? INT_MAX : val;
      if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
	{
	  orig_teams = val;
	  teams_len = len;
	}
      else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
	{
	  orig_threads = val;
	  threads_len = len;
	}
    }

  intptr_t new_teams = orig_teams, new_threads = orig_threads;
  /* ORIG_TEAMS == -2: No explicit teams construct specified.  Set to 1.
     ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
     value could not be determined.  No change.
     ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
     Set device-specific value.
     ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
     No change.  */
  if (orig_teams == -2)
    new_teams = 1;
  else if (orig_teams == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
	new_teams = item->icvs.nteams;
    }
  /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
     region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
     e.g. a THREAD_LIMIT clause.  */
  if (orig_teams > -2 && orig_threads == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
	new_threads = item->icvs.teams_thread_limit;
    }

  /* Copy and change the arguments list only if TEAMS or THREADS need to be
     updated.  */
  void **new_args = args;
  if (orig_teams != new_teams || orig_threads != new_threads)
    {
      size_t tms_len = (orig_teams == new_teams
			? teams_len
			: (new_teams > -(1 << 15) && new_teams < (1 << 15)
			   ? 1 : 2));
      size_t ths_len = (orig_threads == new_threads
			? threads_len
			: (new_threads > -(1 << 15) && new_threads < (1 << 15)
			   ? 1 : 2));
      /* One additional item after the last arg must be NULL.  */
      size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
			    + ths_len + 1;
      new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));

      tmpargs = args;
      void **tmp_new_args = new_args;
      /* Copy all args except TEAMS and THREADS.  TEAMS and THREADS are copied
	 too if they have not been changed and skipped otherwise.  */
      while (*tmpargs)
	{
	  intptr_t id = (intptr_t) *tmpargs;
	  if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
	       && orig_teams != new_teams)
	      || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
		  && orig_threads != new_threads))
	    {
	      tmpargs++;
	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
		tmpargs++;
	    }
	  else
	    {
	      *tmp_new_args++ = *tmpargs++;
	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
		*tmp_new_args++ = *tmpargs++;
	    }
	}

      /* Add the new TEAMS arg to the new args list if it has been changed.  */
      if (orig_teams != new_teams)
	{
	  intptr_t new_val = new_teams;
	  if (tms_len == 1)
	    {
	      new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
			| GOMP_TARGET_ARG_NUM_TEAMS;
	      *tmp_new_args++ = (void *) new_val;
	    }
	  else
	    {
	      *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
					  | GOMP_TARGET_ARG_NUM_TEAMS);
	      *tmp_new_args++ = (void *) new_val;
	    }
	}

      /* Add the new THREADS arg to the new args list if it has been changed.  */
      if (orig_threads != new_threads)
	{
	  intptr_t new_val = new_threads;
	  if (ths_len == 1)
	    {
	      new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
			| GOMP_TARGET_ARG_THREAD_LIMIT;
	      *tmp_new_args++ = (void *) new_val;
	    }
	  else
	    {
	      *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
					  | GOMP_TARGET_ARG_THREAD_LIMIT);
	      *tmp_new_args++ = (void *) new_val;
	    }
	}

      *tmp_new_args = NULL;
    }

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task **implicit_task = &task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  while (*implicit_task
		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
	    implicit_task = &(*implicit_task)->parent;
	  if (*implicit_task)
	    {
	      thr->task = *implicit_task;
	      gomp_end_task ();
	      free (*implicit_task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	  if (implicit_task != &task)
	    {
	      *implicit_task = thr->task;
	      thr->task = task;
	    }
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, new_args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep, new_args);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
				true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     new_args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);

  /* Copy back ICVs from device to host.
     HOST_PTR is expected to exist since it was added in
     gomp_load_image_to_device if not already available.  */
  gomp_copy_back_icvs (devicep, device);
}
/* Reverse lookup (device addr -> host addr) for reverse offload.  We avoid
   keeping track of all variable handling - assuming that reverse offload
   occurs only very rarely.  Downside is that the reverse search is slow.  */

struct gomp_splay_tree_rev_lookup_data
{
  uintptr_t tgt_start;
  uintptr_t tgt_end;
  splay_tree_key key;
};

static int
gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
{
  struct gomp_splay_tree_rev_lookup_data *data;
  data = (struct gomp_splay_tree_rev_lookup_data *)d;
  uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;

  if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
    return 0;

  size_t j;
  for (j = 0; j < key->tgt->list_count; j++)
    if (key->tgt->list[j].key == key)
      break;
  assert (j < key->tgt->list_count);
  uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;

  if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
      || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
    {
      data->key = key;
      return 1;
    }
  return 0;
}
static inline splay_tree_key
gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
		     bool zero_len)
{
  struct gomp_splay_tree_rev_lookup_data data;
  data.key = NULL;
  data.tgt_start = tgt_start;
  data.tgt_end = tgt_end;

  if (tgt_start != tgt_end)
    {
      splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
      return data.key;
    }

  data.tgt_end++;
  splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
  if (data.key != NULL || zero_len)
    return data.key;
  data.tgt_end--;

  data.tgt_start--;
  splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
  return data.key;
}
struct cpy_data
{
  uint64_t devaddr;
  bool present, aligned;
};


/* Search just mapped reverse-offload data; returns index if found,
   otherwise N.  */

static inline size_t
gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
			   unsigned short *kinds, uint64_t *sizes, size_t n,
			   uint64_t tgt_start, uint64_t tgt_end)
{
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  size_t i;
  for (i = 0; i < n; i++)
    {
      bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
			== GOMP_MAP_STRUCT);
      uint64_t dev_end;
      if (!is_struct)
	dev_end = d[i].devaddr + sizes[i];
      else
	{
	  if (i + sizes[i] < n)
	    dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
	  else
	    dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
	}
      if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
	  || (dev_end > tgt_start && d[i].devaddr < tgt_end))
	break;
      if (is_struct)
	i += sizes[i];
    }
  return i;
}
static inline size_t
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
		       bool zero_len)
{
  size_t i;
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				      tgt_start, tgt_end);
  tgt_end++;
  i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				 tgt_start, tgt_end);
  if (i < n || zero_len)
    return i;
  tgt_end--;

  tgt_start--;
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				    tgt_start, tgt_end);
}
/* Handle reverse offload.  This is called by the device plugins for a
   reverse offload; it is not called if the outer target runs on the host.
   The mapping is simplified: device-affecting constructs (except for target
   with device(ancestor:1)) must not be encountered; in particular not
   target (enter/exit) data.  */

void
gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
		 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
		 struct goacc_asyncqueue *aq)
{
  /* Return early if there is no offload code.  */
  if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
    return;
  /* Currently, this fails because of calculate_firstprivate_requirements
     below; it could be fixed but additional code needs to be updated to
     handle 32bit hosts - thus, it is not worthwhile.  */
  if (sizeof (void *) != sizeof (uint64_t))
    gomp_fatal ("Reverse offload of 32bit hosts not supported.");

  struct cpy_data *cdata = NULL;
  uint64_t *devaddrs;
  uint64_t *sizes;
  unsigned short *kinds;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct gomp_device_descr *devicep = resolve_device (dev_num, false);

  reverse_splay_tree_key n;
  struct reverse_splay_tree_key_s k;
  k.dev = fn_ptr;

  gomp_mutex_lock (&devicep->lock);
  n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
  gomp_mutex_unlock (&devicep->lock);

  if (n == NULL)
    gomp_fatal ("Cannot find reverse-offload function");
  void (*host_fn)() = (void (*)()) n->k->host_start;

  if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
    {
      devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
      sizes = (uint64_t *) (uintptr_t) sizes_ptr;
      kinds = (unsigned short *) (uintptr_t) kinds_ptr;
    }
  else
    {
      devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
      gomp_copy_dev2host (devicep, aq, devaddrs,
			  (const void *) (uintptr_t) devaddrs_ptr,
			  mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, sizes,
			  (const void *) (uintptr_t) sizes_ptr,
			  mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, kinds,
			  (const void *) (uintptr_t) kinds_ptr,
			  mapnum * sizeof (unsigned short));
      if (aq && !devicep->openacc.async.synchronize_func (aq))
	exit (EXIT_FAILURE);
    }

  size_t tgt_align = 0, tgt_size = 0;

  /* If actually executed on 32bit systems, the casts lead to wrong code;
     but 32bit with offloading is not supported; see top of this function.  */
  calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
				       (void *) (uintptr_t) kinds,
				       &tgt_align, &tgt_size);

  if (tgt_align)
    {
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (uint64_t i = 0; i < mapnum; i++)
	if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
	    && devaddrs[i] != 0)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	      memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
		      (size_t) sizes[i]);
	    else
	      {
		gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
				    (void *) (uintptr_t) devaddrs[i],
				    (size_t) sizes[i]);
		if (aq && !devicep->openacc.async.synchronize_func (aq))
		  exit (EXIT_FAILURE);
	      }
	    devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	    if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
		&& i + 1 < mapnum
		&& ((get_kind (short_mapkind, kinds, i) & typemask)
		    == GOMP_MAP_ATTACH))
	      {
		*(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
		  = (uint64_t) devaddrs[i];
		++i;
	      }
	  }
    }

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      size_t j, struct_cpy = 0;
      splay_tree_key n2;
      cdata = gomp_alloca (sizeof (*cdata) * mapnum);
      memset (cdata, '\0', sizeof (*cdata) * mapnum);
      gomp_mutex_lock (&devicep->lock);
      for (uint64_t i = 0; i < mapnum; i++)
	{
	  if (devaddrs[i] == 0)
	    continue;
	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
	  switch (kind)
	    {
	      case GOMP_MAP_FIRSTPRIVATE:
	      case GOMP_MAP_FIRSTPRIVATE_INT:
		continue;

	      case GOMP_MAP_DELETE:
	      case GOMP_MAP_RELEASE:
	      case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
		/* Assume it is present; look it up - but ignore unless the
		   present clause is there.  */
	      case GOMP_MAP_ALLOC:
	      case GOMP_MAP_FROM:
	      case GOMP_MAP_FORCE_ALLOC:
	      case GOMP_MAP_FORCE_FROM:
	      case GOMP_MAP_ALWAYS_FROM:
	      case GOMP_MAP_TO:
	      case GOMP_MAP_TOFROM:
	      case GOMP_MAP_FORCE_TO:
	      case GOMP_MAP_FORCE_TOFROM:
	      case GOMP_MAP_ALWAYS_TO:
	      case GOMP_MAP_ALWAYS_TOFROM:
	      case GOMP_MAP_FORCE_PRESENT:
	      case GOMP_MAP_ALWAYS_PRESENT_FROM:
	      case GOMP_MAP_ALWAYS_PRESENT_TO:
	      case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		cdata[i].devaddr = devaddrs[i];
		bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
				 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
		j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					   devaddrs[i],
					   devaddrs[i] + sizes[i], zero_len);
		if (j < i)
		  {
		    n2 = NULL;
		    cdata[i].present = true;
		    devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
		  }
		else
		  {
		    n2 = gomp_map_rev_lookup (&devicep->mem_map,
					      devaddrs[i],
					      devaddrs[i] + sizes[i], zero_len);
		    cdata[i].present = n2 != NULL;
		  }
		if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
		  {
		    gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		    gomp_fatal ("present clause: no corresponding data on "
				"parent device at %p with size %"PRIu64,
				(void *) (uintptr_t) devaddrs[i],
				(uint64_t) sizes[i]);
#else
		    gomp_fatal ("present clause: no corresponding data on "
				"parent device at %p with size %lu",
				(void *) (uintptr_t) devaddrs[i],
				(unsigned long) sizes[i]);
#endif
		  }
		else if (!cdata[i].present
			 && kind != GOMP_MAP_DELETE
			 && kind != GOMP_MAP_RELEASE
			 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
		  {
		    cdata[i].aligned = true;
		    size_t align = (size_t) 1 << (kinds[i] >> 8);
		    devaddrs[i]
		      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
								   sizes[i]);
		  }
		else if (n2 != NULL)
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		if (((!cdata[i].present || struct_cpy)
		     && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
		    || kind == GOMP_MAP_FORCE_TO
		    || kind == GOMP_MAP_FORCE_TOFROM
		    || GOMP_MAP_ALWAYS_TO_P (kind))
		  {
		    gomp_copy_dev2host (devicep, aq,
					(void *) (uintptr_t) devaddrs[i],
					(void *) (uintptr_t) cdata[i].devaddr,
					sizes[i]);
		    if (aq && !devicep->openacc.async.synchronize_func (aq))
		      {
			gomp_mutex_unlock (&devicep->lock);
			exit (EXIT_FAILURE);
		      }
		  }
		if (struct_cpy)
		  struct_cpy--;
		break;
	      case GOMP_MAP_ATTACH:
	      case GOMP_MAP_POINTER:
	      case GOMP_MAP_ALWAYS_POINTER:
		n2 = gomp_map_rev_lookup (&devicep->mem_map,
					  devaddrs[i] + sizes[i],
					  devaddrs[i] + sizes[i]
					  + sizeof (void*), false);
		cdata[i].present = n2 != NULL;
		cdata[i].devaddr = devaddrs[i];
		if (n2)
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		else
		  {
		    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					       devaddrs[i] + sizes[i],
					       devaddrs[i] + sizes[i]
					       + sizeof (void*), false);
		    if (j < i)
		      {
			cdata[i].present = true;
			devaddrs[i] = (devaddrs[j] + devaddrs[i]
				       - cdata[j].devaddr);
		      }
		  }
		if (!cdata[i].present)
		  devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
		/* Assume that when present, the pointer is already correct.  */
		if (!n2)
		  *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
		    = devaddrs[i-1];
		break;
	      case GOMP_MAP_TO_PSET:
		/* Assume that when present, the pointers are fine and no 'to:'
		   copy is required.  */
		n2 = gomp_map_rev_lookup (&devicep->mem_map,
					  devaddrs[i], devaddrs[i] + sizes[i],
					  false);
		cdata[i].present = n2 != NULL;
		cdata[i].devaddr = devaddrs[i];
		if (n2)
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		else
		  {
		    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					       devaddrs[i],
					       devaddrs[i] + sizes[i], false);
		    if (j < i)
		      {
			cdata[i].present = true;
			devaddrs[i] = (devaddrs[j] + devaddrs[i]
				       - cdata[j].devaddr);
		      }
		  }
		if (!cdata[i].present)
		  {
		    cdata[i].aligned = true;
		    size_t align = (size_t) 1 << (kinds[i] >> 8);
		    devaddrs[i]
		      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
								   sizes[i]);
		    gomp_copy_dev2host (devicep, aq,
					(void *) (uintptr_t) devaddrs[i],
					(void *) (uintptr_t) cdata[i].devaddr,
					sizes[i]);
		    if (aq && !devicep->openacc.async.synchronize_func (aq))
		      {
			gomp_mutex_unlock (&devicep->lock);
			exit (EXIT_FAILURE);
		      }
		  }
		for (j = i + 1; j < mapnum; j++)
		  {
		    kind = get_kind (short_mapkind, kinds, j) & typemask;
		    if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
			&& !GOMP_MAP_POINTER_P (kind))
		      break;
		    if (devaddrs[j] < devaddrs[i])
		      break;
		    if (cdata[i].present)
		      continue;
		    if (devaddrs[j] == 0)
		      {
			*(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
			continue;
		      }
		    size_t k;
		    n2 = NULL;
		    /* Dereference devaddrs[j] to get the device addr.  */
		    assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
		    devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
							     + sizes[j]);
		    cdata[j].present = true;
		    cdata[j].devaddr = devaddrs[j];
		    if (devaddrs[j] == 0)
		      continue;
		    k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
					       devaddrs[j],
					       devaddrs[j] + sizeof (void*),
					       false);
		    if (k < j)
		      devaddrs[j] = (devaddrs[k] + devaddrs[j]
				     - cdata[k].devaddr);
		    else
		      {
			n2 = gomp_map_rev_lookup (&devicep->mem_map,
						  devaddrs[j],
						  devaddrs[j] + sizeof (void*),
						  false);
			if (n2 == NULL)
			  {
			    gomp_mutex_unlock (&devicep->lock);
			    gomp_fatal ("Pointer target wasn't mapped");
			  }
			devaddrs[j] = (n2->host_start + cdata[j].devaddr
				       - (n2->tgt->tgt_start
					  + n2->tgt_offset));
		      }
		    *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
		      = (void *) (uintptr_t) devaddrs[j];
		  }
		i = j - 1;
		break;
	      case GOMP_MAP_STRUCT:
		n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
					  devaddrs[i + sizes[i]]
					  + sizes[i + sizes[i]], false);
		cdata[i].present = n2 != NULL;
		cdata[i].devaddr = devaddrs[i];
		struct_cpy = cdata[i].present ? 0 : sizes[i];
		if (!n2)
		  {
		    size_t sz = (size_t) (devaddrs[i + sizes[i]]
					  - devaddrs[i+1]
					  + sizes[i + sizes[i]]);
		    size_t align = (size_t) 1 << (kinds[i] >> 8);
		    cdata[i].aligned = true;
		    devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
		    devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
		  }
		else
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		break;
	      default:
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
	    }
	}
      gomp_mutex_unlock (&devicep->lock);
    }

  host_fn (devaddrs);

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      uint64_t struct_cpy = 0;
      bool clean_struct = false;
      for (uint64_t i = 0; i < mapnum; i++)
	{
	  if (cdata[i].devaddr == 0)
	    continue;
	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
	  bool copy = !cdata[i].present || struct_cpy;
	  switch (kind)
	    {
	      case GOMP_MAP_FORCE_FROM:
	      case GOMP_MAP_FORCE_TOFROM:
	      case GOMP_MAP_ALWAYS_FROM:
	      case GOMP_MAP_ALWAYS_TOFROM:
	      case GOMP_MAP_ALWAYS_PRESENT_FROM:
	      case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
		copy = true;
		/* FALLTHRU */
	      case GOMP_MAP_FROM:
	      case GOMP_MAP_TOFROM:
		if (copy)
		  {
		    gomp_copy_host2dev (devicep, aq,
					(void *) (uintptr_t) cdata[i].devaddr,
					(void *) (uintptr_t) devaddrs[i],
					sizes[i], false, NULL);
		    if (aq && !devicep->openacc.async.synchronize_func (aq))
		      exit (EXIT_FAILURE);
		  }
	      default:
		break;
	    }
	  if (struct_cpy)
	    {
	      struct_cpy--;
	      continue;
	    }
	  if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
	    {
	      clean_struct = true;
	      struct_cpy = sizes[i];
	    }
	  else if (!cdata[i].present && cdata[i].aligned)
	    gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
	  else if (!cdata[i].present)
	    free ((void *) (uintptr_t) devaddrs[i]);
	}
      if (clean_struct)
	for (uint64_t i = 0; i < mapnum; i++)
	  if (!cdata[i].present
	      && ((get_kind (short_mapkind, kinds, i) & typemask)
		  == GOMP_MAP_STRUCT))
	    {
	      devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
	      gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
	    }

      free (devaddrs);
      free (sizes);
      free (kinds);
    }
}
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}
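/* Roughly, a structured

     #pragma omp target data map(tofrom: x)
     { ... }

   brackets the block with (a sketch, not the exact generated code):

     GOMP_target_data_ext (-1, 1, hostaddrs, sizes, kinds);
     // ... body ...
     GOMP_target_end_data ();

   which is why the fallback above must keep the target_data list in sync
   even when no device is used.  */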
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    {
	      if (k->aux && k->aux->attach_count)
		{
		  /* We have to be careful not to overwrite still attached
		     pointers during the copyback to host.  */
		  uintptr_t addr = k->host_start;
		  while (addr < k->host_end)
		    {
		      size_t i = (addr - k->host_start) / sizeof (void *);
		      if (k->aux->attach_count[i] == 0)
			gomp_copy_dev2host (devicep, NULL, (void *) addr,
					    (void *) (k->tgt->tgt_start
						      + k->tgt_offset
						      + addr - k->host_start),
					    sizeof (void *));
		      addr += sizeof (void *);
		    }
		}
	      else
		gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	    }

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since they maintain an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start
	     with.  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j)
					       & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum
	       && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
		   || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
		       && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
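/* Usage sketch (assuming the usual GCC lowering; illustrative only): both

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target exit data map(from: a[0:n])

   funnel into GOMP_target_enter_exit_data; GOMP_TARGET_FLAG_EXIT_DATA in
   FLAGS selects between the mapping loop above and gomp_exit_data.  */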
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
				ttask->args);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  gomp_unmap_vars (ttask->tgt, true, NULL);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      NULL, GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
	for (i = 0; i < ttask->mapnum; i++)
	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	    {
	      gomp_map_vars (devicep, ttask->sizes[i] + 1,
			     &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
			     &ttask->kinds[i], true, &refcount_set,
			     GOMP_MAP_VARS_ENTER_DATA);
	      i += ttask->sizes[i];
	    }
	  else
	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
			   &ttask->sizes[i], &ttask->kinds[i], true,
			   &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
      else
	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs,
			ttask->sizes, ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}
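/* Note on the target-task states used above: GOMP_TARGET_TASK_FALLBACK marks
   a task that was executed synchronously on the host,
   GOMP_TARGET_TASK_FINISHED a completed asynchronous region whose mappings
   still have to be released, and GOMP_TARGET_TASK_READY_TO_RUN a task that
   has been handed to the plugin's async_run hook; the plugin signals
   completion back to libgomp (via GOMP_PLUGIN_target_task_completion) using
   the TTASK pointer passed as the hook's last argument.  */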
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
bool
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
	     unsigned int thread_limit, bool first)
{
  struct gomp_thread *thr = gomp_thread ();
  if (first)
    {
      if (thread_limit)
	{
	  struct gomp_task_icv *icv = gomp_icv (true);
	  icv->thread_limit_var
	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
	}
      (void) num_teams_high;
      if (num_teams_low == 0)
	num_teams_low = 1;
      thr->num_teams = num_teams_low - 1;
      thr->team_num = 0;
    }
  else if (thr->team_num == thr->num_teams)
    return false;
  else
    ++thr->team_num;
  return true;
}
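/* Host-fallback sketch (an assumption about the code the compiler emits;
   illustrative only): the teams body is wrapped in a loop driven by the
   return value of GOMP_teams4, roughly

     if (GOMP_teams4 (lo, hi, limit, true))
       do
	 body ();   // omp_get_team_num () reflects thr->team_num
       while (GOMP_teams4 (lo, hi, limit, false));  */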
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return malloc (size);

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL || device_ptr == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
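/* Usage sketch (illustrative only): memory from omp_target_alloc is not
   mapped, so it is typically handed to a target region via is_device_ptr:

     size_t n = 1024;
     int dev = omp_get_default_device ();
     double *p = omp_target_alloc (n * sizeof (double), dev);
     if (p)
       {
	 #pragma omp target is_device_ptr(p) device(dev)
	 for (size_t i = 0; i < n; i++)
	   p[i] = i;
	 omp_target_free (p, dev);
       }  */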
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return 1;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return 0;

  if (ptr == NULL)
    return 1;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
static int
omp_target_memcpy_check (int dst_device_num, int src_device_num,
			 struct gomp_device_descr **dst_devicep,
			 struct gomp_device_descr **src_devicep)
{
  if (dst_device_num != gomp_get_num_devices ()
      /* Above gomp_get_num_devices has to be called unconditionally.  */
      && dst_device_num != omp_initial_device)
    {
      *dst_devicep = resolve_device (dst_device_num, false);
      if (*dst_devicep == NULL)
	return EINVAL;

      if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*dst_devicep = NULL;
    }

  if (src_device_num != num_devices_openmp
      && src_device_num != omp_initial_device)
    {
      *src_devicep = resolve_device (src_device_num, false);
      if (*src_devicep == NULL)
	return EINVAL;

      if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*src_devicep = NULL;
    }

  return 0;
}
static int
omp_target_memcpy_copy (void *dst, const void *src, size_t length,
			size_t dst_offset, size_t src_offset,
			struct gomp_device_descr *dst_devicep,
			struct gomp_device_descr *src_devicep)
{
  bool ret;
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);
  return ret;
}
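/* Usage sketch (illustrative only): copy N bytes from a host buffer into a
   device allocation; omp_get_initial_device () names the host side:

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     omp_target_memcpy (devp, hostbuf, n, 0, 0, dev, host);  */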
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;

static void
omp_target_memcpy_async_helper (void *args)
{
  omp_target_memcpy_data *a = args;
  if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
			      a->src_offset, a->dst_devicep, a->src_devicep))
    gomp_fatal ("omp_target_memcpy failed");
}
int
omp_target_memcpy_async (void *dst, const void *src, size_t length,
			 size_t dst_offset, size_t src_offset,
			 int dst_device_num, int src_device_num,
			 int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  void *depend[depobj_count + 5];
  int i;
  int check = omp_target_memcpy_check (dst_device_num, src_device_num,
				       &dst_devicep, &src_devicep);
  if (check)
    return check;

  omp_target_memcpy_data s = {
    .dst = dst,
    .src = src,
    .length = length,
    .dst_offset = dst_offset,
    .src_offset = src_offset,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
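/* Note on the DEPEND encoding built above (as consumed by
   gomp_task_handle_depend; stated here as an aid): DEPEND[0] == 0 selects
   the extended format in which DEPEND[1] holds the total number of entries
   and DEPEND[2..4] count the out/inout, mutexinoutset and in entries; with
   all three zero, every element from DEPEND[5] on is treated as the address
   of an omp_depend_t.  */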
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
static int
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
			      int src_device_num,
			      struct gomp_device_descr **dst_devicep,
			      struct gomp_device_descr **src_devicep)
{
  if (!dst && !src)
    return INT_MAX;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     dst_devicep, src_devicep);
  if (ret)
    return ret;

  if (*src_devicep != NULL && *dst_devicep != NULL
      && *src_devicep != *dst_devicep)
    return EINVAL;

  return 0;
}
static int
omp_target_memcpy_rect_copy (void *dst, const void *src,
			     size_t element_size, int num_dims,
			     const size_t *volume, const size_t *dst_offsets,
			     const size_t *src_offsets,
			     const size_t *dst_dimensions,
			     const size_t *src_dimensions,
			     struct gomp_device_descr *dst_devicep,
			     struct gomp_device_descr *src_devicep)
{
  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);

  return ret;
}
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
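/* Usage sketch (illustrative only): following the OpenMP spec, a call with
   both DST and SRC null only queries the supported dimensionality, which the
   check helper above reports as INT_MAX:

     int maxdims = omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL,
					   NULL, NULL, NULL, 0, 0);  */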
typedef struct
{
  void *dst;
  const void *src;
  size_t element_size;
  const size_t *volume;
  const size_t *dst_offsets;
  const size_t *src_offsets;
  const size_t *dst_dimensions;
  const size_t *src_dimensions;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
  int num_dims;
} omp_target_memcpy_rect_data;

static void
omp_target_memcpy_rect_async_helper (void *args)
{
  omp_target_memcpy_rect_data *a = args;
  int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
					 a->num_dims, a->volume,
					 a->dst_offsets, a->src_offsets,
					 a->dst_dimensions, a->src_dimensions,
					 a->dst_devicep, a->src_devicep);
  if (ret)
    gomp_fatal ("omp_target_memcpy_rect failed");
}
int
omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
			      int num_dims, const size_t *volume,
			      const size_t *dst_offsets,
			      const size_t *src_offsets,
			      const size_t *dst_dimensions,
			      const size_t *src_dimensions,
			      int dst_device_num, int src_device_num,
			      int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  void *depend[depobj_count + 5];
  int i;

  if (check)
    return check;

  omp_target_memcpy_rect_data s = {
    .dst = dst,
    .src = src,
    .element_size = element_size,
    .num_dims = num_dims,
    .volume = volume,
    .dst_offsets = dst_offsets,
    .src_offsets = src_offsets,
    .dst_dimensions = dst_dimensions,
    .src_dimensions = src_dimensions,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
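/* Usage sketch (illustrative only): associate a separately allocated device
   buffer with a host array so later lookups reuse it, and undo the
   association before freeing the buffer:

     double host_buf[1024];
     int dev = omp_get_default_device ();
     void *dev_buf = omp_target_alloc (sizeof host_buf, dev);
     omp_target_associate_ptr (host_buf, dev_buf, sizeof host_buf, 0, dev);
     ...
     omp_target_disassociate_ptr (host_buf, dev);
     omp_target_free (dev_buf, dev);  */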
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void *
omp_get_mapped_ptr (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == omp_get_initial_device ())
    return (void *) ptr;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return (void *) ptr;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  void *ret = NULL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);

  if (n)
    {
      uintptr_t offset = cur_node.host_start - n->host_start;
      ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
    }

  gomp_mutex_unlock (&devicep->lock);

  return ret;
}
int
omp_target_is_accessible (const void *ptr, size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return true;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return false;

  /* TODO: Unified shared memory must be handled when available.  */

  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
}
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return gomp_pause_host ();

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return -1;

  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them
	     are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
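/* Expansion example (illustrative only): DLSYM (init_device); above expands
   to roughly

     if (!(device->init_device_func
	     = dlsym (plugin_handle, "GOMP_OFFLOAD_init_device")))
       goto dl_fail;

   so a plugin is rejected as soon as a single mandatory GOMP_OFFLOAD_*
   symbol cannot be resolved.  */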
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins and tries to load them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
	    new_num_devs = current_device.get_num_devices_func (omp_req);
	    if (gomp_debug_var > 0 && new_num_devs < 0)
	      {
		bool found = false;
		int type = current_device.get_type_func ();
		for (int img = 0; img < num_offload_images; img++)
		  if (type == offload_images[img].type)
		    found = true;
		if (found)
		  {
		    char buf[sizeof ("unified_address, unified_shared_memory, "
				     "reverse_offload")];
		    gomp_requires_to_name (buf, sizeof (buf), omp_req);
		    char *name = (char *) malloc (cur_len + 1);
		    memcpy (name, cur, cur_len);
		    name[cur_len] = '\0';
		    gomp_debug (1,
				"%s devices present but 'omp requires %s' "
				"cannot be fulfilled\n", name, buf);
		    free (name);
		  }
	      }
	    else if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devs = realloc (devs, (num_devs + new_num_devs)
				      * sizeof (struct gomp_device_descr));
		if (!devs)
		  {
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.mem_map_rev.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }

  if (gomp_global_icv.default_device_var == INT_MIN)
    {
      /* This implies OMP_TARGET_OFFLOAD=mandatory.  */
      struct gomp_icv_list *none;
      none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
      gomp_global_icv.default_device_var = (num_devs_openmp
					    ? 0 : omp_invalid_device);
      none->icvs.default_device_var = gomp_global_icv.default_device_var;
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
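/* Naming example (illustrative; the exact list is fixed at configure time):
   with OFFLOAD_PLUGINS set to "nvptx,gcn", the loop above probes
   "libgomp-plugin-nvptx" SONAME_SUFFIX (1) and "libgomp-plugin-gcn"
   SONAME_SUFFIX (1), e.g. libgomp-plugin-nvptx.so.1 on ELF targets.  */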
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   The GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */