1 /* Copyright (C) 2013-2024 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_static
52 #include "splay-tree.h"
55 typedef uintptr_t *hash_entry_type
;
/* Allocation callback for the refcount hash table: defer to libgomp's
   fatal-on-failure allocator.  */
static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}
/* Deallocation callback for the refcount hash table: release memory
   previously obtained from htab_alloc.  */
static inline void
htab_free (void *ptr)
{
  free (ptr);
}
60 ialias_redirect (GOMP_task
)
62 static inline hashval_t
63 htab_hash (hash_entry_type element
)
65 return hash_pointer ((void *) element
);
69 htab_eq (hash_entry_type x
, hash_entry_type y
)
74 #define FIELD_TGT_EMPTY (~(size_t) 0)
76 static void gomp_target_init (void);
78 /* The whole initialization code for offloading plugins is only run once. */
79 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
81 /* Mutex for offload image registration. */
82 static gomp_mutex_t register_lock
;
84 /* This structure describes an offload image.
85 It contains type of the target device, pointer to host table descriptor, and
86 pointer to target data. */
87 struct offload_image_descr
{
89 enum offload_target_type type
;
90 const void *host_table
;
91 const void *target_data
;
94 /* Array of descriptors of offload images. */
95 static struct offload_image_descr
*offload_images
;
97 /* Total number of offload images. */
98 static int num_offload_images
;
100 /* Array of descriptors for all available devices. */
101 static struct gomp_device_descr
*devices
;
103 /* Total number of available devices. */
104 static int num_devices
;
106 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
107 static int num_devices_openmp
;
109 /* OpenMP requires mask. */
110 static int omp_requires_mask
;
112 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
115 gomp_realloc_unlock (void *old
, size_t size
)
117 void *ret
= realloc (old
, size
);
120 gomp_mutex_unlock (®ister_lock
);
121 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
126 attribute_hidden
void
127 gomp_init_targets_once (void)
129 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
133 gomp_get_num_devices (void)
135 gomp_init_targets_once ();
136 return num_devices_openmp
;
139 static struct gomp_device_descr
*
140 resolve_device (int device_id
, bool remapped
)
142 /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
143 called, which must be done before using default_device_var. */
144 int num_devices
= gomp_get_num_devices ();
146 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
148 struct gomp_task_icv
*icv
= gomp_icv (false);
149 device_id
= icv
->default_device_var
;
155 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
156 : omp_initial_device
))
158 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
160 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
161 "but only the host device is available");
162 else if (device_id
== omp_invalid_device
)
163 gomp_fatal ("omp_invalid_device encountered");
164 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
170 else if (device_id
>= num_devices
)
172 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
173 && device_id
!= num_devices
)
174 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
175 "but device not found");
180 gomp_mutex_lock (&devices
[device_id
].lock
);
181 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
182 gomp_init_device (&devices
[device_id
]);
183 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
185 gomp_mutex_unlock (&devices
[device_id
].lock
);
187 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
188 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
189 "but device is finalized");
193 gomp_mutex_unlock (&devices
[device_id
].lock
);
195 return &devices
[device_id
];
199 static inline splay_tree_key
200 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
202 if (key
->host_start
!= key
->host_end
)
203 return splay_tree_lookup (mem_map
, key
);
206 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
211 n
= splay_tree_lookup (mem_map
, key
);
215 return splay_tree_lookup (mem_map
, key
);
218 static inline reverse_splay_tree_key
219 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev
, reverse_splay_tree_key key
)
221 return reverse_splay_tree_lookup (mem_map_rev
, key
);
224 static inline splay_tree_key
225 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
227 if (key
->host_start
!= key
->host_end
)
228 return splay_tree_lookup (mem_map
, key
);
231 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
237 gomp_device_copy (struct gomp_device_descr
*devicep
,
238 bool (*copy_func
) (int, void *, const void *, size_t),
239 const char *dst
, void *dstaddr
,
240 const char *src
, const void *srcaddr
,
243 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
245 gomp_mutex_unlock (&devicep
->lock
);
246 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
247 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
252 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
253 bool (*copy_func
) (int, void *, const void *, size_t,
254 struct goacc_asyncqueue
*),
255 const char *dst
, void *dstaddr
,
256 const char *src
, const void *srcaddr
,
257 const void *srcaddr_orig
,
258 size_t size
, struct goacc_asyncqueue
*aq
)
260 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
262 gomp_mutex_unlock (&devicep
->lock
);
263 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
264 gomp_fatal ("Copying of %s object [%p..%p)"
265 " via buffer %s object [%p..%p)"
266 " to %s object [%p..%p) failed",
267 src
, srcaddr_orig
, srcaddr_orig
+ size
,
268 src
, srcaddr
, srcaddr
+ size
,
269 dst
, dstaddr
, dstaddr
+ size
);
271 gomp_fatal ("Copying of %s object [%p..%p)"
272 " to %s object [%p..%p) failed",
273 src
, srcaddr
, srcaddr
+ size
,
274 dst
, dstaddr
, dstaddr
+ size
);
278 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
279 host to device memory transfers. */
/* One contiguous span (relative to tgt->tgt_start) to be filled from the
   intermediate host buffer.  NOTE(review): field lines were elided in this
   chunk; restored from uses elsewhere (chunks[i].start / chunks[i].end) —
   confirm against upstream.  */
struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};
/* Staging buffer for coalescing many small host-to-device copies into few
   transfers.  NOTE(review): several field lines were elided in this chunk;
   restored from uses elsewhere (cbuf->buf, cbuf.chunk_cnt, cbuf.use_cnt) —
   confirm against upstream.  */
struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
306 /* Maximum size of memory region considered for coalescing. Larger copies
307 are performed directly. */
308 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
310 /* Maximum size of a gap in between regions to consider them being copied
311 within the same chunk. All the device offsets considered are within
312 newly allocated device memory, so it isn't fatal if we copy some padding
313 in between from host to device. The gaps come either from alignment
314 padding or from memory regions which are not supposed to be copied from
315 host to device (e.g. map(alloc:), map(from:) etc.). */
316 #define MAX_COALESCE_BUF_GAP (4 * 1024)
318 /* Add region with device tgt_start relative offset and length to CBUF.
320 This must not be used for asynchronous copies, because the host data might
321 not be computed yet (by an earlier asynchronous compute region, for
322 example). The exception is for EPHEMERAL data, that we know is available
323 already "by construction". */
326 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
328 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
332 if (cbuf
->chunk_cnt
< 0)
334 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
336 cbuf
->chunk_cnt
= -1;
339 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
341 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
345 /* If the last chunk is only used by one mapping, discard it,
346 as it will be one host to device copy anyway and
347 memcpying it around will only waste cycles. */
348 if (cbuf
->use_cnt
== 1)
351 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
352 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
357 /* Return true for mapping kinds which need to copy data from the
358 host to device for regions that weren't previously mapped. */
361 gomp_to_device_kind_p (int kind
)
367 case GOMP_MAP_FORCE_ALLOC
:
368 case GOMP_MAP_FORCE_FROM
:
369 case GOMP_MAP_ALWAYS_FROM
:
370 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
371 case GOMP_MAP_FORCE_PRESENT
:
378 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
379 non-NULL), when the source data is stack or may otherwise be deallocated
380 before the asynchronous copy takes place, EPHEMERAL must be passed as
383 attribute_hidden
void
384 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
385 struct goacc_asyncqueue
*aq
,
386 void *d
, const void *h
, size_t sz
,
387 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
391 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
392 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
395 long last
= cbuf
->chunk_cnt
- 1;
396 while (first
<= last
)
398 long middle
= (first
+ last
) >> 1;
399 if (cbuf
->chunks
[middle
].end
<= doff
)
401 else if (cbuf
->chunks
[middle
].start
<= doff
)
403 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
405 gomp_mutex_unlock (&devicep
->lock
);
406 gomp_fatal ("internal libgomp cbuf error");
409 /* In an asynchronous context, verify that CBUF isn't used
410 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
411 if (__builtin_expect (aq
!= NULL
, 0))
414 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
424 if (__builtin_expect (aq
!= NULL
, 0))
426 void *h_buf
= (void *) h
;
429 /* We're queueing up an asynchronous copy from data that may
430 disappear before the transfer takes place (i.e. because it is a
431 stack local in a function that is no longer executing). As we've
432 not been able to use CBUF, make a copy of the data into a
434 h_buf
= gomp_malloc (sz
);
435 memcpy (h_buf
, h
, sz
);
437 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
438 "dev", d
, "host", h_buf
, h
, sz
, aq
);
440 /* Free once the transfer has completed. */
441 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
444 gomp_device_copy (devicep
, devicep
->host2dev_func
,
445 "dev", d
, "host", h
, sz
);
448 attribute_hidden
void
449 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
450 struct goacc_asyncqueue
*aq
,
451 void *h
, const void *d
, size_t sz
)
453 if (__builtin_expect (aq
!= NULL
, 0))
454 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
455 "host", h
, "dev", d
, NULL
, sz
, aq
);
457 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
461 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
463 if (!devicep
->free_func (devicep
->target_id
, devptr
))
465 gomp_mutex_unlock (&devicep
->lock
);
466 gomp_fatal ("error in freeing device memory block at %p", devptr
);
470 /* Increment reference count of a splay_tree_key region K by 1.
471 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
472 increment the value if refcount is not yet contained in the set (used for
473 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
474 once for each construct). */
477 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
480 || k
->refcount
== REFCOUNT_INFINITY
481 || k
->refcount
== REFCOUNT_ACC_MAP_DATA
)
484 uintptr_t *refcount_ptr
= &k
->refcount
;
486 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
487 refcount_ptr
= &k
->structelem_refcount
;
488 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
489 refcount_ptr
= k
->structelem_refcount_ptr
;
493 if (htab_find (*refcount_set
, refcount_ptr
))
495 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
496 *slot
= refcount_ptr
;
503 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
504 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
505 track already seen refcounts, and only adjust the value if refcount is not
506 yet contained in the set (like gomp_increment_refcount).
508 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
509 it is already zero and we know we decremented it earlier. This signals that
510 associated maps should be copied back to host.
512 *DO_REMOVE is set to true when this is the first handling of this refcount
513 and we are setting it to zero. This signals a removal of this key from the
516 Copy and removal are separated due to cases like handling of structure
517 elements, e.g. each map of a structure element representing a possible copy
518 out of a structure field has to be handled individually, but we only signal
519 removal for one (the first encountered) sibling map. */
522 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
523 bool *do_copy
, bool *do_remove
)
526 || k
->refcount
== REFCOUNT_INFINITY
527 || k
->refcount
== REFCOUNT_ACC_MAP_DATA
)
529 *do_copy
= *do_remove
= false;
533 uintptr_t *refcount_ptr
= &k
->refcount
;
535 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
536 refcount_ptr
= &k
->structelem_refcount
;
537 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
538 refcount_ptr
= k
->structelem_refcount_ptr
;
540 bool new_encountered_refcount
;
541 bool set_to_zero
= false;
542 bool is_zero
= false;
544 uintptr_t orig_refcount
= *refcount_ptr
;
548 if (htab_find (*refcount_set
, refcount_ptr
))
550 new_encountered_refcount
= false;
554 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
555 *slot
= refcount_ptr
;
556 new_encountered_refcount
= true;
559 /* If no refcount_set being used, assume all keys are being decremented
560 for the first time. */
561 new_encountered_refcount
= true;
565 else if (*refcount_ptr
> 0)
569 if (*refcount_ptr
== 0)
571 if (orig_refcount
> 0)
577 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
578 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
581 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
582 gomp_map_0len_lookup found oldn for newn.
583 Helper function of gomp_map_vars. */
586 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
587 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
588 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
589 unsigned char kind
, bool always_to_flag
, bool implicit
,
590 struct gomp_coalesce_buf
*cbuf
,
591 htab_t
*refcount_set
)
593 assert (kind
!= GOMP_MAP_ATTACH
594 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
597 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
598 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
599 tgt_var
->is_attach
= false;
600 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
602 /* For implicit maps, old contained in new is valid. */
603 bool implicit_subset
= (implicit
604 && newn
->host_start
<= oldn
->host_start
605 && oldn
->host_end
<= newn
->host_end
);
607 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
609 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
611 if (GOMP_MAP_FORCE_P (kind
)
612 /* For implicit maps, old contained in new is valid. */
614 /* Otherwise, new contained inside old is considered valid. */
615 || (oldn
->host_start
<= newn
->host_start
616 && newn
->host_end
<= oldn
->host_end
)))
618 gomp_mutex_unlock (&devicep
->lock
);
619 gomp_fatal ("Trying to map into device [%p..%p) object when "
620 "[%p..%p) is already mapped",
621 (void *) newn
->host_start
, (void *) newn
->host_end
,
622 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
625 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
627 /* Implicit + always should not happen. If this does occur, below
628 address/length adjustment is a TODO. */
629 assert (!implicit_subset
);
631 if (oldn
->aux
&& oldn
->aux
->attach_count
)
633 /* We have to be careful not to overwrite still attached pointers
634 during the copyback to host. */
635 uintptr_t addr
= newn
->host_start
;
636 while (addr
< newn
->host_end
)
638 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
639 if (oldn
->aux
->attach_count
[i
] == 0)
640 gomp_copy_host2dev (devicep
, aq
,
641 (void *) (oldn
->tgt
->tgt_start
643 + addr
- oldn
->host_start
),
645 sizeof (void *), false, cbuf
);
646 addr
+= sizeof (void *);
650 gomp_copy_host2dev (devicep
, aq
,
651 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
652 + newn
->host_start
- oldn
->host_start
),
653 (void *) newn
->host_start
,
654 newn
->host_end
- newn
->host_start
, false, cbuf
);
657 gomp_increment_refcount (oldn
, refcount_set
);
661 get_kind (bool short_mapkind
, void *kinds
, int idx
)
664 return ((unsigned char *) kinds
)[idx
];
666 int val
= ((unsigned short *) kinds
)[idx
];
667 if (GOMP_MAP_IMPLICIT_P (val
))
668 val
&= ~GOMP_MAP_IMPLICIT
;
/* Return true iff entry IDX of KINDS carries the implicit-map flag.
   Only short (16-bit) map kinds can carry it.  */
static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
684 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
685 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
686 struct gomp_coalesce_buf
*cbuf
,
687 bool allow_zero_length_array_sections
)
689 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
690 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
691 struct splay_tree_key_s cur_node
;
693 cur_node
.host_start
= host_ptr
;
694 if (cur_node
.host_start
== (uintptr_t) NULL
)
696 cur_node
.tgt_offset
= (uintptr_t) NULL
;
697 gomp_copy_host2dev (devicep
, aq
,
698 (void *) (tgt
->tgt_start
+ target_offset
),
699 (void *) &cur_node
.tgt_offset
, sizeof (void *),
703 /* Add bias to the pointer value. */
704 cur_node
.host_start
+= bias
;
705 cur_node
.host_end
= cur_node
.host_start
;
706 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
709 if (allow_zero_length_array_sections
)
710 cur_node
.tgt_offset
= cur_node
.host_start
;
713 gomp_mutex_unlock (&devicep
->lock
);
714 gomp_fatal ("Pointer target of array section wasn't mapped");
719 cur_node
.host_start
-= n
->host_start
;
721 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
722 /* At this point tgt_offset is target address of the
723 array section. Now subtract bias to get what we want
724 to initialize the pointer with. */
725 cur_node
.tgt_offset
-= bias
;
727 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
728 (void *) &cur_node
.tgt_offset
, sizeof (void *),
733 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
734 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
735 size_t first
, size_t i
, void **hostaddrs
,
736 size_t *sizes
, void *kinds
,
737 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
739 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
740 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
741 struct splay_tree_key_s cur_node
;
744 const bool short_mapkind
= true;
745 const int typemask
= short_mapkind
? 0xff : 0x7;
747 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
748 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
749 splay_tree_key n2
= gomp_map_0len_lookup (mem_map
, &cur_node
);
750 kind
= get_kind (short_mapkind
, kinds
, i
);
751 implicit
= get_implicit (short_mapkind
, kinds
, i
);
754 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
756 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
757 kind
& typemask
, false, implicit
, cbuf
,
763 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
765 cur_node
.host_start
--;
766 n2
= splay_tree_lookup (mem_map
, &cur_node
);
767 cur_node
.host_start
++;
770 && n2
->host_start
- n
->host_start
771 == n2
->tgt_offset
- n
->tgt_offset
)
773 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
774 kind
& typemask
, false, implicit
, cbuf
,
780 n2
= splay_tree_lookup (mem_map
, &cur_node
);
784 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
786 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
787 kind
& typemask
, false, implicit
, cbuf
,
792 gomp_mutex_unlock (&devicep
->lock
);
793 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
794 "other mapped elements from the same structure weren't mapped "
795 "together with it", (void *) cur_node
.host_start
,
796 (void *) cur_node
.host_end
);
799 attribute_hidden
void
800 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
801 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
802 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
803 struct gomp_coalesce_buf
*cbufp
,
804 bool allow_zero_length_array_sections
)
806 struct splay_tree_key_s s
;
811 gomp_mutex_unlock (&devicep
->lock
);
812 gomp_fatal ("enclosing struct not mapped for attach");
815 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
816 /* We might have a pointer in a packed struct: however we cannot have more
817 than one such pointer in each pointer-sized portion of the struct, so
819 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
822 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
824 if (!n
->aux
->attach_count
)
826 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
828 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
829 n
->aux
->attach_count
[idx
]++;
832 gomp_mutex_unlock (&devicep
->lock
);
833 gomp_fatal ("attach count overflow");
836 if (n
->aux
->attach_count
[idx
] == 1)
838 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
840 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
844 if ((void *) target
== NULL
)
846 /* As a special case, allow attaching NULL host pointers. This
847 allows e.g. unassociated Fortran pointers to be mapped
852 "%s: attaching NULL host pointer, target %p "
853 "(struct base %p)\n", __FUNCTION__
, (void *) devptr
,
854 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
));
856 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
857 sizeof (void *), true, cbufp
);
862 s
.host_start
= target
+ bias
;
863 s
.host_end
= s
.host_start
+ 1;
864 tn
= splay_tree_lookup (mem_map
, &s
);
868 if (allow_zero_length_array_sections
)
869 /* When allowing attachment to zero-length array sections, we
870 copy the host pointer when the target region is not mapped. */
874 gomp_mutex_unlock (&devicep
->lock
);
875 gomp_fatal ("pointer target not mapped for attach");
879 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
882 "%s: attaching host %p, target %p (struct base %p) to %p\n",
883 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
884 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
886 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
887 sizeof (void *), true, cbufp
);
890 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
891 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
894 attribute_hidden
void
895 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
896 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
897 uintptr_t detach_from
, bool finalize
,
898 struct gomp_coalesce_buf
*cbufp
)
904 gomp_mutex_unlock (&devicep
->lock
);
905 gomp_fatal ("enclosing struct not mapped for detach");
908 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
910 if (!n
->aux
|| !n
->aux
->attach_count
)
912 gomp_mutex_unlock (&devicep
->lock
);
913 gomp_fatal ("no attachment counters for struct");
917 n
->aux
->attach_count
[idx
] = 1;
919 if (n
->aux
->attach_count
[idx
] == 0)
921 gomp_mutex_unlock (&devicep
->lock
);
922 gomp_fatal ("attach count underflow");
925 n
->aux
->attach_count
[idx
]--;
927 if (n
->aux
->attach_count
[idx
] == 0)
929 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
931 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
934 "%s: detaching host %p, target %p (struct base %p) to %p\n",
935 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
936 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
939 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
940 sizeof (void *), true, cbufp
);
943 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
944 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
947 attribute_hidden
uintptr_t
948 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
950 if (tgt
->list
[i
].key
!= NULL
)
951 return tgt
->list
[i
].key
->tgt
->tgt_start
952 + tgt
->list
[i
].key
->tgt_offset
953 + tgt
->list
[i
].offset
;
955 switch (tgt
->list
[i
].offset
)
958 return (uintptr_t) hostaddrs
[i
];
964 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
965 + tgt
->list
[i
+ 1].key
->tgt_offset
966 + tgt
->list
[i
+ 1].offset
967 + (uintptr_t) hostaddrs
[i
]
968 - (uintptr_t) hostaddrs
[i
+ 1];
971 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
975 static inline __attribute__((always_inline
)) struct target_mem_desc
*
976 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
977 struct goacc_asyncqueue
*aq
, size_t mapnum
,
978 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
979 void *kinds
, bool short_mapkind
,
980 htab_t
*refcount_set
,
981 enum gomp_map_vars_kind pragma_kind
)
983 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
984 bool has_firstprivate
= false;
985 bool has_always_ptrset
= false;
986 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
987 const int rshift
= short_mapkind
? 8 : 3;
988 const int typemask
= short_mapkind
? 0xff : 0x7;
989 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
990 struct splay_tree_key_s cur_node
;
991 struct target_mem_desc
*tgt
992 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
993 tgt
->list_count
= mapnum
;
994 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
995 tgt
->device_descr
= devicep
;
997 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
1006 tgt_align
= sizeof (void *);
1009 cbuf
.chunk_cnt
= -1;
1012 if (mapnum
> 1 || (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1014 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
1015 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
1018 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1020 size_t align
= 4 * sizeof (void *);
1022 tgt_size
= mapnum
* sizeof (void *);
1024 cbuf
.use_cnt
= 1 + (mapnum
> 1);
1025 cbuf
.chunks
[0].start
= 0;
1026 cbuf
.chunks
[0].end
= tgt_size
;
1029 gomp_mutex_lock (&devicep
->lock
);
1030 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1032 gomp_mutex_unlock (&devicep
->lock
);
1037 for (i
= 0; i
< mapnum
; i
++)
1039 int kind
= get_kind (short_mapkind
, kinds
, i
);
1040 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1041 if (hostaddrs
[i
] == NULL
1042 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1044 tgt
->list
[i
].key
= NULL
;
1045 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1048 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1049 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1051 tgt
->list
[i
].key
= NULL
;
1054 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1055 on a separate construct prior to using use_device_{addr,ptr}.
1056 In OpenMP 5.0, map directives need to be ordered by the
1057 middle-end before the use_device_* clauses. If
1058 !not_found_cnt, all mappings requested (if any) are already
1059 mapped, so use_device_{addr,ptr} can be resolved right away.
1060 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1061 now but would succeed after performing the mappings in the
1062 following loop. We can't defer this always to the second
1063 loop, because it is not even invoked when !not_found_cnt
1064 after the first loop. */
1065 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1066 cur_node
.host_end
= cur_node
.host_start
;
1067 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1070 cur_node
.host_start
-= n
->host_start
;
1072 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1073 + cur_node
.host_start
);
1075 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1077 gomp_mutex_unlock (&devicep
->lock
);
1078 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1080 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1081 /* If not present, continue using the host address. */
1084 __builtin_unreachable ();
1085 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1088 tgt
->list
[i
].offset
= 0;
1091 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
1092 || (kind
& typemask
) == GOMP_MAP_STRUCT_UNORD
)
1094 size_t first
= i
+ 1;
1095 size_t last
= i
+ sizes
[i
];
1096 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1097 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1099 tgt
->list
[i
].key
= NULL
;
1100 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1101 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1104 size_t align
= (size_t) 1 << (kind
>> rshift
);
1105 if (tgt_align
< align
)
1107 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1108 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1109 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1110 not_found_cnt
+= last
- i
;
1111 for (i
= first
; i
<= last
; i
++)
1113 tgt
->list
[i
].key
= NULL
;
1115 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1118 gomp_coalesce_buf_add (&cbuf
,
1119 tgt_size
- cur_node
.host_end
1120 + (uintptr_t) hostaddrs
[i
],
1126 for (i
= first
; i
<= last
; i
++)
1127 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1128 sizes
, kinds
, NULL
, refcount_set
);
1132 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1134 tgt
->list
[i
].key
= NULL
;
1135 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1136 has_firstprivate
= true;
1139 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1140 || ((kind
& typemask
)
1141 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1143 tgt
->list
[i
].key
= NULL
;
1144 has_firstprivate
= true;
1147 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1148 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1149 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1151 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1152 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1154 tgt
->list
[i
].key
= NULL
;
1156 size_t align
= (size_t) 1 << (kind
>> rshift
);
1157 if (tgt_align
< align
)
1159 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1161 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1162 cur_node
.host_end
- cur_node
.host_start
);
1163 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1164 has_firstprivate
= true;
1168 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1170 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1173 tgt
->list
[i
].key
= NULL
;
1174 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1179 n
= splay_tree_lookup (mem_map
, &cur_node
);
1180 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1182 int always_to_cnt
= 0;
1183 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1185 bool has_nullptr
= false;
1187 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1188 if (n
->tgt
->list
[j
].key
== n
)
1190 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1193 if (n
->tgt
->list_count
== 0)
1195 /* 'declare target'; assume has_nullptr; it could also be
1196 statically assigned pointer, but that it should be to
1197 the equivalent variable on the host. */
1198 assert (n
->refcount
== REFCOUNT_INFINITY
);
1202 assert (j
< n
->tgt
->list_count
);
1203 /* Re-map the data if there is an 'always' modifier or if it a
1204 null pointer was there and non a nonnull has been found; that
1205 permits transparent re-mapping for Fortran array descriptors
1206 which were previously mapped unallocated. */
1207 for (j
= i
+ 1; j
< mapnum
; j
++)
1209 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1210 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1212 || !GOMP_MAP_POINTER_P (ptr_kind
)
1213 || *(void **) hostaddrs
[j
] == NULL
))
1215 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1216 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1217 > cur_node
.host_end
))
1221 has_always_ptrset
= true;
1226 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1227 kind
& typemask
, always_to_cnt
> 0, implicit
,
1228 NULL
, refcount_set
);
1233 tgt
->list
[i
].key
= NULL
;
1235 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1237 /* Not present, hence, skip entry - including its MAP_POINTER,
1239 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1241 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1242 == GOMP_MAP_POINTER
))
1245 tgt
->list
[i
].key
= NULL
;
1246 tgt
->list
[i
].offset
= 0;
1250 size_t align
= (size_t) 1 << (kind
>> rshift
);
1252 if (tgt_align
< align
)
1254 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1256 && gomp_to_device_kind_p (kind
& typemask
))
1257 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1258 cur_node
.host_end
- cur_node
.host_start
);
1259 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1260 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1264 for (j
= i
+ 1; j
< mapnum
; j
++)
1265 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1266 kinds
, j
)) & typemask
))
1267 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1269 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1270 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1271 > cur_node
.host_end
))
1275 tgt
->list
[j
].key
= NULL
;
1286 gomp_mutex_unlock (&devicep
->lock
);
1287 gomp_fatal ("unexpected aggregation");
1289 tgt
->to_free
= devaddrs
[0];
1290 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1291 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1293 else if (not_found_cnt
|| (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1295 /* Allocate tgt_align aligned tgt_size block of memory. */
1296 /* FIXME: Perhaps change interface to allocate properly aligned
1298 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1299 tgt_size
+ tgt_align
- 1);
1302 gomp_mutex_unlock (&devicep
->lock
);
1303 gomp_fatal ("device memory allocation fail");
1306 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1307 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1308 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1310 if (cbuf
.use_cnt
== 1)
1312 if (cbuf
.chunk_cnt
> 0)
1315 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1325 tgt
->to_free
= NULL
;
1331 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1332 tgt_size
= mapnum
* sizeof (void *);
1335 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1338 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1339 splay_tree_node array
= tgt
->array
;
1340 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1341 uintptr_t field_tgt_base
= 0;
1342 splay_tree_key field_tgt_structelem_first
= NULL
;
1344 for (i
= 0; i
< mapnum
; i
++)
1345 if (has_always_ptrset
1347 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1348 == GOMP_MAP_TO_PSET
)
1350 splay_tree_key k
= tgt
->list
[i
].key
;
1351 bool has_nullptr
= false;
1353 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1354 if (k
->tgt
->list
[j
].key
== k
)
1356 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1359 if (k
->tgt
->list_count
== 0)
1362 assert (j
< k
->tgt
->list_count
);
1364 tgt
->list
[i
].has_null_ptr_assoc
= false;
1365 for (j
= i
+ 1; j
< mapnum
; j
++)
1367 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1368 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1370 || !GOMP_MAP_POINTER_P (ptr_kind
)
1371 || *(void **) hostaddrs
[j
] == NULL
))
1373 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1374 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1379 if (*(void **) hostaddrs
[j
] == NULL
)
1380 tgt
->list
[i
].has_null_ptr_assoc
= true;
1381 tgt
->list
[j
].key
= k
;
1382 tgt
->list
[j
].copy_from
= false;
1383 tgt
->list
[j
].always_copy_from
= false;
1384 tgt
->list
[j
].is_attach
= false;
1385 gomp_increment_refcount (k
, refcount_set
);
1386 gomp_map_pointer (k
->tgt
, aq
,
1387 (uintptr_t) *(void **) hostaddrs
[j
],
1388 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1390 sizes
[j
], cbufp
, false);
1395 else if (tgt
->list
[i
].key
== NULL
)
1397 int kind
= get_kind (short_mapkind
, kinds
, i
);
1398 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1399 if (hostaddrs
[i
] == NULL
)
1401 switch (kind
& typemask
)
1403 size_t align
, len
, first
, last
;
1405 case GOMP_MAP_FIRSTPRIVATE
:
1406 align
= (size_t) 1 << (kind
>> rshift
);
1407 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1408 tgt
->list
[i
].offset
= tgt_size
;
1410 gomp_copy_host2dev (devicep
, aq
,
1411 (void *) (tgt
->tgt_start
+ tgt_size
),
1412 (void *) hostaddrs
[i
], len
, false, cbufp
);
1413 /* Save device address in hostaddr to permit latter availablity
1414 when doing a deep-firstprivate with pointer attach. */
1415 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1418 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1419 firstprivate to hostaddrs[i+1], which is assumed to contain a
1423 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1425 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1426 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1428 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1429 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1430 this probably needs revision for 'aq' usage. */
1432 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1433 sizeof (void *), false, cbufp
);
1437 case GOMP_MAP_FIRSTPRIVATE_INT
:
1438 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1440 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1441 /* The OpenACC 'host_data' construct only allows 'use_device'
1442 "mapping" clauses, so in the first loop, 'not_found_cnt'
1443 must always have been zero, so all OpenACC 'use_device'
1444 clauses have already been handled. (We can only easily test
1445 'use_device' with 'if_present' clause here.) */
1446 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1447 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1448 code conceptually simple, similar to the first loop. */
1449 case GOMP_MAP_USE_DEVICE_PTR
:
1450 if (tgt
->list
[i
].offset
== 0)
1452 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1453 cur_node
.host_end
= cur_node
.host_start
;
1454 n
= gomp_map_lookup (mem_map
, &cur_node
);
1457 cur_node
.host_start
-= n
->host_start
;
1459 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1460 + cur_node
.host_start
);
1462 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1464 gomp_mutex_unlock (&devicep
->lock
);
1465 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1467 else if ((kind
& typemask
)
1468 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1469 /* If not present, continue using the host address. */
1472 __builtin_unreachable ();
1473 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1476 case GOMP_MAP_STRUCT_UNORD
:
1479 void *first
= hostaddrs
[i
+ 1];
1480 for (size_t j
= i
+ 1; j
< i
+ sizes
[i
]; j
++)
1481 if (hostaddrs
[j
+ 1] != first
)
1483 gomp_mutex_unlock (&devicep
->lock
);
1484 gomp_fatal ("Mapped array elements must be the "
1485 "same (%p vs %p)", first
,
1490 case GOMP_MAP_STRUCT
:
1492 last
= i
+ sizes
[i
];
1493 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1494 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1496 if (tgt
->list
[first
].key
!= NULL
)
1498 if (sizes
[last
] == 0)
1499 cur_node
.host_end
++;
1500 n
= splay_tree_lookup (mem_map
, &cur_node
);
1501 if (sizes
[last
] == 0)
1502 cur_node
.host_end
--;
1503 if (n
== NULL
&& cur_node
.host_start
== cur_node
.host_end
)
1505 gomp_mutex_unlock (&devicep
->lock
);
1506 gomp_fatal ("Struct pointer member not mapped (%p)",
1507 (void*) hostaddrs
[first
]);
1511 size_t align
= (size_t) 1 << (kind
>> rshift
);
1512 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1513 - (uintptr_t) hostaddrs
[i
];
1514 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1515 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1516 - (uintptr_t) hostaddrs
[i
];
1517 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1518 field_tgt_offset
= tgt_size
;
1519 field_tgt_clear
= last
;
1520 field_tgt_structelem_first
= NULL
;
1521 tgt_size
+= cur_node
.host_end
1522 - (uintptr_t) hostaddrs
[first
];
1525 for (i
= first
; i
<= last
; i
++)
1526 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1527 sizes
, kinds
, cbufp
, refcount_set
);
1530 case GOMP_MAP_ALWAYS_POINTER
:
1531 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1532 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1533 n
= splay_tree_lookup (mem_map
, &cur_node
);
1535 || n
->host_start
> cur_node
.host_start
1536 || n
->host_end
< cur_node
.host_end
)
1538 gomp_mutex_unlock (&devicep
->lock
);
1539 gomp_fatal ("always pointer not mapped");
1542 && ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1543 != GOMP_MAP_ALWAYS_POINTER
))
1544 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1545 if (cur_node
.tgt_offset
)
1546 cur_node
.tgt_offset
-= sizes
[i
];
1547 gomp_copy_host2dev (devicep
, aq
,
1548 (void *) (n
->tgt
->tgt_start
1550 + cur_node
.host_start
1552 (void *) &cur_node
.tgt_offset
,
1553 sizeof (void *), true, cbufp
);
1554 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1555 + cur_node
.host_start
- n
->host_start
;
1557 case GOMP_MAP_IF_PRESENT
:
1558 /* Not present - otherwise handled above. Skip over its
1559 MAP_POINTER as well. */
1561 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1562 == GOMP_MAP_POINTER
))
1565 case GOMP_MAP_ATTACH
:
1566 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1568 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1569 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1570 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1573 tgt
->list
[i
].key
= n
;
1574 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1575 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1576 tgt
->list
[i
].copy_from
= false;
1577 tgt
->list
[i
].always_copy_from
= false;
1578 tgt
->list
[i
].is_attach
= true;
1579 /* OpenACC 'attach'/'detach' doesn't affect
1580 structured/dynamic reference counts ('n->refcount',
1581 'n->dynamic_refcount'). */
1584 = ((kind
& typemask
)
1585 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1586 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1587 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1590 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1592 gomp_mutex_unlock (&devicep
->lock
);
1593 gomp_fatal ("outer struct not mapped for attach");
1600 splay_tree_key k
= &array
->key
;
1601 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1602 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1603 k
->host_end
= k
->host_start
+ sizes
[i
];
1605 k
->host_end
= k
->host_start
+ sizeof (void *);
1606 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1607 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1609 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1611 /* For this condition to be true, there must be a
1612 duplicate struct element mapping. This can happen with
1613 GOMP_MAP_STRUCT_UNORD mappings, for example. */
1614 tgt
->list
[i
].key
= n
;
1617 assert ((n
->refcount
& REFCOUNT_STRUCTELEM
) != 0);
1618 assert (field_tgt_structelem_first
!= NULL
);
1620 if (i
== field_tgt_clear
)
1622 n
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1623 field_tgt_structelem_first
= NULL
;
1626 if (i
== field_tgt_clear
)
1627 field_tgt_clear
= FIELD_TGT_EMPTY
;
1628 gomp_increment_refcount (n
, refcount_set
);
1629 tgt
->list
[i
].copy_from
1630 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1631 tgt
->list
[i
].always_copy_from
1632 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1633 tgt
->list
[i
].is_attach
= false;
1634 tgt
->list
[i
].offset
= 0;
1635 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1638 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1639 kind
& typemask
, false, implicit
,
1640 cbufp
, refcount_set
);
1645 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1647 /* Replace target address of the pointer with target address
1648 of mapped object in the splay tree. */
1649 splay_tree_remove (mem_map
, n
);
1651 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1652 k
->aux
->link_key
= n
;
1654 size_t align
= (size_t) 1 << (kind
>> rshift
);
1655 tgt
->list
[i
].key
= k
;
1658 k
->dynamic_refcount
= 0;
1659 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1661 k
->tgt_offset
= k
->host_start
- field_tgt_base
1665 k
->refcount
= REFCOUNT_STRUCTELEM
;
1666 if (field_tgt_structelem_first
== NULL
)
1668 /* Set to first structure element of sequence. */
1669 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1670 field_tgt_structelem_first
= k
;
1673 /* Point to refcount of leading element, but do not
1675 k
->structelem_refcount_ptr
1676 = &field_tgt_structelem_first
->structelem_refcount
;
1678 if (i
== field_tgt_clear
)
1680 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1681 field_tgt_structelem_first
= NULL
;
1684 if (i
== field_tgt_clear
)
1685 field_tgt_clear
= FIELD_TGT_EMPTY
;
1689 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1690 k
->tgt_offset
= tgt_size
;
1691 tgt_size
+= k
->host_end
- k
->host_start
;
1693 /* First increment, from 0 to 1. gomp_increment_refcount
1694 encapsulates the different increment cases, so use this
1695 instead of directly setting 1 during initialization. */
1696 gomp_increment_refcount (k
, refcount_set
);
1698 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1699 tgt
->list
[i
].always_copy_from
1700 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1701 tgt
->list
[i
].is_attach
= false;
1702 tgt
->list
[i
].offset
= 0;
1703 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1706 array
->right
= NULL
;
1707 splay_tree_insert (mem_map
, array
);
1708 switch (kind
& typemask
)
1710 case GOMP_MAP_ALLOC
:
1712 case GOMP_MAP_FORCE_ALLOC
:
1713 case GOMP_MAP_FORCE_FROM
:
1714 case GOMP_MAP_ALWAYS_FROM
:
1717 case GOMP_MAP_TOFROM
:
1718 case GOMP_MAP_FORCE_TO
:
1719 case GOMP_MAP_FORCE_TOFROM
:
1720 case GOMP_MAP_ALWAYS_TO
:
1721 case GOMP_MAP_ALWAYS_TOFROM
:
1722 gomp_copy_host2dev (devicep
, aq
,
1723 (void *) (tgt
->tgt_start
1725 (void *) k
->host_start
,
1726 k
->host_end
- k
->host_start
,
1729 case GOMP_MAP_POINTER
:
1730 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1732 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1733 k
->tgt_offset
, sizes
[i
], cbufp
,
1735 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1737 case GOMP_MAP_TO_PSET
:
1738 gomp_copy_host2dev (devicep
, aq
,
1739 (void *) (tgt
->tgt_start
1741 (void *) k
->host_start
,
1742 k
->host_end
- k
->host_start
,
1744 tgt
->list
[i
].has_null_ptr_assoc
= false;
1746 for (j
= i
+ 1; j
< mapnum
; j
++)
1748 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1750 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1751 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1753 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1754 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1759 tgt
->list
[j
].key
= k
;
1760 tgt
->list
[j
].copy_from
= false;
1761 tgt
->list
[j
].always_copy_from
= false;
1762 tgt
->list
[j
].is_attach
= false;
1763 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1764 /* For OpenMP, the use of refcount_sets causes
1765 errors if we set k->refcount = 1 above but also
1766 increment it again here, for decrementing will
1767 not properly match, since we decrement only once
1768 for each key's refcount. Therefore avoid this
1769 increment for OpenMP constructs. */
1771 gomp_increment_refcount (k
, refcount_set
);
1772 gomp_map_pointer (tgt
, aq
,
1773 (uintptr_t) *(void **) hostaddrs
[j
],
1775 + ((uintptr_t) hostaddrs
[j
]
1777 sizes
[j
], cbufp
, false);
1782 case GOMP_MAP_FORCE_PRESENT
:
1783 case GOMP_MAP_ALWAYS_PRESENT_TO
:
1784 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
1785 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
1787 /* We already looked up the memory region above and it
1789 size_t size
= k
->host_end
- k
->host_start
;
1790 gomp_mutex_unlock (&devicep
->lock
);
1791 #ifdef HAVE_INTTYPES_H
1792 gomp_fatal ("present clause: not present on the device "
1793 "(addr: %p, size: %"PRIu64
" (0x%"PRIx64
"), "
1794 "dev: %d)", (void *) k
->host_start
,
1795 (uint64_t) size
, (uint64_t) size
,
1796 devicep
->target_id
);
1798 gomp_fatal ("present clause: not present on the device "
1799 "(addr: %p, size: %lu (0x%lx), dev: %d)",
1800 (void *) k
->host_start
,
1801 (unsigned long) size
, (unsigned long) size
,
1802 devicep
->target_id
);
1806 case GOMP_MAP_FORCE_DEVICEPTR
:
1807 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1808 gomp_copy_host2dev (devicep
, aq
,
1809 (void *) (tgt
->tgt_start
1811 (void *) k
->host_start
,
1812 sizeof (void *), false, cbufp
);
1815 gomp_mutex_unlock (&devicep
->lock
);
1816 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1820 if (k
->aux
&& k
->aux
->link_key
)
1822 /* Set link pointer on target to the device address of the
1824 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1825 /* We intentionally do not use coalescing here, as it's not
1826 data allocated by the current call to this function. */
1827 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1828 &tgt_addr
, sizeof (void *), true, NULL
);
1835 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1837 for (i
= 0; i
< mapnum
; i
++)
1839 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1840 gomp_copy_host2dev (devicep
, aq
,
1841 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1842 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1850 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1851 gomp_copy_host2dev (devicep
, aq
,
1852 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1853 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1854 - cbuf
.chunks
[0].start
),
1855 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1858 /* Free once the transfer has completed. */
1859 devicep
->openacc
.async
.queue_callback_func (aq
, free
, cbuf
.buf
);
1866 /* If the variable from "omp target enter data" map-list was already mapped,
1867 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1869 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1875 gomp_mutex_unlock (&devicep
->lock
);
1879 static struct target_mem_desc
*
1880 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1881 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1882 bool short_mapkind
, htab_t
*refcount_set
,
1883 enum gomp_map_vars_kind pragma_kind
)
1885 /* This management of a local refcount_set is for convenience of callers
1886 who do not share a refcount_set over multiple map/unmap uses. */
1887 htab_t local_refcount_set
= NULL
;
1888 if (refcount_set
== NULL
)
1890 local_refcount_set
= htab_create (mapnum
);
1891 refcount_set
= &local_refcount_set
;
1894 struct target_mem_desc
*tgt
;
1895 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1896 sizes
, kinds
, short_mapkind
, refcount_set
,
1898 if (local_refcount_set
)
1899 htab_free (local_refcount_set
);
1904 attribute_hidden
struct target_mem_desc
*
1905 goacc_map_vars (struct gomp_device_descr
*devicep
,
1906 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1907 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1908 void *kinds
, bool short_mapkind
,
1909 enum gomp_map_vars_kind pragma_kind
)
1911 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1912 sizes
, kinds
, short_mapkind
, NULL
,
1913 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1917 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1919 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1921 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1928 gomp_unref_tgt (void *ptr
)
1930 bool is_tgt_unmapped
= false;
1932 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1934 if (tgt
->refcount
> 1)
1938 gomp_unmap_tgt (tgt
);
1939 is_tgt_unmapped
= true;
1942 return is_tgt_unmapped
;
/* Async-queue callback adapter: gomp_unref_tgt with its boolean result
   deliberately discarded (queue callbacks must return void).  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1952 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1954 splay_tree_remove (sp
, k
);
1957 if (k
->aux
->link_key
)
1958 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1959 if (k
->aux
->attach_count
)
1960 free (k
->aux
->attach_count
);
/* Remove splay-tree key K from DEVICEP's memory map, then drop one
   reference on the target_mem_desc backing it -- asynchronously via AQ's
   queue callback when AQ is non-NULL, synchronously otherwise.  Returns
   true iff the descriptor was actually unmapped (the async path always
   returns false).  For structure-element keys (REFCOUNT_STRUCTELEM_P) the
   whole sibling sequence is removed, starting from its _FIRST key.
   NOTE(review): callers presumably hold devicep->lock -- confirm at the
   call sites (gomp_remove_var / gomp_remove_var_async document this).  */
1966 static inline __attribute__((always_inline
)) bool
1967 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1968 struct goacc_asyncqueue
*aq
)
1970 bool is_tgt_unmapped
= false;
1972 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
/* K may be any element of a structure sequence; rewind to the sequence's
   _FIRST key before walking the siblings.  */
1974 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1975 /* Infer the splay_tree_key of the first structelem key using the
1976 pointer to the first structleme_refcount. */
1977 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1978 - offsetof (struct splay_tree_key_s
,
1979 structelem_refcount
));
1980 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1982 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1983 with the splay_tree_keys embedded inside. */
1984 splay_tree_node node
=
1985 (splay_tree_node
) ((char *) k
1986 - offsetof (struct splay_tree_node_s
, key
));
1989 /* Starting from the _FIRST key, and continue for all following
/* Remove each sibling key in turn.  */
1991 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
/* Stop once the key flagged _LAST has been removed.  */
1992 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
/* Non-structelem keys are removed individually.  */
1999 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
/* Defer the target_mem_desc unref to the async queue when one was given...  */
2002 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
/* ...otherwise unref synchronously and report whether it was unmapped.  */
2005 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
2006 return is_tgt_unmapped
;
2009 attribute_hidden
bool
2010 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
2012 return gomp_remove_var_internal (devicep
, k
, NULL
);
2015 /* Remove a variable asynchronously. This actually removes the variable
2016 mapping immediately, but retains the linked target_mem_desc until the
2017 asynchronous operation has completed (as it may still refer to target
2018 memory). The device lock must be held before entry, and remains locked on
2021 attribute_hidden
void
2022 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
2023 struct goacc_asyncqueue
*aq
)
2025 (void) gomp_remove_var_internal (devicep
, k
, aq
);
2028 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2029 variables back from device to host: if it is false, it is assumed that this
2030 has been done already. */
/* Internal worker shared by gomp_unmap_vars (OpenMP, synchronous) and
   goacc_unmap_vars (OpenACC, via async queue AQ).  Holds devicep->lock for
   the duration of the teardown.  */
2032 static inline __attribute__((always_inline
)) void
2033 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2034 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
2036 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
/* Nothing was mapped: only the descriptor itself needs releasing.  */
2038 if (tgt
->list_count
== 0)
2044 gomp_mutex_lock (&devicep
->lock
);
/* A finalized device has no live mappings left to undo.  */
2045 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2047 gomp_mutex_unlock (&devicep
->lock
);
2055 /* We must perform detachments before any copies back to the host. */
2056 for (i
= 0; i
< tgt
->list_count
; i
++)
2058 splay_tree_key k
= tgt
->list
[i
].key
;
2060 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
2061 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
2062 + tgt
->list
[i
].offset
,
/* Second pass: drop refcounts and copy data back where required.  */
2066 for (i
= 0; i
< tgt
->list_count
; i
++)
2068 splay_tree_key k
= tgt
->list
[i
].key
;
2072 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2073 counts ('n->refcount', 'n->dynamic_refcount'). */
2074 if (tgt
->list
[i
].is_attach
)
2077 bool do_copy
, do_remove
;
2078 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
/* Copy back when the refcount logic says so and the list entry asks for
   it, or unconditionally for 'always' from-mappings.  */
2080 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
2081 || tgt
->list
[i
].always_copy_from
)
2082 gomp_copy_dev2host (devicep
, aq
,
2083 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
2084 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2085 + tgt
->list
[i
].offset
),
2086 tgt
->list
[i
].length
);
2089 struct target_mem_desc
*k_tgt
= k
->tgt
;
2090 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
2091 /* It would be bad if TGT got unmapped while we're still iterating
2092 over its LIST_COUNT, and also expect to use it in the following
2094 assert (!is_tgt_unmapped
/* Finally drop TGT's own reference -- deferred to the queue callback in
   the async case, since device memory may still be in use.  */
2100 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2103 gomp_unref_tgt ((void *) tgt
);
2105 gomp_mutex_unlock (&devicep
->lock
);
2109 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2110 htab_t
*refcount_set
)
2112 /* This management of a local refcount_set is for convenience of callers
2113 who do not share a refcount_set over multiple map/unmap uses. */
2114 htab_t local_refcount_set
= NULL
;
2115 if (refcount_set
== NULL
)
2117 local_refcount_set
= htab_create (tgt
->list_count
);
2118 refcount_set
= &local_refcount_set
;
2121 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2123 if (local_refcount_set
)
2124 htab_free (local_refcount_set
);
2127 attribute_hidden
void
2128 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2129 struct goacc_asyncqueue
*aq
)
2131 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
/* Implements 'omp target update' (and the corresponding device<->host data
   refresh): for each of the MAPNUM regions in HOSTADDRS/SIZES, look up the
   existing mapping in DEVICEP's memory map and copy host->device and/or
   device->host according to the kind bits decoded from KINDS (8-bit kinds
   when SHORT_MAPKIND, 3-bit otherwise).  Fatal errors: updating a region
   only partially mapped, or a 'present' region that is not mapped at all.
   Takes devicep->lock around the whole walk.  */
2135 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2136 size_t *sizes
, void *kinds
, bool short_mapkind
)
2139 struct splay_tree_key_s cur_node
;
/* short_mapkind selects the wide (8-bit) kind encoding.  */
2140 const int typemask
= short_mapkind
? 0xff : 0x7;
2148 gomp_mutex_lock (&devicep
->lock
);
2149 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2151 gomp_mutex_unlock (&devicep
->lock
);
2155 for (i
= 0; i
< mapnum
; i
++)
2158 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2159 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2160 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2163 int kind
= get_kind (short_mapkind
, kinds
, i
);
/* The found mapping must fully cover the requested region.  */
2164 if (n
->host_start
> cur_node
.host_start
2165 || n
->host_end
< cur_node
.host_end
)
2167 gomp_mutex_unlock (&devicep
->lock
);
2168 gomp_fatal ("Trying to update [%p..%p) object when "
2169 "only [%p..%p) is mapped",
2170 (void *) cur_node
.host_start
,
2171 (void *) cur_node
.host_end
,
2172 (void *) n
->host_start
,
2173 (void *) n
->host_end
);
/* Regions with live attach counts are updated pointer-by-pointer so
   attached device pointers are not clobbered.  */
2176 if (n
->aux
&& n
->aux
->attach_count
)
2178 uintptr_t addr
= cur_node
.host_start
;
2179 while (addr
< cur_node
.host_end
)
2181 /* We have to be careful not to overwrite still attached
2182 pointers during host<->device updates. */
2183 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2184 if (n
->aux
->attach_count
[i
] == 0)
2186 void *devaddr
= (void *) (n
->tgt
->tgt_start
2188 + addr
- n
->host_start
);
2189 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2190 gomp_copy_host2dev (devicep
, NULL
,
2191 devaddr
, (void *) addr
,
2192 sizeof (void *), false, NULL
);
2193 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2194 gomp_copy_dev2host (devicep
, NULL
,
2195 (void *) addr
, devaddr
,
2198 addr
+= sizeof (void *);
/* No attachments: update the whole region in one copy per direction.  */
2203 void *hostaddr
= (void *) cur_node
.host_start
;
2204 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2205 + cur_node
.host_start
2207 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2209 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2210 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2212 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2213 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
/* Lookup failed: only fatal for 'present'-qualified kinds.  */
2218 int kind
= get_kind (short_mapkind
, kinds
, i
);
2220 if (GOMP_MAP_PRESENT_P (kind
))
2222 /* We already looked up the memory region above and it
2224 gomp_mutex_unlock (&devicep
->lock
);
2225 #ifdef HAVE_INTTYPES_H
2226 gomp_fatal ("present clause: not present on the device "
2227 "(addr: %p, size: %"PRIu64
" (0x%"PRIx64
"), "
2228 "dev: %d)", (void *) hostaddrs
[i
],
2229 (uint64_t) sizes
[i
], (uint64_t) sizes
[i
],
2230 devicep
->target_id
);
2232 gomp_fatal ("present clause: not present on the device "
2233 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2234 (void *) hostaddrs
[i
], (unsigned long) sizes
[i
],
2235 (unsigned long) sizes
[i
], devicep
->target_id
);
2240 gomp_mutex_unlock (&devicep
->lock
);
2243 static struct gomp_offload_icv_list
*
2244 gomp_get_offload_icv_item (int dev_num
)
2246 struct gomp_offload_icv_list
*l
= gomp_offload_icv_list
;
2247 while (l
!= NULL
&& l
->device_num
!= dev_num
)
2253 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2254 depending on the device num and the variable hierarchy
2255 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2256 device and thus no item with that device number is contained in
2257 gomp_offload_icv_list, then a new item is created and added to the list. */
/* Precedence for each ICV below: device-specific (_DEV_<n>) beats _DEV,
   which beats _ALL, which beats the built-in defaults.  */
2259 static struct gomp_offload_icvs
*
2260 get_gomp_offload_icvs (int dev_num
)
2262 struct gomp_icv_list
*dev
2263 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV
);
2264 struct gomp_icv_list
*all
2265 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL
);
2266 struct gomp_icv_list
*dev_x
= gomp_get_initial_icv_item (dev_num
);
2267 struct gomp_offload_icv_list
*offload_icvs
2268 = gomp_get_offload_icv_item (dev_num
);
/* Already cached for this device: reuse the existing entry.  */
2270 if (offload_icvs
!= NULL
)
2271 return &offload_icvs
->icvs
;
/* Not cached yet: build a new list entry for DEV_NUM.  */
2273 struct gomp_offload_icv_list
*new
2274 = (struct gomp_offload_icv_list
*) gomp_malloc (sizeof (struct gomp_offload_icv_list
));
2276 new->device_num
= dev_num
;
2277 new->icvs
.device_num
= dev_num
;
2278 new->next
= gomp_offload_icv_list
;
/* nteams: _DEV_<n> > _DEV > _ALL > default.  */
2280 if (dev_x
!= NULL
&& gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_NTEAMS
))
2281 new->icvs
.nteams
= dev_x
->icvs
.nteams_var
;
2282 else if (dev
!= NULL
&& gomp_get_icv_flag (dev
->flags
, GOMP_ICV_NTEAMS
))
2283 new->icvs
.nteams
= dev
->icvs
.nteams_var
;
2284 else if (all
!= NULL
&& gomp_get_icv_flag (all
->flags
, GOMP_ICV_NTEAMS
))
2285 new->icvs
.nteams
= all
->icvs
.nteams_var
;
2287 new->icvs
.nteams
= gomp_default_icv_values
.nteams_var
;
/* teams_thread_limit: same precedence chain.  */
2290 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2291 new->icvs
.teams_thread_limit
= dev_x
->icvs
.teams_thread_limit_var
;
2292 else if (dev
!= NULL
2293 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2294 new->icvs
.teams_thread_limit
= dev
->icvs
.teams_thread_limit_var
;
2295 else if (all
!= NULL
2296 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2297 new->icvs
.teams_thread_limit
= all
->icvs
.teams_thread_limit_var
;
2299 new->icvs
.teams_thread_limit
2300 = gomp_default_icv_values
.teams_thread_limit_var
;
/* default_device: same precedence chain.  */
2303 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2304 new->icvs
.default_device
= dev_x
->icvs
.default_device_var
;
2305 else if (dev
!= NULL
2306 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2307 new->icvs
.default_device
= dev
->icvs
.default_device_var
;
2308 else if (all
!= NULL
2309 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2310 new->icvs
.default_device
= all
->icvs
.default_device_var
;
2312 new->icvs
.default_device
= gomp_default_icv_values
.default_device_var
;
/* Publish the new entry at the head of the global list.  */
2314 gomp_offload_icv_list
= new;
2318 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2319 And insert to splay tree the mapping between addresses from HOST_TABLE and
2320 from loaded target image. We rely in the host and device compiler
2321 emitting variable and functions in the same order. */
2324 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2325 const void *host_table
, const void *target_data
,
2326 bool is_register_lock
)
2328 void **host_func_table
= ((void ***) host_table
)[0];
2329 void **host_funcs_end
= ((void ***) host_table
)[1];
2330 void **host_var_table
= ((void ***) host_table
)[2];
2331 void **host_vars_end
= ((void ***) host_table
)[3];
2332 void **host_ind_func_table
= NULL
;
2333 void **host_ind_funcs_end
= NULL
;
2335 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version
))
2337 host_ind_func_table
= ((void ***) host_table
)[4];
2338 host_ind_funcs_end
= ((void ***) host_table
)[5];
2341 /* The func and ind_func tables contain only addresses, the var table
2342 contains addresses and corresponding sizes. */
2343 int num_funcs
= host_funcs_end
- host_func_table
;
2344 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2345 int num_ind_funcs
= (host_ind_funcs_end
- host_ind_func_table
);
2347 /* Load image to device and get target addresses for the image. */
2348 struct addr_pair
*target_table
= NULL
;
2349 uint64_t *rev_target_fn_table
= NULL
;
2350 int i
, num_target_entries
;
2352 /* With reverse offload, insert also target-host addresses. */
2353 bool rev_lookup
= omp_requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
;
2356 = devicep
->load_image_func (devicep
->target_id
, version
,
2357 target_data
, &target_table
,
2358 rev_lookup
? &rev_target_fn_table
: NULL
,
2360 ? (uint64_t *) host_ind_func_table
: NULL
);
2362 if (num_target_entries
!= num_funcs
+ num_vars
2363 /* "+1" due to the additional ICV struct. */
2364 && num_target_entries
!= num_funcs
+ num_vars
+ 1)
2366 gomp_mutex_unlock (&devicep
->lock
);
2367 if (is_register_lock
)
2368 gomp_mutex_unlock (®ister_lock
);
2369 gomp_fatal ("Cannot map target functions or variables"
2370 " (expected %u, have %u)", num_funcs
+ num_vars
,
2371 num_target_entries
);
2374 /* Insert host-target address mapping into splay tree. */
2375 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2376 /* "+1" due to the additional ICV struct. */
2377 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
+ 1)
2378 * sizeof (*tgt
->array
));
2379 if (rev_target_fn_table
)
2380 tgt
->rev_array
= gomp_malloc (num_funcs
* sizeof (*tgt
->rev_array
));
2382 tgt
->rev_array
= NULL
;
2383 tgt
->refcount
= REFCOUNT_INFINITY
;
2386 tgt
->to_free
= NULL
;
2388 tgt
->list_count
= 0;
2389 tgt
->device_descr
= devicep
;
2390 splay_tree_node array
= tgt
->array
;
2391 reverse_splay_tree_node rev_array
= tgt
->rev_array
;
2393 for (i
= 0; i
< num_funcs
; i
++)
2395 splay_tree_key k
= &array
->key
;
2396 k
->host_start
= (uintptr_t) host_func_table
[i
];
2397 k
->host_end
= k
->host_start
+ 1;
2399 k
->tgt_offset
= target_table
[i
].start
;
2400 k
->refcount
= REFCOUNT_INFINITY
;
2401 k
->dynamic_refcount
= 0;
2404 array
->right
= NULL
;
2405 splay_tree_insert (&devicep
->mem_map
, array
);
2406 if (rev_target_fn_table
)
2408 reverse_splay_tree_key k2
= &rev_array
->key
;
2409 k2
->dev
= rev_target_fn_table
[i
];
2411 rev_array
->left
= NULL
;
2412 rev_array
->right
= NULL
;
2414 reverse_splay_tree_insert (&devicep
->mem_map_rev
, rev_array
);
2420 /* Most significant bit of the size in host and target tables marks
2421 "omp declare target link" variables. */
2422 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2423 const uintptr_t size_mask
= ~link_bit
;
2425 for (i
= 0; i
< num_vars
; i
++)
2427 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2428 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2429 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2431 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2433 gomp_mutex_unlock (&devicep
->lock
);
2434 if (is_register_lock
)
2435 gomp_mutex_unlock (®ister_lock
);
2436 gomp_fatal ("Cannot map target variables (size mismatch)");
2439 splay_tree_key k
= &array
->key
;
2440 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2442 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2444 k
->tgt_offset
= target_var
->start
;
2445 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2446 k
->dynamic_refcount
= 0;
2449 array
->right
= NULL
;
2450 splay_tree_insert (&devicep
->mem_map
, array
);
2454 /* Last entry is for a ICVs variable.
2455 Tolerate case where plugin does not return those entries. */
2456 if (num_funcs
+ num_vars
< num_target_entries
)
2458 struct addr_pair
*var
= &target_table
[num_funcs
+ num_vars
];
2460 /* Start address will be non-zero for the ICVs variable if
2461 the variable was found in this image. */
2462 if (var
->start
!= 0)
2464 /* The index of the devicep within devices[] is regarded as its
2465 'device number', which is different from the per-device type
2466 devicep->target_id. */
2467 int dev_num
= (int) (devicep
- &devices
[0]);
2468 struct gomp_offload_icvs
*icvs
= get_gomp_offload_icvs (dev_num
);
2469 size_t var_size
= var
->end
- var
->start
;
2470 if (var_size
!= sizeof (struct gomp_offload_icvs
))
2472 gomp_mutex_unlock (&devicep
->lock
);
2473 if (is_register_lock
)
2474 gomp_mutex_unlock (®ister_lock
);
2475 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2478 /* Copy the ICVs variable to place on device memory, hereby
2479 actually designating its device number into effect. */
2480 gomp_copy_host2dev (devicep
, NULL
, (void *) var
->start
, icvs
,
2481 var_size
, false, NULL
);
2482 splay_tree_key k
= &array
->key
;
2483 k
->host_start
= (uintptr_t) icvs
;
2485 k
->host_start
+ (size_mask
& sizeof (struct gomp_offload_icvs
));
2487 k
->tgt_offset
= var
->start
;
2488 k
->refcount
= REFCOUNT_INFINITY
;
2489 k
->dynamic_refcount
= 0;
2492 array
->right
= NULL
;
2493 splay_tree_insert (&devicep
->mem_map
, array
);
2498 free (target_table
);
2501 /* Unload the mappings described by target_data from device DEVICE_P.
2502 The device must be locked. */
2505 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2507 const void *host_table
, const void *target_data
)
2509 void **host_func_table
= ((void ***) host_table
)[0];
2510 void **host_funcs_end
= ((void ***) host_table
)[1];
2511 void **host_var_table
= ((void ***) host_table
)[2];
2512 void **host_vars_end
= ((void ***) host_table
)[3];
2514 /* The func table contains only addresses, the var table contains addresses
2515 and corresponding sizes. */
2516 int num_funcs
= host_funcs_end
- host_func_table
;
2517 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2519 struct splay_tree_key_s k
;
2520 splay_tree_key node
= NULL
;
2522 /* Find mapping at start of node array */
2523 if (num_funcs
|| num_vars
)
2525 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2526 : (uintptr_t) host_var_table
[0]);
2527 k
.host_end
= k
.host_start
+ 1;
2528 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2531 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2533 gomp_mutex_unlock (&devicep
->lock
);
2534 gomp_fatal ("image unload fail");
2536 if (devicep
->mem_map_rev
.root
)
2538 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2540 assert (node
&& node
->tgt
&& node
->tgt
->rev_array
);
2541 assert (devicep
->mem_map_rev
.root
->key
.k
->tgt
== node
->tgt
);
2542 free (node
->tgt
->rev_array
);
2543 devicep
->mem_map_rev
.root
= NULL
;
2546 /* Remove mappings from splay tree. */
2548 for (i
= 0; i
< num_funcs
; i
++)
2550 k
.host_start
= (uintptr_t) host_func_table
[i
];
2551 k
.host_end
= k
.host_start
+ 1;
2552 splay_tree_remove (&devicep
->mem_map
, &k
);
2555 /* Most significant bit of the size in host and target tables marks
2556 "omp declare target link" variables. */
2557 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2558 const uintptr_t size_mask
= ~link_bit
;
2559 bool is_tgt_unmapped
= false;
2561 for (i
= 0; i
< num_vars
; i
++)
2563 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2565 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2567 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2568 splay_tree_remove (&devicep
->mem_map
, &k
);
2571 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2572 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2576 if (node
&& !is_tgt_unmapped
)
2584 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2586 char *end
= buf
+ size
, *p
= buf
;
2587 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2588 p
+= snprintf (p
, end
- p
, "unified_address");
2589 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2590 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2591 (p
== buf
? "" : ", "));
2592 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2593 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2594 (p
== buf
? "" : ", "));
2597 /* This function should be called from every offload image while loading.
2598 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2599 the target, and DATA. */
2602 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2603 int target_type
, const void *data
)
2607 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2608 gomp_fatal ("Library too old for offload (version %u < %u)",
2609 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2612 const void *target_data
;
2613 if (GOMP_VERSION_LIB (version
) > 1)
2615 omp_req
= (int) (size_t) ((void **) data
)[0];
2616 target_data
= &((void **) data
)[1];
2624 gomp_mutex_lock (®ister_lock
);
2626 if (omp_req
&& omp_requires_mask
&& omp_requires_mask
!= omp_req
)
2628 char buf1
[sizeof ("unified_address, unified_shared_memory, "
2629 "reverse_offload")];
2630 char buf2
[sizeof ("unified_address, unified_shared_memory, "
2631 "reverse_offload")];
2632 gomp_requires_to_name (buf2
, sizeof (buf2
),
2633 omp_req
!= GOMP_REQUIRES_TARGET_USED
2634 ? omp_req
: omp_requires_mask
);
2635 if (omp_req
!= GOMP_REQUIRES_TARGET_USED
2636 && omp_requires_mask
!= GOMP_REQUIRES_TARGET_USED
)
2638 gomp_requires_to_name (buf1
, sizeof (buf1
), omp_requires_mask
);
2639 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2640 "in multiple compilation units: '%s' vs. '%s'",
2644 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2645 "some compilation units", buf2
);
2647 omp_requires_mask
= omp_req
;
2649 /* Load image to all initialized devices. */
2650 for (i
= 0; i
< num_devices
; i
++)
2652 struct gomp_device_descr
*devicep
= &devices
[i
];
2653 gomp_mutex_lock (&devicep
->lock
);
2654 if (devicep
->type
== target_type
2655 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2656 gomp_load_image_to_device (devicep
, version
,
2657 host_table
, target_data
, true);
2658 gomp_mutex_unlock (&devicep
->lock
);
2661 /* Insert image to array of pending images. */
2663 = gomp_realloc_unlock (offload_images
,
2664 (num_offload_images
+ 1)
2665 * sizeof (struct offload_image_descr
));
2666 offload_images
[num_offload_images
].version
= version
;
2667 offload_images
[num_offload_images
].type
= target_type
;
2668 offload_images
[num_offload_images
].host_table
= host_table
;
2669 offload_images
[num_offload_images
].target_data
= target_data
;
2671 num_offload_images
++;
2672 gomp_mutex_unlock (®ister_lock
);
2675 /* Legacy entry point. */
/* Legacy entry point: forwards to the versioned registration with
   version 0 (pre-'requires'-mask image layout).  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2684 /* This function should be called from every offload image while unloading.
2685 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2686 the target, and DATA. */
2689 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2690 int target_type
, const void *data
)
2694 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2695 gomp_fatal ("Library too old for offload (version %u < %u)",
2696 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2698 const void *target_data
;
2699 if (GOMP_VERSION_LIB (version
) > 1)
2700 target_data
= &((void **) data
)[1];
2704 gomp_mutex_lock (®ister_lock
);
2706 /* Unload image from all initialized devices. */
2707 for (i
= 0; i
< num_devices
; i
++)
2709 struct gomp_device_descr
*devicep
= &devices
[i
];
2710 gomp_mutex_lock (&devicep
->lock
);
2711 if (devicep
->type
== target_type
2712 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2713 gomp_unload_image_from_device (devicep
, version
,
2714 host_table
, target_data
);
2715 gomp_mutex_unlock (&devicep
->lock
);
2718 /* Remove image from array of pending images. */
2719 for (i
= 0; i
< num_offload_images
; i
++)
2720 if (offload_images
[i
].target_data
== target_data
)
2722 offload_images
[i
] = offload_images
[--num_offload_images
];
2726 gomp_mutex_unlock (®ister_lock
);
2729 /* Legacy entry point. */
/* Legacy entry point: forwards to the versioned unregistration with
   version 0 (pre-'requires'-mask image layout).  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2738 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2739 must be locked on entry, and remains locked on return. */
2741 attribute_hidden
void
2742 gomp_init_device (struct gomp_device_descr
*devicep
)
2745 if (!devicep
->init_device_func (devicep
->target_id
))
2747 gomp_mutex_unlock (&devicep
->lock
);
2748 gomp_fatal ("device initialization failed");
2751 /* Load to device all images registered by the moment. */
2752 for (i
= 0; i
< num_offload_images
; i
++)
2754 struct offload_image_descr
*image
= &offload_images
[i
];
2755 if (image
->type
== devicep
->type
)
2756 gomp_load_image_to_device (devicep
, image
->version
,
2757 image
->host_table
, image
->target_data
,
2761 /* Initialize OpenACC asynchronous queues. */
2762 goacc_init_asyncqueues (devicep
);
2764 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2767 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2768 must be locked on entry, and remains locked on return. */
2770 attribute_hidden
bool
2771 gomp_fini_device (struct gomp_device_descr
*devicep
)
2773 bool ret
= goacc_fini_asyncqueues (devicep
);
2774 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2775 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2779 attribute_hidden
void
2780 gomp_unload_device (struct gomp_device_descr
*devicep
)
2782 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2786 /* Unload from device all images registered at the moment. */
2787 for (i
= 0; i
< num_offload_images
; i
++)
2789 struct offload_image_descr
*image
= &offload_images
[i
];
2790 if (image
->type
== devicep
->type
)
2791 gomp_unload_image_from_device (devicep
, image
->version
,
2793 image
->target_data
);
2798 /* Host fallback for GOMP_target{,_ext} routines. */
2801 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2802 struct gomp_device_descr
*devicep
, void **args
)
2804 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2806 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2808 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2809 "be used for offloading");
2812 memset (thr
, '\0', sizeof (*thr
));
2813 if (gomp_places_list
)
2815 thr
->place
= old_thr
.place
;
2816 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2821 intptr_t id
= (intptr_t) *args
++, val
;
2822 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2823 val
= (intptr_t) *args
++;
2825 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2826 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2828 id
&= GOMP_TARGET_ARG_ID_MASK
;
2829 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
2831 val
= val
> INT_MAX
? INT_MAX
: val
;
2833 gomp_icv (true)->thread_limit_var
= val
;
2838 gomp_free_thread (thr
);
2842 /* Calculate alignment and size requirements of a private copy of data shared
2843 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2846 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2847 unsigned short *kinds
, size_t *tgt_align
,
2851 for (i
= 0; i
< mapnum
; i
++)
2852 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2854 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2855 if (*tgt_align
< align
)
2857 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2858 *tgt_size
+= sizes
[i
];
2862 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2865 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2866 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2869 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2871 tgt
+= tgt_align
- al
;
2874 for (i
= 0; i
< mapnum
; i
++)
2875 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
2877 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2878 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2879 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2880 hostaddrs
[i
] = tgt
+ tgt_size
;
2881 tgt_size
= tgt_size
+ sizes
[i
];
2882 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
2884 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
2890 /* Helper function of GOMP_target{,_ext} routines. */
2893 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2894 void (*host_fn
) (void *))
2896 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2897 return (void *) host_fn
;
2900 gomp_mutex_lock (&devicep
->lock
);
2901 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2903 gomp_mutex_unlock (&devicep
->lock
);
2907 struct splay_tree_key_s k
;
2908 k
.host_start
= (uintptr_t) host_fn
;
2909 k
.host_end
= k
.host_start
+ 1;
2910 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2911 gomp_mutex_unlock (&devicep
->lock
);
2915 return (void *) tgt_fn
->tgt_offset
;
2919 /* Called when encountering a target directive. If DEVICE
2920 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2921 GOMP_DEVICE_HOST_FALLBACK (or any value
2922 larger than last available hw device), use host fallback.
2923 FN is address of host code, UNUSED is part of the current ABI, but
2924 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2925 with MAPNUM entries, with addresses of the host objects,
2926 sizes of the host objects (resp. for pointer kind pointer bias
2927 and assumed sizeof (void *) size) and kinds. */
2930 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2931 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2932 unsigned char *kinds
)
2934 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2938 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2939 /* All shared memory devices should use the GOMP_target_ext function. */
2940 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2941 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2942 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
2944 htab_t refcount_set
= htab_create (mapnum
);
2945 struct target_mem_desc
*tgt_vars
2946 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2947 &refcount_set
, GOMP_MAP_VARS_TARGET
);
2948 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2950 htab_clear (refcount_set
);
2951 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2952 htab_free (refcount_set
);
2955 static inline unsigned int
2956 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2958 /* If we cannot run asynchronously, simply ignore nowait. */
2959 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2960 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2966 gomp_copy_back_icvs (struct gomp_device_descr
*devicep
, int device
)
2968 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2972 void *host_ptr
= &item
->icvs
;
2973 void *dev_ptr
= omp_get_mapped_ptr (host_ptr
, device
);
2974 if (dev_ptr
!= NULL
)
2975 gomp_copy_dev2host (devicep
, NULL
, host_ptr
, dev_ptr
,
2976 sizeof (struct gomp_offload_icvs
));
2979 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2980 and several arguments have been added:
2981 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2982 DEPEND is array of dependencies, see GOMP_task for details.
2984 ARGS is a pointer to an array consisting of a variable number of both
2985 device-independent and device-specific arguments, which can take one two
2986 elements where the first specifies for which device it is intended, the type
2987 and optionally also the value. If the value is not present in the first
2988 one, the whole second element the actual value. The last element of the
2989 array is a single NULL. Among the device independent can be for example
2990 NUM_TEAMS and THREAD_LIMIT.
2992 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2993 that value, or 1 if teams construct is not present, or 0, if
2994 teams construct does not have num_teams clause and so the choice is
2995 implementation defined, and -1 if it can't be determined on the host
2996 what value will GOMP_teams have on the device.
2997 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2998 body with that value, or 0, if teams construct does not have thread_limit
2999 clause or the teams construct is not present, or -1 if it can't be
3000 determined on the host what value will GOMP_teams have on the device. */
3003 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
3004 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3005 unsigned int flags
, void **depend
, void **args
)
3007 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3008 size_t tgt_align
= 0, tgt_size
= 0;
3009 bool fpc_done
= false;
3011 /* Obtain the original TEAMS and THREADS values from ARGS. */
3012 intptr_t orig_teams
= 1, orig_threads
= 0;
3013 size_t num_args
= 0, len
= 1, teams_len
= 1, threads_len
= 1;
3014 void **tmpargs
= args
;
3017 intptr_t id
= (intptr_t) *tmpargs
++, val
;
3018 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3020 val
= (intptr_t) *tmpargs
++;
3025 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
3029 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
3031 val
= val
> INT_MAX
? INT_MAX
: val
;
3032 if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
)
3037 else if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
)
3044 intptr_t new_teams
= orig_teams
, new_threads
= orig_threads
;
3045 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3046 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3047 value could not be determined. No change.
3048 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3049 Set device-specific value.
3050 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3052 if (orig_teams
== -2)
3054 else if (orig_teams
== 0)
3056 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3058 new_teams
= item
->icvs
.nteams
;
3060 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3061 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3062 e.g. a THREAD_LIMIT clause. */
3063 if (orig_teams
> -2 && orig_threads
== 0)
3065 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3067 new_threads
= item
->icvs
.teams_thread_limit
;
3070 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3072 void **new_args
= args
;
3073 if (orig_teams
!= new_teams
|| orig_threads
!= new_threads
)
3075 size_t tms_len
= (orig_teams
== new_teams
3077 : (new_teams
> -(1 << 15) && new_teams
< (1 << 15)
3079 size_t ths_len
= (orig_threads
== new_threads
3081 : (new_threads
> -(1 << 15) && new_threads
< (1 << 15)
3083 /* One additional item after the last arg must be NULL. */
3084 size_t new_args_cnt
= num_args
- teams_len
- threads_len
+ tms_len
3086 new_args
= (void **) gomp_alloca (new_args_cnt
* sizeof (void*));
3089 void **tmp_new_args
= new_args
;
3090 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3091 too if they have not been changed and skipped otherwise. */
3094 intptr_t id
= (intptr_t) *tmpargs
;
3095 if (((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
3096 && orig_teams
!= new_teams
)
3097 || ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
3098 && orig_threads
!= new_threads
))
3101 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3106 *tmp_new_args
++ = *tmpargs
++;
3107 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3108 *tmp_new_args
++ = *tmpargs
++;
3112 /* Add the new TEAMS arg to the new args list if it has been changed. */
3113 if (orig_teams
!= new_teams
)
3115 intptr_t new_val
= new_teams
;
3118 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3119 | GOMP_TARGET_ARG_NUM_TEAMS
;
3120 *tmp_new_args
++ = (void *) new_val
;
3124 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3125 | GOMP_TARGET_ARG_NUM_TEAMS
);
3126 *tmp_new_args
++ = (void *) new_val
;
3130 /* Add the new THREADS arg to the new args list if it has been changed. */
3131 if (orig_threads
!= new_threads
)
3133 intptr_t new_val
= new_threads
;
3136 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3137 | GOMP_TARGET_ARG_THREAD_LIMIT
;
3138 *tmp_new_args
++ = (void *) new_val
;
3142 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3143 | GOMP_TARGET_ARG_THREAD_LIMIT
);
3144 *tmp_new_args
++ = (void *) new_val
;
3148 *tmp_new_args
= NULL
;
3151 flags
= clear_unsupported_flags (devicep
, flags
);
3153 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
3155 struct gomp_thread
*thr
= gomp_thread ();
3156 /* Create a team if we don't have any around, as nowait
3157 target tasks make sense to run asynchronously even when
3158 outside of any parallel. */
3159 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
3161 struct gomp_team
*team
= gomp_new_team (1);
3162 struct gomp_task
*task
= thr
->task
;
3163 struct gomp_task
**implicit_task
= &task
;
3164 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
3165 team
->prev_ts
= thr
->ts
;
3166 thr
->ts
.team
= team
;
3167 thr
->ts
.team_id
= 0;
3168 thr
->ts
.work_share
= &team
->work_shares
[0];
3169 thr
->ts
.last_work_share
= NULL
;
3170 #ifdef HAVE_SYNC_BUILTINS
3171 thr
->ts
.single_count
= 0;
3173 thr
->ts
.static_trip
= 0;
3174 thr
->task
= &team
->implicit_task
[0];
3175 gomp_init_task (thr
->task
, NULL
, icv
);
3176 while (*implicit_task
3177 && (*implicit_task
)->kind
!= GOMP_TASK_IMPLICIT
)
3178 implicit_task
= &(*implicit_task
)->parent
;
3181 thr
->task
= *implicit_task
;
3183 free (*implicit_task
);
3184 thr
->task
= &team
->implicit_task
[0];
3187 pthread_setspecific (gomp_thread_destructor
, thr
);
3188 if (implicit_task
!= &task
)
3190 *implicit_task
= thr
->task
;
3195 && !thr
->task
->final_task
)
3197 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
3198 sizes
, kinds
, flags
, depend
, new_args
,
3199 GOMP_TARGET_TASK_BEFORE_MAP
);
3204 /* If there are depend clauses, but nowait is not present
3205 (or we are in a final task), block the parent task until the
3206 dependencies are resolved and then just continue with the rest
3207 of the function as if it is a merged task. */
3210 struct gomp_thread
*thr
= gomp_thread ();
3211 if (thr
->task
&& thr
->task
->depend_hash
)
3213 /* If we might need to wait, copy firstprivate now. */
3214 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3215 &tgt_align
, &tgt_size
);
3218 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3219 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3220 tgt_align
, tgt_size
);
3223 gomp_task_maybe_wait_for_dependencies (depend
);
3229 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3230 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
3231 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3235 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3236 &tgt_align
, &tgt_size
);
3239 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3240 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3241 tgt_align
, tgt_size
);
3244 gomp_target_fallback (fn
, hostaddrs
, devicep
, new_args
);
3248 struct target_mem_desc
*tgt_vars
;
3249 htab_t refcount_set
= NULL
;
3251 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3255 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3256 &tgt_align
, &tgt_size
);
3259 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3260 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3261 tgt_align
, tgt_size
);
3268 refcount_set
= htab_create (mapnum
);
3269 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
3270 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
3272 devicep
->run_func (devicep
->target_id
, fn_addr
,
3273 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
3277 htab_clear (refcount_set
);
3278 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3281 htab_free (refcount_set
);
3283 /* Copy back ICVs from device to host.
3284 HOST_PTR is expected to exist since it was added in
3285 gomp_load_image_to_device if not already available. */
3286 gomp_copy_back_icvs (devicep
, device
);
3291 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3292 keeping track of all variable handling - assuming that reverse offload occurs
3293 ony very rarely. Downside is that the reverse search is slow. */
3295 struct gomp_splay_tree_rev_lookup_data
{
3296 uintptr_t tgt_start
;
3302 gomp_splay_tree_rev_lookup (splay_tree_key key
, void *d
)
3304 struct gomp_splay_tree_rev_lookup_data
*data
;
3305 data
= (struct gomp_splay_tree_rev_lookup_data
*)d
;
3306 uintptr_t tgt_start
= key
->tgt
->tgt_start
+ key
->tgt_offset
;
3308 if (tgt_start
> data
->tgt_start
|| key
->tgt
->list_count
== 0)
3312 for (j
= 0; j
< key
->tgt
->list_count
; j
++)
3313 if (key
->tgt
->list
[j
].key
== key
)
3315 assert (j
< key
->tgt
->list_count
);
3316 uintptr_t tgt_end
= tgt_start
+ key
->tgt
->list
[j
].length
;
3318 if ((tgt_start
== data
->tgt_start
&& tgt_end
== data
->tgt_end
)
3319 || (tgt_end
> data
->tgt_start
&& tgt_start
< data
->tgt_end
))
3327 static inline splay_tree_key
3328 gomp_map_rev_lookup (splay_tree mem_map
, uint64_t tgt_start
, uint64_t tgt_end
,
3331 struct gomp_splay_tree_rev_lookup_data data
;
3333 data
.tgt_start
= tgt_start
;
3334 data
.tgt_end
= tgt_end
;
3336 if (tgt_start
!= tgt_end
)
3338 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3343 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3344 if (data
.key
!= NULL
|| zero_len
)
3349 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
/* Per-map-entry bookkeeping for reverse-offload argument copying.
   NOTE(review): struct reconstructed from a garbled extraction; member
   list verified against uses below (d[i].devaddr), but confirm against
   upstream libgomp target.c.  */
struct cpy_data
{
  uint64_t devaddr;
  bool present, aligned;
};
3360 /* Search just mapped reverse-offload data; returns index if found,
3364 gomp_map_cdata_lookup_int (struct cpy_data
*d
, uint64_t *devaddrs
,
3365 unsigned short *kinds
, uint64_t *sizes
, size_t n
,
3366 uint64_t tgt_start
, uint64_t tgt_end
)
3368 const bool short_mapkind
= true;
3369 const int typemask
= short_mapkind
? 0xff : 0x7;
3371 for (i
= 0; i
< n
; i
++)
3373 bool is_struct
= ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3374 == GOMP_MAP_STRUCT
);
3377 dev_end
= d
[i
].devaddr
+ sizes
[i
];
3380 if (i
+ sizes
[i
] < n
)
3381 dev_end
= d
[i
+ sizes
[i
]].devaddr
+ sizes
[i
+ sizes
[i
]];
3383 dev_end
= devaddrs
[i
+ sizes
[i
]] + sizes
[i
+ sizes
[i
]];
3385 if ((d
[i
].devaddr
== tgt_start
&& dev_end
== tgt_end
)
3386 || (dev_end
> tgt_start
&& d
[i
].devaddr
< tgt_end
))
3395 gomp_map_cdata_lookup (struct cpy_data
*d
, uint64_t *devaddrs
,
3396 unsigned short *kinds
, uint64_t *sizes
,
3397 size_t n
, uint64_t tgt_start
, uint64_t tgt_end
,
3401 if (tgt_start
!= tgt_end
)
3402 return gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3403 tgt_start
, tgt_end
);
3405 i
= gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3406 tgt_start
, tgt_end
);
3407 if (i
< n
|| zero_len
)
3412 return gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3413 tgt_start
, tgt_end
);
3416 /* Handle reverse offload. This is called by the device plugins for a
3417 reverse offload; it is not called if the outer target runs on the host.
3418 The mapping is simplified device-affecting constructs (except for target
3419 with device(ancestor:1)) must not be encountered; in particular not
3420 target (enter/exit) data. */
3423 gomp_target_rev (uint64_t fn_ptr
, uint64_t mapnum
, uint64_t devaddrs_ptr
,
3424 uint64_t sizes_ptr
, uint64_t kinds_ptr
, int dev_num
,
3425 struct goacc_asyncqueue
*aq
)
3427 /* Return early if there is no offload code. */
3428 if (sizeof (OFFLOAD_PLUGINS
) == sizeof (""))
3430 /* Currently, this fails because of calculate_firstprivate_requirements
3431 below; it could be fixed but additional code needs to be updated to
3432 handle 32bit hosts - thus, it is not worthwhile. */
3433 if (sizeof (void *) != sizeof (uint64_t))
3434 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3436 struct cpy_data
*cdata
= NULL
;
3439 unsigned short *kinds
;
3440 const bool short_mapkind
= true;
3441 const int typemask
= short_mapkind
? 0xff : 0x7;
3442 struct gomp_device_descr
*devicep
= resolve_device (dev_num
, false);
3444 reverse_splay_tree_key n
;
3445 struct reverse_splay_tree_key_s k
;
3448 gomp_mutex_lock (&devicep
->lock
);
3449 n
= gomp_map_lookup_rev (&devicep
->mem_map_rev
, &k
);
3450 gomp_mutex_unlock (&devicep
->lock
);
3453 gomp_fatal ("Cannot find reverse-offload function");
3454 void (*host_fn
) (void *) = (void (*) (void *)) n
->k
->host_start
;
3456 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) || mapnum
== 0)
3458 devaddrs
= (uint64_t *) (uintptr_t) devaddrs_ptr
;
3459 sizes
= (uint64_t *) (uintptr_t) sizes_ptr
;
3460 kinds
= (unsigned short *) (uintptr_t) kinds_ptr
;
3464 devaddrs
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3465 sizes
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3466 kinds
= (unsigned short *) gomp_malloc (mapnum
* sizeof (unsigned short));
3467 gomp_copy_dev2host (devicep
, aq
, devaddrs
,
3468 (const void *) (uintptr_t) devaddrs_ptr
,
3469 mapnum
* sizeof (uint64_t));
3470 gomp_copy_dev2host (devicep
, aq
, sizes
,
3471 (const void *) (uintptr_t) sizes_ptr
,
3472 mapnum
* sizeof (uint64_t));
3473 gomp_copy_dev2host (devicep
, aq
, kinds
,
3474 (const void *) (uintptr_t) kinds_ptr
,
3475 mapnum
* sizeof (unsigned short));
3476 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3477 exit (EXIT_FAILURE
);
3480 size_t tgt_align
= 0, tgt_size
= 0;
3482 /* If actually executed on 32bit systems, the casts lead to wrong code;
3483 but 32bit with offloading is not supported; see top of this function. */
3484 calculate_firstprivate_requirements (mapnum
, (void *) (uintptr_t) sizes
,
3485 (void *) (uintptr_t) kinds
,
3486 &tgt_align
, &tgt_size
);
3490 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3491 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3493 tgt
+= tgt_align
- al
;
3495 for (uint64_t i
= 0; i
< mapnum
; i
++)
3496 if (get_kind (short_mapkind
, kinds
, i
) == GOMP_MAP_FIRSTPRIVATE
3497 && devaddrs
[i
] != 0)
3499 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3500 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3501 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3502 memcpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3506 gomp_copy_dev2host (devicep
, aq
, tgt
+ tgt_size
,
3507 (void *) (uintptr_t) devaddrs
[i
],
3509 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3510 exit (EXIT_FAILURE
);
3512 devaddrs
[i
] = (uint64_t) (uintptr_t) tgt
+ tgt_size
;
3513 tgt_size
= tgt_size
+ sizes
[i
];
3514 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3516 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3517 == GOMP_MAP_ATTACH
))
3519 *(uint64_t*) (uintptr_t) (devaddrs
[i
+1] + sizes
[i
+1])
3520 = (uint64_t) devaddrs
[i
];
3526 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3528 size_t j
, struct_cpy
= 0;
3530 cdata
= gomp_alloca (sizeof (*cdata
) * mapnum
);
3531 memset (cdata
, '\0', sizeof (*cdata
) * mapnum
);
3532 gomp_mutex_lock (&devicep
->lock
);
3533 for (uint64_t i
= 0; i
< mapnum
; i
++)
3535 if (devaddrs
[i
] == 0)
3538 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3541 case GOMP_MAP_FIRSTPRIVATE
:
3542 case GOMP_MAP_FIRSTPRIVATE_INT
:
3545 case GOMP_MAP_DELETE
:
3546 case GOMP_MAP_RELEASE
:
3547 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3548 /* Assume it is present; look it up - but ignore unless the
3549 present clause is there. */
3550 case GOMP_MAP_ALLOC
:
3552 case GOMP_MAP_FORCE_ALLOC
:
3553 case GOMP_MAP_FORCE_FROM
:
3554 case GOMP_MAP_ALWAYS_FROM
:
3556 case GOMP_MAP_TOFROM
:
3557 case GOMP_MAP_FORCE_TO
:
3558 case GOMP_MAP_FORCE_TOFROM
:
3559 case GOMP_MAP_ALWAYS_TO
:
3560 case GOMP_MAP_ALWAYS_TOFROM
:
3561 case GOMP_MAP_FORCE_PRESENT
:
3562 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
3563 case GOMP_MAP_ALWAYS_PRESENT_TO
:
3564 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
3565 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3566 cdata
[i
].devaddr
= devaddrs
[i
];
3567 bool zero_len
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3568 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
);
3569 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3571 devaddrs
[i
] + sizes
[i
], zero_len
);
3575 cdata
[i
].present
= true;
3576 devaddrs
[i
] = devaddrs
[j
] + devaddrs
[i
] - cdata
[j
].devaddr
;
3580 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3582 devaddrs
[i
] + sizes
[i
], zero_len
);
3583 cdata
[i
].present
= n2
!= NULL
;
3585 if (!cdata
[i
].present
&& GOMP_MAP_PRESENT_P (kind
))
3587 gomp_mutex_unlock (&devicep
->lock
);
3588 #ifdef HAVE_INTTYPES_H
3589 gomp_fatal ("present clause: no corresponding data on "
3590 "parent device at %p with size %"PRIu64
,
3591 (void *) (uintptr_t) devaddrs
[i
],
3592 (uint64_t) sizes
[i
]);
3594 gomp_fatal ("present clause: no corresponding data on "
3595 "parent device at %p with size %lu",
3596 (void *) (uintptr_t) devaddrs
[i
],
3597 (unsigned long) sizes
[i
]);
3601 else if (!cdata
[i
].present
3602 && kind
!= GOMP_MAP_DELETE
3603 && kind
!= GOMP_MAP_RELEASE
3604 && kind
!= GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
3606 cdata
[i
].aligned
= true;
3607 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3609 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3612 else if (n2
!= NULL
)
3613 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3614 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3615 if (((!cdata
[i
].present
|| struct_cpy
)
3616 && (kind
== GOMP_MAP_TO
|| kind
== GOMP_MAP_TOFROM
))
3617 || kind
== GOMP_MAP_FORCE_TO
3618 || kind
== GOMP_MAP_FORCE_TOFROM
3619 || GOMP_MAP_ALWAYS_TO_P (kind
))
3621 gomp_copy_dev2host (devicep
, aq
,
3622 (void *) (uintptr_t) devaddrs
[i
],
3623 (void *) (uintptr_t) cdata
[i
].devaddr
,
3625 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3627 gomp_mutex_unlock (&devicep
->lock
);
3628 exit (EXIT_FAILURE
);
3634 case GOMP_MAP_ATTACH
:
3635 case GOMP_MAP_POINTER
:
3636 case GOMP_MAP_ALWAYS_POINTER
:
3637 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3638 devaddrs
[i
] + sizes
[i
],
3639 devaddrs
[i
] + sizes
[i
]
3640 + sizeof (void*), false);
3641 cdata
[i
].present
= n2
!= NULL
;
3642 cdata
[i
].devaddr
= devaddrs
[i
];
3644 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3645 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3648 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3649 devaddrs
[i
] + sizes
[i
],
3650 devaddrs
[i
] + sizes
[i
]
3651 + sizeof (void*), false);
3654 cdata
[i
].present
= true;
3655 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3656 - cdata
[j
].devaddr
);
3659 if (!cdata
[i
].present
)
3660 devaddrs
[i
] = (uintptr_t) gomp_malloc (sizeof (void*));
3661 /* Assume that when present, the pointer is already correct. */
3663 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[i
])
3666 case GOMP_MAP_TO_PSET
:
3667 /* Assume that when present, the pointers are fine and no 'to:'
3669 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3670 devaddrs
[i
], devaddrs
[i
] + sizes
[i
],
3672 cdata
[i
].present
= n2
!= NULL
;
3673 cdata
[i
].devaddr
= devaddrs
[i
];
3675 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3676 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3679 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3681 devaddrs
[i
] + sizes
[i
], false);
3684 cdata
[i
].present
= true;
3685 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3686 - cdata
[j
].devaddr
);
3689 if (!cdata
[i
].present
)
3691 cdata
[i
].aligned
= true;
3692 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3694 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3696 gomp_copy_dev2host (devicep
, aq
,
3697 (void *) (uintptr_t) devaddrs
[i
],
3698 (void *) (uintptr_t) cdata
[i
].devaddr
,
3700 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3702 gomp_mutex_unlock (&devicep
->lock
);
3703 exit (EXIT_FAILURE
);
3706 for (j
= i
+ 1; j
< mapnum
; j
++)
3708 kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
3709 if (!GOMP_MAP_ALWAYS_POINTER_P (kind
)
3710 && !GOMP_MAP_POINTER_P (kind
))
3712 if (devaddrs
[j
] < devaddrs
[i
])
3714 if (cdata
[i
].present
)
3716 if (devaddrs
[j
] == 0)
3718 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[j
]) = 0;
3723 /* Dereference devaddrs[j] to get the device addr. */
3724 assert (devaddrs
[j
] - sizes
[j
] == cdata
[i
].devaddr
);
3725 devaddrs
[j
] = *(uint64_t *) (uintptr_t) (devaddrs
[i
]
3727 cdata
[j
].present
= true;
3728 cdata
[j
].devaddr
= devaddrs
[j
];
3729 if (devaddrs
[j
] == 0)
3731 k
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, j
,
3733 devaddrs
[j
] + sizeof (void*),
3736 devaddrs
[j
] = (devaddrs
[k
] + devaddrs
[j
]
3737 - cdata
[k
].devaddr
);
3740 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3742 devaddrs
[j
] + sizeof (void*),
3746 gomp_mutex_unlock (&devicep
->lock
);
3747 gomp_fatal ("Pointer target wasn't mapped");
3749 devaddrs
[j
] = (n2
->host_start
+ cdata
[j
].devaddr
3750 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3752 *(void **) (uintptr_t) (devaddrs
[i
] + sizes
[j
])
3753 = (void *) (uintptr_t) devaddrs
[j
];
3757 case GOMP_MAP_STRUCT
:
3758 n2
= gomp_map_rev_lookup (&devicep
->mem_map
, devaddrs
[i
+1],
3759 devaddrs
[i
+ sizes
[i
]]
3760 + sizes
[i
+ sizes
[i
]], false);
3761 cdata
[i
].present
= n2
!= NULL
;
3762 cdata
[i
].devaddr
= devaddrs
[i
];
3763 struct_cpy
= cdata
[i
].present
? 0 : sizes
[i
];
3766 size_t sz
= (size_t) (devaddrs
[i
+ sizes
[i
]]
3768 + sizes
[i
+ sizes
[i
]]);
3769 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3770 cdata
[i
].aligned
= true;
3771 devaddrs
[i
] = (uintptr_t) gomp_aligned_alloc (align
, sz
);
3772 devaddrs
[i
] -= devaddrs
[i
+1] - cdata
[i
].devaddr
;
3775 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3776 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3779 gomp_mutex_unlock (&devicep
->lock
);
3780 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds
[i
]);
3783 gomp_mutex_unlock (&devicep
->lock
);
3788 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3790 uint64_t struct_cpy
= 0;
3791 bool clean_struct
= false;
3792 for (uint64_t i
= 0; i
< mapnum
; i
++)
3794 if (cdata
[i
].devaddr
== 0)
3796 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3797 bool copy
= !cdata
[i
].present
|| struct_cpy
;
3800 case GOMP_MAP_FORCE_FROM
:
3801 case GOMP_MAP_FORCE_TOFROM
:
3802 case GOMP_MAP_ALWAYS_FROM
:
3803 case GOMP_MAP_ALWAYS_TOFROM
:
3804 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
3805 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
3809 case GOMP_MAP_TOFROM
:
3812 gomp_copy_host2dev (devicep
, aq
,
3813 (void *) (uintptr_t) cdata
[i
].devaddr
,
3814 (void *) (uintptr_t) devaddrs
[i
],
3815 sizes
[i
], false, NULL
);
3816 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3817 exit (EXIT_FAILURE
);
3827 if (kind
== GOMP_MAP_STRUCT
&& !cdata
[i
].present
)
3829 clean_struct
= true;
3830 struct_cpy
= sizes
[i
];
3832 else if (!cdata
[i
].present
&& cdata
[i
].aligned
)
3833 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3834 else if (!cdata
[i
].present
)
3835 free ((void *) (uintptr_t) devaddrs
[i
]);
3838 for (uint64_t i
= 0; i
< mapnum
; i
++)
3839 if (!cdata
[i
].present
3840 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3841 == GOMP_MAP_STRUCT
))
3843 devaddrs
[i
] += cdata
[i
+1].devaddr
- cdata
[i
].devaddr
;
3844 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3853 /* Host fallback for GOMP_target_data{,_ext} routines. */
3856 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
3858 struct gomp_task_icv
*icv
= gomp_icv (false);
3860 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3862 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3863 "be used for offloading");
3865 if (icv
->target_data
)
3867 /* Even when doing a host fallback, if there are any active
3868 #pragma omp target data constructs, need to remember the
3869 new #pragma omp target data, otherwise GOMP_target_end_data
3870 would get out of sync. */
3871 struct target_mem_desc
*tgt
3872 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
3873 NULL
, GOMP_MAP_VARS_DATA
);
3874 tgt
->prev
= icv
->target_data
;
3875 icv
->target_data
= tgt
;
3880 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
3881 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3883 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3886 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3887 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3888 return gomp_target_data_fallback (devicep
);
3890 struct target_mem_desc
*tgt
3891 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3892 NULL
, GOMP_MAP_VARS_DATA
);
3893 struct gomp_task_icv
*icv
= gomp_icv (true);
3894 tgt
->prev
= icv
->target_data
;
3895 icv
->target_data
= tgt
;
3899 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
3900 size_t *sizes
, unsigned short *kinds
)
3902 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3905 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3906 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3907 return gomp_target_data_fallback (devicep
);
3909 struct target_mem_desc
*tgt
3910 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
3911 NULL
, GOMP_MAP_VARS_DATA
);
3912 struct gomp_task_icv
*icv
= gomp_icv (true);
3913 tgt
->prev
= icv
->target_data
;
3914 icv
->target_data
= tgt
;
3918 GOMP_target_end_data (void)
3920 struct gomp_task_icv
*icv
= gomp_icv (false);
3921 if (icv
->target_data
)
3923 struct target_mem_desc
*tgt
= icv
->target_data
;
3924 icv
->target_data
= tgt
->prev
;
3925 gomp_unmap_vars (tgt
, true, NULL
);
3930 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
3931 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3933 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3936 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3937 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3940 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
3944 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
3945 size_t *sizes
, unsigned short *kinds
,
3946 unsigned int flags
, void **depend
)
3948 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3950 /* If there are depend clauses, but nowait is not present,
3951 block the parent task until the dependencies are resolved
3952 and then just continue with the rest of the function as if it
3953 is a merged task. Until we are able to schedule task during
3954 variable mapping or unmapping, ignore nowait if depend clauses
3958 struct gomp_thread
*thr
= gomp_thread ();
3959 if (thr
->task
&& thr
->task
->depend_hash
)
3961 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3963 && !thr
->task
->final_task
)
3965 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3966 mapnum
, hostaddrs
, sizes
, kinds
,
3967 flags
| GOMP_TARGET_FLAG_UPDATE
,
3968 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
3973 struct gomp_team
*team
= thr
->ts
.team
;
3974 /* If parallel or taskgroup has been cancelled, don't start new
3976 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3978 if (gomp_team_barrier_cancelled (&team
->barrier
))
3980 if (thr
->task
->taskgroup
)
3982 if (thr
->task
->taskgroup
->cancelled
)
3984 if (thr
->task
->taskgroup
->workshare
3985 && thr
->task
->taskgroup
->prev
3986 && thr
->task
->taskgroup
->prev
->cancelled
)
3991 gomp_task_maybe_wait_for_dependencies (depend
);
3997 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3998 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4001 struct gomp_thread
*thr
= gomp_thread ();
4002 struct gomp_team
*team
= thr
->ts
.team
;
4003 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4004 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4006 if (gomp_team_barrier_cancelled (&team
->barrier
))
4008 if (thr
->task
->taskgroup
)
4010 if (thr
->task
->taskgroup
->cancelled
)
4012 if (thr
->task
->taskgroup
->workshare
4013 && thr
->task
->taskgroup
->prev
4014 && thr
->task
->taskgroup
->prev
->cancelled
)
4019 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
4023 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
4024 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
4025 htab_t
*refcount_set
)
4027 const int typemask
= 0xff;
4029 gomp_mutex_lock (&devicep
->lock
);
4030 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
4032 gomp_mutex_unlock (&devicep
->lock
);
4036 for (i
= 0; i
< mapnum
; i
++)
4037 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
4039 struct splay_tree_key_s cur_node
;
4040 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
4041 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
4042 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
4045 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
4050 splay_tree_key remove_vars
[mapnum
];
4052 for (i
= 0; i
< mapnum
; i
++)
4054 struct splay_tree_key_s cur_node
;
4055 unsigned char kind
= kinds
[i
] & typemask
;
4059 case GOMP_MAP_ALWAYS_FROM
:
4060 case GOMP_MAP_DELETE
:
4061 case GOMP_MAP_RELEASE
:
4062 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
4063 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
4064 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
4065 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
4066 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4067 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
4068 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
4069 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
4073 bool delete_p
= (kind
== GOMP_MAP_DELETE
4074 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
4075 bool do_copy
, do_remove
;
4076 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
4079 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
4080 || kind
== GOMP_MAP_ALWAYS_FROM
)
4082 if (k
->aux
&& k
->aux
->attach_count
)
4084 /* We have to be careful not to overwrite still attached
4085 pointers during the copyback to host. */
4086 uintptr_t addr
= k
->host_start
;
4087 while (addr
< k
->host_end
)
4089 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
4090 if (k
->aux
->attach_count
[i
] == 0)
4091 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
4092 (void *) (k
->tgt
->tgt_start
4094 + addr
- k
->host_start
),
4096 addr
+= sizeof (void *);
4100 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
4101 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
4102 + cur_node
.host_start
4104 cur_node
.host_end
- cur_node
.host_start
);
4107 /* Structure elements lists are removed altogether at once, which
4108 may cause immediate deallocation of the target_mem_desc, causing
4109 errors if we still have following element siblings to copy back.
4110 While we're at it, it also seems more disciplined to simply
4111 queue all removals together for processing below.
4113 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4114 not have this problem, since they maintain an additional
4115 tgt->refcount = 1 reference to the target_mem_desc to start with.
4118 remove_vars
[nrmvars
++] = k
;
4121 case GOMP_MAP_DETACH
:
4124 gomp_mutex_unlock (&devicep
->lock
);
4125 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4130 for (int i
= 0; i
< nrmvars
; i
++)
4131 gomp_remove_var (devicep
, remove_vars
[i
]);
4133 gomp_mutex_unlock (&devicep
->lock
);
4137 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
4138 size_t *sizes
, unsigned short *kinds
,
4139 unsigned int flags
, void **depend
)
4141 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4143 /* If there are depend clauses, but nowait is not present,
4144 block the parent task until the dependencies are resolved
4145 and then just continue with the rest of the function as if it
4146 is a merged task. Until we are able to schedule task during
4147 variable mapping or unmapping, ignore nowait if depend clauses
4151 struct gomp_thread
*thr
= gomp_thread ();
4152 if (thr
->task
&& thr
->task
->depend_hash
)
4154 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
4156 && !thr
->task
->final_task
)
4158 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
4159 mapnum
, hostaddrs
, sizes
, kinds
,
4160 flags
, depend
, NULL
,
4161 GOMP_TARGET_TASK_DATA
))
4166 struct gomp_team
*team
= thr
->ts
.team
;
4167 /* If parallel or taskgroup has been cancelled, don't start new
4169 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4171 if (gomp_team_barrier_cancelled (&team
->barrier
))
4173 if (thr
->task
->taskgroup
)
4175 if (thr
->task
->taskgroup
->cancelled
)
4177 if (thr
->task
->taskgroup
->workshare
4178 && thr
->task
->taskgroup
->prev
4179 && thr
->task
->taskgroup
->prev
->cancelled
)
4184 gomp_task_maybe_wait_for_dependencies (depend
);
4190 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4191 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4194 struct gomp_thread
*thr
= gomp_thread ();
4195 struct gomp_team
*team
= thr
->ts
.team
;
4196 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4197 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4199 if (gomp_team_barrier_cancelled (&team
->barrier
))
4201 if (thr
->task
->taskgroup
)
4203 if (thr
->task
->taskgroup
->cancelled
)
4205 if (thr
->task
->taskgroup
->workshare
4206 && thr
->task
->taskgroup
->prev
4207 && thr
->task
->taskgroup
->prev
->cancelled
)
4212 htab_t refcount_set
= htab_create (mapnum
);
4214 /* The variables are mapped separately such that they can be released
4217 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4218 for (i
= 0; i
< mapnum
; i
++)
4219 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
4220 || (kinds
[i
] & 0xff) == GOMP_MAP_STRUCT_UNORD
)
4222 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
4223 &kinds
[i
], true, &refcount_set
,
4224 GOMP_MAP_VARS_ENTER_DATA
);
4227 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
4229 for (j
= i
+ 1; j
< mapnum
; j
++)
4230 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
4231 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
4233 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
4234 &kinds
[i
], true, &refcount_set
,
4235 GOMP_MAP_VARS_ENTER_DATA
);
4238 else if (i
+ 1 < mapnum
4239 && ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
4240 || ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4241 && (kinds
[i
] & 0xff) != GOMP_MAP_ALWAYS_POINTER
)))
4243 /* An attach operation must be processed together with the mapped
4244 base-pointer list item. */
4245 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4246 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4250 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4251 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4253 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
4254 htab_free (refcount_set
);
4258 gomp_target_task_fn (void *data
)
4260 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
4261 struct gomp_device_descr
*devicep
= ttask
->devicep
;
4263 if (ttask
->fn
!= NULL
)
4267 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4268 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
4269 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
4271 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
4272 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
4277 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
4280 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
4284 void *actual_arguments
;
4285 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4288 actual_arguments
= ttask
->hostaddrs
;
4292 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
4293 NULL
, ttask
->sizes
, ttask
->kinds
, true,
4294 NULL
, GOMP_MAP_VARS_TARGET
);
4295 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
4297 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
4299 assert (devicep
->async_run_func
);
4300 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
4301 ttask
->args
, (void *) ttask
);
4304 else if (devicep
== NULL
4305 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4306 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4310 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
4311 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4312 ttask
->kinds
, true);
4315 htab_t refcount_set
= htab_create (ttask
->mapnum
);
4316 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4317 for (i
= 0; i
< ttask
->mapnum
; i
++)
4318 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
4319 || (ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT_UNORD
)
4321 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
4322 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
4323 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4324 i
+= ttask
->sizes
[i
];
4327 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
4328 &ttask
->kinds
[i
], true, &refcount_set
,
4329 GOMP_MAP_VARS_ENTER_DATA
);
4331 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4332 ttask
->kinds
, &refcount_set
);
4333 htab_free (refcount_set
);
4339 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
4343 struct gomp_task_icv
*icv
= gomp_icv (true);
4344 icv
->thread_limit_var
4345 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4351 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
4352 unsigned int thread_limit
, bool first
)
4354 struct gomp_thread
*thr
= gomp_thread ();
4359 struct gomp_task_icv
*icv
= gomp_icv (true);
4360 icv
->thread_limit_var
4361 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4363 (void) num_teams_high
;
4364 if (num_teams_low
== 0)
4366 thr
->num_teams
= num_teams_low
- 1;
4369 else if (thr
->team_num
== thr
->num_teams
)
4377 omp_target_alloc (size_t size
, int device_num
)
4379 if (device_num
== omp_initial_device
4380 || device_num
== gomp_get_num_devices ())
4381 return malloc (size
);
4383 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4384 if (devicep
== NULL
)
4387 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4388 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4389 return malloc (size
);
4391 gomp_mutex_lock (&devicep
->lock
);
4392 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
4393 gomp_mutex_unlock (&devicep
->lock
);
4398 omp_target_free (void *device_ptr
, int device_num
)
4400 if (device_num
== omp_initial_device
4401 || device_num
== gomp_get_num_devices ())
4407 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4408 if (devicep
== NULL
|| device_ptr
== NULL
)
4411 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4412 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4418 gomp_mutex_lock (&devicep
->lock
);
4419 gomp_free_device_memory (devicep
, device_ptr
);
4420 gomp_mutex_unlock (&devicep
->lock
);
4424 omp_target_is_present (const void *ptr
, int device_num
)
4426 if (device_num
== omp_initial_device
4427 || device_num
== gomp_get_num_devices ())
4430 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4431 if (devicep
== NULL
)
4437 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4438 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4441 gomp_mutex_lock (&devicep
->lock
);
4442 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4443 struct splay_tree_key_s cur_node
;
4445 cur_node
.host_start
= (uintptr_t) ptr
;
4446 cur_node
.host_end
= cur_node
.host_start
;
4447 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4448 int ret
= n
!= NULL
;
4449 gomp_mutex_unlock (&devicep
->lock
);
4454 omp_target_memcpy_check (int dst_device_num
, int src_device_num
,
4455 struct gomp_device_descr
**dst_devicep
,
4456 struct gomp_device_descr
**src_devicep
)
4458 if (dst_device_num
!= gomp_get_num_devices ()
4459 /* Above gomp_get_num_devices has to be called unconditionally. */
4460 && dst_device_num
!= omp_initial_device
)
4462 *dst_devicep
= resolve_device (dst_device_num
, false);
4463 if (*dst_devicep
== NULL
)
4466 if (!((*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4467 || (*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4468 *dst_devicep
= NULL
;
4471 if (src_device_num
!= num_devices_openmp
4472 && src_device_num
!= omp_initial_device
)
4474 *src_devicep
= resolve_device (src_device_num
, false);
4475 if (*src_devicep
== NULL
)
4478 if (!((*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4479 || (*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4480 *src_devicep
= NULL
;
4487 omp_target_memcpy_copy (void *dst
, const void *src
, size_t length
,
4488 size_t dst_offset
, size_t src_offset
,
4489 struct gomp_device_descr
*dst_devicep
,
4490 struct gomp_device_descr
*src_devicep
)
4493 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
4495 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
4498 if (src_devicep
== NULL
)
4500 gomp_mutex_lock (&dst_devicep
->lock
);
4501 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4502 (char *) dst
+ dst_offset
,
4503 (char *) src
+ src_offset
, length
);
4504 gomp_mutex_unlock (&dst_devicep
->lock
);
4505 return (ret
? 0 : EINVAL
);
4507 if (dst_devicep
== NULL
)
4509 gomp_mutex_lock (&src_devicep
->lock
);
4510 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4511 (char *) dst
+ dst_offset
,
4512 (char *) src
+ src_offset
, length
);
4513 gomp_mutex_unlock (&src_devicep
->lock
);
4514 return (ret
? 0 : EINVAL
);
4516 if (src_devicep
== dst_devicep
)
4518 gomp_mutex_lock (&src_devicep
->lock
);
4519 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4520 (char *) dst
+ dst_offset
,
4521 (char *) src
+ src_offset
, length
);
4522 gomp_mutex_unlock (&src_devicep
->lock
);
4523 return (ret
? 0 : EINVAL
);
/* OpenMP API: copy LENGTH bytes between host/device memory regions.
   Returns 0 on success, non-zero (errno-style) on failure.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);
  return ret;
}
/* Argument bundle for the omp_target_memcpy_async task helper; fields mirror
   the parameters of omp_target_memcpy_copy.  */
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;
4557 omp_target_memcpy_async_helper (void *args
)
4559 omp_target_memcpy_data
*a
= args
;
4560 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
4561 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
4562 gomp_fatal ("omp_target_memcpy failed");
4566 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
4567 size_t dst_offset
, size_t src_offset
,
4568 int dst_device_num
, int src_device_num
,
4569 int depobj_count
, omp_depend_t
*depobj_list
)
4571 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4572 unsigned int flags
= 0;
4573 void *depend
[depobj_count
+ 5];
4575 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4576 &dst_devicep
, &src_devicep
);
4578 omp_target_memcpy_data s
= {
4582 .dst_offset
= dst_offset
,
4583 .src_offset
= src_offset
,
4584 .dst_devicep
= dst_devicep
,
4585 .src_devicep
= src_devicep
4591 if (depobj_count
> 0 && depobj_list
!= NULL
)
4593 flags
|= GOMP_TASK_FLAG_DEPEND
;
4595 depend
[1] = (void *) (uintptr_t) depobj_count
;
4596 depend
[2] = depend
[3] = depend
[4] = 0;
4597 for (i
= 0; i
< depobj_count
; ++i
)
4598 depend
[i
+ 5] = &depobj_list
[i
];
4601 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
4602 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4608 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
4609 int num_dims
, const size_t *volume
,
4610 const size_t *dst_offsets
,
4611 const size_t *src_offsets
,
4612 const size_t *dst_dimensions
,
4613 const size_t *src_dimensions
,
4614 struct gomp_device_descr
*dst_devicep
,
4615 struct gomp_device_descr
*src_devicep
,
4616 size_t *tmp_size
, void **tmp
)
4618 size_t dst_slice
= element_size
;
4619 size_t src_slice
= element_size
;
4620 size_t j
, dst_off
, src_off
, length
;
4625 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
4626 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
4627 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
4629 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
4631 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
4635 else if (src_devicep
== NULL
)
4636 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4637 (char *) dst
+ dst_off
,
4638 (const char *) src
+ src_off
,
4640 else if (dst_devicep
== NULL
)
4641 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4642 (char *) dst
+ dst_off
,
4643 (const char *) src
+ src_off
,
4645 else if (src_devicep
== dst_devicep
)
4646 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4647 (char *) dst
+ dst_off
,
4648 (const char *) src
+ src_off
,
4655 *tmp
= malloc (length
);
4659 else if (*tmp_size
< length
)
4663 *tmp
= malloc (length
);
4667 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
, *tmp
,
4668 (const char *) src
+ src_off
,
4671 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4672 (char *) dst
+ dst_off
, *tmp
,
4675 return ret
? 0 : EINVAL
;
4678 /* host->device, device->host and intra device. */
4681 && src_devicep
== dst_devicep
4682 && src_devicep
->memcpy2d_func
)
4683 || (!src_devicep
!= !dst_devicep
4684 && ((src_devicep
&& src_devicep
->memcpy2d_func
)
4685 || (dst_devicep
&& dst_devicep
->memcpy2d_func
)))))
4687 size_t vol_sz1
, dst_sz1
, src_sz1
, dst_off_sz1
, src_off_sz1
;
4688 int dst_id
= dst_devicep
? dst_devicep
->target_id
: -1;
4689 int src_id
= src_devicep
? src_devicep
->target_id
: -1;
4690 struct gomp_device_descr
*devp
= dst_devicep
? dst_devicep
: src_devicep
;
4692 if (__builtin_mul_overflow (volume
[1], element_size
, &vol_sz1
)
4693 || __builtin_mul_overflow (dst_dimensions
[1], element_size
, &dst_sz1
)
4694 || __builtin_mul_overflow (src_dimensions
[1], element_size
, &src_sz1
)
4695 || __builtin_mul_overflow (dst_offsets
[1], element_size
, &dst_off_sz1
)
4696 || __builtin_mul_overflow (src_offsets
[1], element_size
,
4699 ret
= devp
->memcpy2d_func (dst_id
, src_id
, vol_sz1
, volume
[0],
4700 dst
, dst_off_sz1
, dst_offsets
[0], dst_sz1
,
4701 src
, src_off_sz1
, src_offsets
[0], src_sz1
);
4703 return ret
? 0 : EINVAL
;
4705 else if (num_dims
== 3
4707 && src_devicep
== dst_devicep
4708 && src_devicep
->memcpy3d_func
)
4709 || (!src_devicep
!= !dst_devicep
4710 && ((src_devicep
&& src_devicep
->memcpy3d_func
)
4711 || (dst_devicep
&& dst_devicep
->memcpy3d_func
)))))
4713 size_t vol_sz2
, dst_sz2
, src_sz2
, dst_off_sz2
, src_off_sz2
;
4714 int dst_id
= dst_devicep
? dst_devicep
->target_id
: -1;
4715 int src_id
= src_devicep
? src_devicep
->target_id
: -1;
4716 struct gomp_device_descr
*devp
= dst_devicep
? dst_devicep
: src_devicep
;
4718 if (__builtin_mul_overflow (volume
[2], element_size
, &vol_sz2
)
4719 || __builtin_mul_overflow (dst_dimensions
[2], element_size
, &dst_sz2
)
4720 || __builtin_mul_overflow (src_dimensions
[2], element_size
, &src_sz2
)
4721 || __builtin_mul_overflow (dst_offsets
[2], element_size
, &dst_off_sz2
)
4722 || __builtin_mul_overflow (src_offsets
[2], element_size
,
4725 ret
= devp
->memcpy3d_func (dst_id
, src_id
, vol_sz2
, volume
[1], volume
[0],
4726 dst
, dst_off_sz2
, dst_offsets
[1],
4727 dst_offsets
[0], dst_sz2
, dst_dimensions
[1],
4728 src
, src_off_sz2
, src_offsets
[1],
4729 src_offsets
[0], src_sz2
, src_dimensions
[1]);
4731 return ret
? 0 : EINVAL
;
4734 for (i
= 1; i
< num_dims
; i
++)
4735 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
4736 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
4738 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
4739 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
4741 for (j
= 0; j
< volume
[0]; j
++)
4743 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
4744 (const char *) src
+ src_off
,
4745 element_size
, num_dims
- 1,
4746 volume
+ 1, dst_offsets
+ 1,
4747 src_offsets
+ 1, dst_dimensions
+ 1,
4748 src_dimensions
+ 1, dst_devicep
,
4749 src_devicep
, tmp_size
, tmp
);
4752 dst_off
+= dst_slice
;
4753 src_off
+= src_slice
;
/* Validate the arguments of omp_target_memcpy_rect{,_async}.  A NULL DST and
   SRC is the standard-mandated query for the maximum supported number of
   dimensions, answered with INT_MAX.  Otherwise resolve the device numbers
   like omp_target_memcpy_check.  Returns 0 on success.  */

static int
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
			      int src_device_num,
			      struct gomp_device_descr **dst_devicep,
			      struct gomp_device_descr **src_devicep)
{
  if (!dst && !src)
    return INT_MAX;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     dst_devicep, src_devicep);
  if (ret)
    return ret;

  return 0;
}
4776 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
4777 size_t element_size
, int num_dims
,
4778 const size_t *volume
, const size_t *dst_offsets
,
4779 const size_t *src_offsets
,
4780 const size_t *dst_dimensions
,
4781 const size_t *src_dimensions
,
4782 struct gomp_device_descr
*dst_devicep
,
4783 struct gomp_device_descr
*src_devicep
)
4785 size_t tmp_size
= 0;
4790 lock_src
= src_devicep
!= NULL
;
4791 lock_dst
= dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
;
4793 gomp_mutex_lock (&src_devicep
->lock
);
4795 gomp_mutex_lock (&dst_devicep
->lock
);
4796 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
4797 volume
, dst_offsets
, src_offsets
,
4798 dst_dimensions
, src_dimensions
,
4799 dst_devicep
, src_devicep
,
4802 gomp_mutex_unlock (&src_devicep
->lock
);
4804 gomp_mutex_unlock (&dst_devicep
->lock
);
/* OpenMP API: copy a multi-dimensional rectangular sub-volume between
   host/device memory.  Returns 0 on success, an errno-style value on
   failure, or INT_MAX when queried with NULL DST and SRC.  */

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
/* Argument bundle for the omp_target_memcpy_rect_async task helper; fields
   mirror the parameters of omp_target_memcpy_rect_copy.  */
typedef struct
{
  void *dst;
  const void *src;
  size_t element_size;
  const size_t *volume;
  const size_t *dst_offsets;
  const size_t *src_offsets;
  const size_t *dst_dimensions;
  const size_t *src_dimensions;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
  int num_dims;
} omp_target_memcpy_rect_data;
4853 omp_target_memcpy_rect_async_helper (void *args
)
4855 omp_target_memcpy_rect_data
*a
= args
;
4856 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
4857 a
->num_dims
, a
->volume
, a
->dst_offsets
,
4858 a
->src_offsets
, a
->dst_dimensions
,
4859 a
->src_dimensions
, a
->dst_devicep
,
4862 gomp_fatal ("omp_target_memcpy_rect failed");
4866 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
4867 int num_dims
, const size_t *volume
,
4868 const size_t *dst_offsets
,
4869 const size_t *src_offsets
,
4870 const size_t *dst_dimensions
,
4871 const size_t *src_dimensions
,
4872 int dst_device_num
, int src_device_num
,
4873 int depobj_count
, omp_depend_t
*depobj_list
)
4875 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4877 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4878 src_device_num
, &dst_devicep
,
4880 void *depend
[depobj_count
+ 5];
4883 omp_target_memcpy_rect_data s
= {
4886 .element_size
= element_size
,
4887 .num_dims
= num_dims
,
4889 .dst_offsets
= dst_offsets
,
4890 .src_offsets
= src_offsets
,
4891 .dst_dimensions
= dst_dimensions
,
4892 .src_dimensions
= src_dimensions
,
4893 .dst_devicep
= dst_devicep
,
4894 .src_devicep
= src_devicep
4900 if (depobj_count
> 0 && depobj_list
!= NULL
)
4902 flags
|= GOMP_TASK_FLAG_DEPEND
;
4904 depend
[1] = (void *) (uintptr_t) depobj_count
;
4905 depend
[2] = depend
[3] = depend
[4] = 0;
4906 for (i
= 0; i
< depobj_count
; ++i
)
4907 depend
[i
+ 5] = &depobj_list
[i
];
4910 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
4911 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4917 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
4918 size_t size
, size_t device_offset
, int device_num
)
4920 if (device_num
== omp_initial_device
4921 || device_num
== gomp_get_num_devices ())
4924 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4925 if (devicep
== NULL
)
4928 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4929 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4932 gomp_mutex_lock (&devicep
->lock
);
4934 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4935 struct splay_tree_key_s cur_node
;
4938 cur_node
.host_start
= (uintptr_t) host_ptr
;
4939 cur_node
.host_end
= cur_node
.host_start
+ size
;
4940 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4943 if (n
->tgt
->tgt_start
+ n
->tgt_offset
4944 == (uintptr_t) device_ptr
+ device_offset
4945 && n
->host_start
<= cur_node
.host_start
4946 && n
->host_end
>= cur_node
.host_end
)
4951 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
4952 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
4956 tgt
->to_free
= NULL
;
4958 tgt
->list_count
= 0;
4959 tgt
->device_descr
= devicep
;
4960 splay_tree_node array
= tgt
->array
;
4961 splay_tree_key k
= &array
->key
;
4962 k
->host_start
= cur_node
.host_start
;
4963 k
->host_end
= cur_node
.host_end
;
4965 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
4966 k
->refcount
= REFCOUNT_INFINITY
;
4967 k
->dynamic_refcount
= 0;
4970 array
->right
= NULL
;
4971 splay_tree_insert (&devicep
->mem_map
, array
);
4974 gomp_mutex_unlock (&devicep
->lock
);
4979 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
4981 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4982 if (devicep
== NULL
)
4985 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4988 gomp_mutex_lock (&devicep
->lock
);
4990 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4991 struct splay_tree_key_s cur_node
;
4994 cur_node
.host_start
= (uintptr_t) ptr
;
4995 cur_node
.host_end
= cur_node
.host_start
;
4996 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4998 && n
->host_start
== cur_node
.host_start
4999 && n
->refcount
== REFCOUNT_INFINITY
5000 && n
->tgt
->tgt_start
== 0
5001 && n
->tgt
->to_free
== NULL
5002 && n
->tgt
->refcount
== 1
5003 && n
->tgt
->list_count
== 0)
5005 splay_tree_remove (&devicep
->mem_map
, n
);
5006 gomp_unmap_tgt (n
->tgt
);
5010 gomp_mutex_unlock (&devicep
->lock
);
5015 omp_get_mapped_ptr (const void *ptr
, int device_num
)
5017 if (device_num
== omp_initial_device
5018 || device_num
== omp_get_initial_device ())
5019 return (void *) ptr
;
5021 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5022 if (devicep
== NULL
)
5025 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5026 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
5027 return (void *) ptr
;
5029 gomp_mutex_lock (&devicep
->lock
);
5031 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
5032 struct splay_tree_key_s cur_node
;
5035 cur_node
.host_start
= (uintptr_t) ptr
;
5036 cur_node
.host_end
= cur_node
.host_start
;
5037 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
5041 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
5042 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
5045 gomp_mutex_unlock (&devicep
->lock
);
5051 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
5053 if (device_num
== omp_initial_device
5054 || device_num
== gomp_get_num_devices ())
5057 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5058 if (devicep
== NULL
)
5061 /* TODO: Unified shared memory must be handled when available. */
5063 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
5067 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
5070 if (device_num
== omp_initial_device
5071 || device_num
== gomp_get_num_devices ())
5072 return gomp_pause_host ();
5074 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5075 if (devicep
== NULL
)
5078 /* Do nothing for target devices for now. */
5083 omp_pause_resource_all (omp_pause_resource_t kind
)
5086 if (gomp_pause_host ())
5088 /* Do nothing for target devices for now. */
5092 ialias (omp_pause_resource
)
5093 ialias (omp_pause_resource_all
)
5095 #ifdef PLUGIN_SUPPORT
5097 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
5099 The handles of the found functions are stored in the corresponding fields
5100 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5103 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
5104 const char *plugin_name
)
5106 const char *err
= NULL
, *last_missing
= NULL
;
5108 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
5110 #if OFFLOAD_DEFAULTED
5116 /* Check if all required functions are available in the plugin and store
5117 their handlers. None of the symbols can legitimately be NULL,
5118 so we don't need to check dlerror all the time. */
5120 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5122 /* Similar, but missing functions are not an error. Return false if
5123 failed, true otherwise. */
5124 #define DLSYM_OPT(f, n) \
5125 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5126 || (last_missing = #n, 0))
5129 if (device
->version_func () != GOMP_VERSION
)
5131 err
= "plugin version mismatch";
5138 DLSYM (get_num_devices
);
5139 DLSYM (init_device
);
5140 DLSYM (fini_device
);
5142 DLSYM (unload_image
);
5147 DLSYM_OPT (memcpy2d
, memcpy2d
);
5148 DLSYM_OPT (memcpy3d
, memcpy3d
);
5149 device
->capabilities
= device
->get_caps_func ();
5150 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5153 DLSYM_OPT (async_run
, async_run
);
5154 DLSYM_OPT (can_run
, can_run
);
5157 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5159 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
5160 || !DLSYM_OPT (openacc
.create_thread_data
,
5161 openacc_create_thread_data
)
5162 || !DLSYM_OPT (openacc
.destroy_thread_data
,
5163 openacc_destroy_thread_data
)
5164 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
5165 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
5166 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
5167 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
5168 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
5169 || !DLSYM_OPT (openacc
.async
.queue_callback
,
5170 openacc_async_queue_callback
)
5171 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
5172 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
5173 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
5174 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
5176 /* Require all the OpenACC handlers if we have
5177 GOMP_OFFLOAD_CAP_OPENACC_200. */
5178 err
= "plugin missing OpenACC handler function";
5183 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
5184 openacc_cuda_get_current_device
);
5185 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
5186 openacc_cuda_get_current_context
);
5187 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
5188 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
5189 if (cuda
&& cuda
!= 4)
5191 /* Make sure all the CUDA functions are there if any of them are. */
5192 err
= "plugin missing OpenACC CUDA handler function";
5204 gomp_error ("while loading %s: %s", plugin_name
, err
);
5206 gomp_error ("missing function was %s", last_missing
);
5208 dlclose (plugin_handle
);
5213 /* This function finalizes all initialized devices. */
5216 gomp_target_fini (void)
5219 for (i
= 0; i
< num_devices
; i
++)
5222 struct gomp_device_descr
*devicep
= &devices
[i
];
5223 gomp_mutex_lock (&devicep
->lock
);
5224 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
5225 ret
= gomp_fini_device (devicep
);
5226 gomp_mutex_unlock (&devicep
->lock
);
5228 gomp_fatal ("device finalization failed");
5232 /* This function initializes the runtime for offloading.
5233 It parses the list of offload plugins, and tries to load these.
5234 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5235 will be set, and the array DEVICES initialized, containing descriptors for
5236 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
5240 gomp_target_init (void)
5242 const char *prefix
="libgomp-plugin-";
5243 const char *suffix
= SONAME_SUFFIX (1);
5244 const char *cur
, *next
;
5246 int i
, new_num_devs
;
5247 int num_devs
= 0, num_devs_openmp
;
5248 struct gomp_device_descr
*devs
= NULL
;
5250 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
5253 cur
= OFFLOAD_PLUGINS
;
5257 struct gomp_device_descr current_device
;
5258 size_t prefix_len
, suffix_len
, cur_len
;
5260 next
= strchr (cur
, ',');
5262 prefix_len
= strlen (prefix
);
5263 cur_len
= next
? next
- cur
: strlen (cur
);
5264 suffix_len
= strlen (suffix
);
5266 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
5273 memcpy (plugin_name
, prefix
, prefix_len
);
5274 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
5275 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
5277 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
5279 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
5280 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
5281 if (gomp_debug_var
> 0 && new_num_devs
< 0)
5284 int type
= current_device
.get_type_func ();
5285 for (int img
= 0; img
< num_offload_images
; img
++)
5286 if (type
== offload_images
[img
].type
)
5290 char buf
[sizeof ("unified_address, unified_shared_memory, "
5291 "reverse_offload")];
5292 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
5293 char *name
= (char *) malloc (cur_len
+ 1);
5294 memcpy (name
, cur
, cur_len
);
5295 name
[cur_len
] = '\0';
5297 "%s devices present but 'omp requires %s' "
5298 "cannot be fulfilled\n", name
, buf
);
5302 else if (new_num_devs
>= 1)
5304 /* Augment DEVICES and NUM_DEVICES. */
5306 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
5307 * sizeof (struct gomp_device_descr
));
5315 current_device
.name
= current_device
.get_name_func ();
5316 /* current_device.capabilities has already been set. */
5317 current_device
.type
= current_device
.get_type_func ();
5318 current_device
.mem_map
.root
= NULL
;
5319 current_device
.mem_map_rev
.root
= NULL
;
5320 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
5321 for (i
= 0; i
< new_num_devs
; i
++)
5323 current_device
.target_id
= i
;
5324 devs
[num_devs
] = current_device
;
5325 gomp_mutex_init (&devs
[num_devs
].lock
);
5336 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5337 NUM_DEVICES_OPENMP. */
5338 struct gomp_device_descr
*devs_s
5339 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
5346 num_devs_openmp
= 0;
5347 for (i
= 0; i
< num_devs
; i
++)
5348 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5349 devs_s
[num_devs_openmp
++] = devs
[i
];
5350 int num_devs_after_openmp
= num_devs_openmp
;
5351 for (i
= 0; i
< num_devs
; i
++)
5352 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5353 devs_s
[num_devs_after_openmp
++] = devs
[i
];
5357 for (i
= 0; i
< num_devs
; i
++)
5359 /* The 'devices' array can be moved (by the realloc call) until we have
5360 found all the plugins, so registering with the OpenACC runtime (which
5361 takes a copy of the pointer argument) must be delayed until now. */
5362 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5363 goacc_register (&devs
[i
]);
5365 if (gomp_global_icv
.default_device_var
== INT_MIN
)
5367 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5368 struct gomp_icv_list
*none
;
5369 none
= gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX
);
5370 gomp_global_icv
.default_device_var
= (num_devs_openmp
5371 ? 0 : omp_invalid_device
);
5372 none
->icvs
.default_device_var
= gomp_global_icv
.default_device_var
;
5375 num_devices
= num_devs
;
5376 num_devices_openmp
= num_devs_openmp
;
5378 if (atexit (gomp_target_fini
) != 0)
5379 gomp_fatal ("atexit failed");
5382 #else /* PLUGIN_SUPPORT */
5383 /* If dlfcn.h is unavailable we always fallback to host execution.
5384 GOMP_target* routines are just stubs for this case. */
5386 gomp_target_init (void)
5389 #endif /* PLUGIN_SUPPORT */