/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "gomp-constants.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <stdio.h>  /* For snprintf.  */

#include "plugin-suffix.h"
/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_static
#include "splay-tree.h"
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);
/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
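
/* Illustrative sketch (not part of libgomp): 'pthread_once' is what makes
   'gomp_init_targets_once' below safe to call from any number of threads --
   the init routine runs exactly once.  The names in this hypothetical,
   self-contained example are made up for illustration.  */
#if 0
#include <pthread.h>
#include <stdio.h>

static pthread_once_t example_once = PTHREAD_ONCE_INIT;

static void
example_init (void)
{
  /* Runs exactly once, even if many threads race into example_entry.  */
  puts ("offload plugins initialized");
}

static void
example_entry (void)
{
  (void) pthread_once (&example_once, example_init);
}
#endif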
/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr
{
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};
/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of requirements registered via the OpenMP 'requires' directive.  */
static int omp_requires_mask;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
				 : omp_initial_device))
	return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && num_devices == 0)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but only the host device is available");
      else if (device_id == omp_invalid_device)
	gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != num_devices)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;

  return splay_tree_lookup (mem_map, key);
}
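
/* Example (illustration only): a zero-length key with
   host_start == host_end == 0x1000 is probed as [0x1000, 0x1001) to find a
   mapping containing that address, then as [0xfff, 0x1000) to find a mapping
   ending exactly there, before the final fallback lookup.  */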
static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
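
/* Worked example (illustration only): with the limits above, regions at
   tgt_start-relative offsets [0, 1024) and [2048, 3072) coalesce into one
   chunk [0, 3072), because the 1 KiB gap is below MAX_COALESCE_BUF_GAP; one
   host-to-device transfer then replaces two.  A 40 KiB region would exceed
   MAX_COALESCE_BUF_SIZE and be copied directly instead.  */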
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }

		  /* In an asynchronous context, verify that CBUF isn't used
		     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
		  if (__builtin_expect (aq != NULL, 0))
		    assert (ephemeral);

		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  As we've
	     not been able to use CBUF, make a copy of the data into a
	     temporary buffer.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
		      "dev", d, "host", h, sz);
}
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
}
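
/* Usage sketch (illustration only): mapping the same region twice on one
   construct must bump the refcount only once, which the set guarantees:

     htab_t set = htab_create (2);
     gomp_increment_refcount (k, &set);   refcount 1 -> 2
     gomp_increment_refcount (k, &set);   refcount_ptr already in set; still 2
     htab_free (set);
*/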
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
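
/* Illustration only: for a key whose refcount is 1, the first decrement on a
   construct sets *DO_COPY and *DO_REMOVE; a sibling structure-element map
   sharing the same refcount, handled later on the same construct, still gets
   *DO_COPY (each field may need copy-out) but not *DO_REMOVE, so the key is
   removed only once.  */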
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag, bool implicit,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
	  && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
			  && newn->host_start <= oldn->host_start
			  && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
	   /* Otherwise, new contained inside old is considered valid.  */
	   || (oldn->host_start <= newn->host_start
	       && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
	 address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
	{
	  /* We have to be careful not to overwrite still attached pointers
	     during the copyback to host.  */
	  uintptr_t addr = newn->host_start;
	  while (addr < newn->host_end)
	    {
	      size_t i = (addr - oldn->host_start) / sizeof (void *);
	      if (oldn->aux->attach_count[i] == 0)
		gomp_copy_host2dev (devicep, aq,
				    (void *) (oldn->tgt->tgt_start
					      + oldn->tgt_offset
					      + addr - oldn->host_start),
				    (void *) addr,
				    sizeof (void *), false, cbuf);
	      addr += sizeof (void *);
	    }
	}
      else
	gomp_copy_host2dev (devicep, aq,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = cur_node.host_start;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}
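
/* Illustration only: for 'int *p' pointing at 'a' with a mapped section
   'p[10:4]', HOST_PTR is the value of p (&a[0]) and BIAS is
   10 * sizeof (int).  Adding BIAS locates the mapped object a[10..14) in the
   splay tree; subtracting BIAS again from its device address makes the
   device-side p point 10 elements before the section, so p[10] hits the
   section start.  */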
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  /* As a special case, allow attaching NULL host pointers.  This
	     allows e.g. unassociated Fortran pointers to be mapped
	     properly.  */
	  data = 0;

	  gomp_debug (1,
		      "%s: attaching NULL host pointer, target %p "
		      "(struct base %p)\n", __FUNCTION__, (void *) devptr,
		      (void *) (n->tgt->tgt_start + n->tgt_offset));

	  gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			      sizeof (void *), true, cbufp);

	  return;
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       copy the host pointer when the target region is not mapped.  */
	    data = target;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
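
/* Illustration only: with 8-byte pointers, a pointer field at byte offset 24
   from n->host_start yields idx == 3, so n->aux->attach_count[3] counts the
   attachments currently targeting that pointer-sized slot of the mapped
   struct.  */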
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT
	       || (kind & typemask) == GOMP_MAP_STRUCT_UNORD)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind,
							  kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH
	       || ((kind & typemask)
		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_INLINED;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     statically assigned pointer, but that it should be to
		     the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier or if a
		 null pointer was there and now a non-null one has been
		 found; that permits transparent re-mapping for Fortran
		 array descriptors which were previously mapped
		 unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, implicit,
				  NULL, refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_INLINED;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
					  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;
	    size_t j;
	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset
				      + ((uintptr_t) hostaddrs[j]
					 - k->host_start),
				      sizes[j], cbufp, false);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    bool implicit = get_implicit (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		/* Save device address in hostaddr to permit later
		   availability when doing a deep-firstprivate with pointer
		   attach.  */
		hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
		tgt_size += len;

		/* If followed by GOMP_MAP_ATTACH, pointer assign this
		   firstprivate to hostaddrs[i+1], which is assumed to contain
		   a device address.  */
		if (i + 1 < mapnum
		    && (GOMP_MAP_ATTACH
			== (typemask & get_kind (short_mapkind, kinds, i+1))))
		  {
		    uintptr_t target = (uintptr_t) hostaddrs[i];
		    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
		    /* Per
		       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
		       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
		       this probably needs revision for 'aq' usage.  */
		    assert (!aq);
		    gomp_copy_host2dev (devicep, aq, devptr, &target,
					sizeof (void *), false, cbufp);
		    ++i;
		  }
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily test
		   'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT_UNORD:
		if (sizes[i] > 1)
		  {
		    void *first = hostaddrs[i + 1];
		    for (size_t j = i + 1; j < i + sizes[i]; j++)
		      if (hostaddrs[j + 1] != first)
			{
			  gomp_mutex_unlock (&devicep->lock);
			  gomp_fatal ("Mapped array elements must be the "
				      "same (%p vs %p)", first,
				      hostaddrs[j + 1]);
			}
		  }
		/* FALLTHRU */
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		if (sizes[last] == 0)
		  cur_node.host_end++;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (sizes[last] == 0)
		  cur_node.host_end--;
		if (n == NULL && cur_node.host_start == cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("Struct pointer member not mapped (%p)",
				(void*) hostaddrs[first]);
		  }
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp, refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if (i > 0
		    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
			!= GOMP_MAP_ALWAYS_POINTER))
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start
					    - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      bool zlas
			= ((kind & typemask)
			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i], sizes[i],
					   cbufp, zlas);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
= &array
->key
;
1597 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1598 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1599 k
->host_end
= k
->host_start
+ sizes
[i
];
1601 k
->host_end
= k
->host_start
+ sizeof (void *);
1602 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1603 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1605 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1607 /* For this condition to be true, there must be a
1608 duplicate struct element mapping. This can happen with
1609 GOMP_MAP_STRUCT_UNORD mappings, for example. */
1610 tgt
->list
[i
].key
= n
;
1613 assert ((n
->refcount
& REFCOUNT_STRUCTELEM
) != 0);
1614 assert (field_tgt_structelem_first
!= NULL
);
1616 if (i
== field_tgt_clear
)
1618 n
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1619 field_tgt_structelem_first
= NULL
;
1622 if (i
== field_tgt_clear
)
1623 field_tgt_clear
= FIELD_TGT_EMPTY
;
1624 gomp_increment_refcount (n
, refcount_set
);
1625 tgt
->list
[i
].copy_from
1626 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1627 tgt
->list
[i
].always_copy_from
1628 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1629 tgt
->list
[i
].is_attach
= false;
1630 tgt
->list
[i
].offset
= 0;
1631 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1634 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1635 kind
& typemask
, false, implicit
,
1636 cbufp
, refcount_set
);
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target
		       address of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		k->refcount = 0;
		k->dynamic_refcount = 0;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (openmp_p)
		      {
			k->refcount = REFCOUNT_STRUCTELEM;
			if (field_tgt_structelem_first == NULL)
			  {
			    /* Set to first structure element of sequence.  */
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
			    field_tgt_structelem_first = k;
			  }
			else
			  /* Point to refcount of leading element, but do not
			     increment again.  */
			  k->structelem_refcount_ptr
			    = &field_tgt_structelem_first->structelem_refcount;

			if (i == field_tgt_clear)
			  {
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
			    field_tgt_structelem_first = NULL;
			  }
		      }
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		/* First increment, from 0 to 1.  gomp_increment_refcount
		   encapsulates the different increment cases, so use this
		   instead of directly setting 1 during initialization.  */
		gomp_increment_refcount (k, refcount_set);

		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
		    gomp_map_pointer
		      (tgt, aq, (uintptr_t) *(void **) k->host_start,
		       k->tgt_offset, sizes[i], cbufp,
		       ((kind & typemask)
			== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    tgt->list[i].has_null_ptr_assoc = false;

		    for (j = i + 1; j < mapnum; j++)
		      {
			int ptr_kind = (get_kind (short_mapkind, kinds, j)
					& typemask);
			if (!GOMP_MAP_POINTER_P (ptr_kind)
			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
			  break;
			else if ((uintptr_t) hostaddrs[j] < k->host_start
				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				     > k->host_end))
			  break;
			else
			  {
			    tgt->list[j].key = k;
			    tgt->list[j].copy_from = false;
			    tgt->list[j].always_copy_from = false;
			    tgt->list[j].is_attach = false;
			    tgt->list[i].has_null_ptr_assoc
			      |= !(*(void **) hostaddrs[j]);
			    /* For OpenMP, the use of refcount_sets causes
			       errors if we set k->refcount = 1 above but also
			       increment it again here, for decrementing will
			       not properly match, since we decrement only once
			       for each key's refcount.  Therefore avoid this
			       increment for OpenMP constructs.  */
			    if (!openmp_p)
			      gomp_increment_refcount (k, refcount_set);
			    gomp_map_pointer (tgt, aq,
					      (uintptr_t) *(void **)
					      hostaddrs[j],
					      k->tgt_offset
					      + ((uintptr_t) hostaddrs[j]
						 - k->host_start),
					      sizes[j], cbufp, false);
			  }
		      }
		    i = j - 1;
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		  case GOMP_MAP_ALWAYS_PRESENT_TO:
		  case GOMP_MAP_ALWAYS_PRESENT_FROM:
		  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: not present on the device "
				  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
				  "dev: %d)", (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size,
				  devicep->target_id);
#else
		      gomp_fatal ("present clause: not present on the device "
				  "(addr: %p, size: %lu (0x%lx), dev: %d)",
				  (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size,
				  devicep->target_id);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), false, cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), true, NULL);
		  }
		array++;
	      }
	  }
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      true, cbufp);
	}
    }

  if (cbufp)
    {
      long c;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start,
			    false, NULL);
      if (aq)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
      else
	free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, htab_t *refcount_set,
	       enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}
attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node
	= (splay_tree_node) ((char *) k
			     - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}
/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
                 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}
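
/* Usage sketch (illustrative): how a caller pairs gomp_map_vars with
   gomp_unmap_vars around a shared refcount set; GOMP_target further below
   follows exactly this pattern.  */
#if 0
  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     &refcount_set, GOMP_MAP_VARS_TARGET);
  /* ... run the offloaded region ...  */
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt, true, &refcount_set);
  htab_free (refcount_set);
#endif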

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
                  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            if (n->aux && n->aux->attach_count)
              {
                uintptr_t addr = cur_node.host_start;
                while (addr < cur_node.host_end)
                  {
                    /* We have to be careful not to overwrite still attached
                       pointers during host<->device updates.  */
                    size_t i = (addr - cur_node.host_start) / sizeof (void *);
                    if (n->aux->attach_count[i] == 0)
                      {
                        void *devaddr = (void *) (n->tgt->tgt_start
                                                  + n->tgt_offset
                                                  + addr - n->host_start);
                        if (GOMP_MAP_COPY_TO_P (kind & typemask))
                          gomp_copy_host2dev (devicep, NULL,
                                              devaddr, (void *) addr,
                                              sizeof (void *), false, NULL);
                        if (GOMP_MAP_COPY_FROM_P (kind & typemask))
                          gomp_copy_dev2host (devicep, NULL,
                                              (void *) addr, devaddr,
                                              sizeof (void *));
                      }
                    addr += sizeof (void *);
                  }
              }
            else
              {
                void *hostaddr = (void *) cur_node.host_start;
                void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                          + cur_node.host_start
                                          - n->host_start);
                size_t size = cur_node.host_end - cur_node.host_start;

                if (GOMP_MAP_COPY_TO_P (kind & typemask))
                  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
                                      false, NULL);
                if (GOMP_MAP_COPY_FROM_P (kind & typemask))
                  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
              }
          }
        else
          {
            int kind = get_kind (short_mapkind, kinds, i);

            if (GOMP_MAP_PRESENT_P (kind))
              {
                /* We already looked up the memory region above and it
                   was missing.  */
                gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                gomp_fatal ("present clause: not present on the device "
                            "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
                            "dev: %d)", (void *) hostaddrs[i],
                            (uint64_t) sizes[i], (uint64_t) sizes[i],
                            devicep->target_id);
#else
                gomp_fatal ("present clause: not present on the device "
                            "(addr: %p, size: %lu (0x%lx), dev: %d)",
                            (void *) hostaddrs[i], (unsigned long) sizes[i],
                            (unsigned long) sizes[i], devicep->target_id);
#endif
              }
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}
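
/* User-level view (illustrative): a '#pragma omp target update' construct is
   what reaches gomp_update via GOMP_target_update{,_ext} below.  */
#if 0
int x[64];

void
example (void)
{
  #pragma omp target enter data map(to: x)
  x[0] = 42;                            /* Modify the host copy...  */
  #pragma omp target update to(x)       /* ...push it to the device,  */
  #pragma omp target update from(x)     /* ...or pull the device copy back.  */
  #pragma omp target exit data map(release: x)
}
#endif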

static struct gomp_offload_icv_list *
gomp_get_offload_icv_item (int dev_num)
{
  struct gomp_offload_icv_list *l = gomp_offload_icv_list;
  while (l != NULL && l->device_num != dev_num)
    l = l->next;

  return l;
}

/* Helper function for 'gomp_load_image_to_device'.  Returns the ICV values
   depending on the device num and the variable hierarchy
   (_DEV_42, _DEV, _ALL).  If no ICV was initially configured for the given
   device and thus no item with that device number is contained in
   gomp_offload_icv_list, then a new item is created and added to the list.  */

static struct gomp_offload_icvs *
get_gomp_offload_icvs (int dev_num)
{
  struct gomp_icv_list *dev
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
  struct gomp_icv_list *all
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
  struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
  struct gomp_offload_icv_list *offload_icvs
    = gomp_get_offload_icv_item (dev_num);

  if (offload_icvs != NULL)
    return &offload_icvs->icvs;

  struct gomp_offload_icv_list *new
    = (struct gomp_offload_icv_list *)
      gomp_malloc (sizeof (struct gomp_offload_icv_list));

  new->device_num = dev_num;
  new->icvs.device_num = dev_num;
  new->next = gomp_offload_icv_list;

  if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev_x->icvs.nteams_var;
  else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev->icvs.nteams_var;
  else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = all->icvs.nteams_var;
  else
    new->icvs.nteams = gomp_default_icv_values.nteams_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
  else if (dev != NULL
           && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
  else if (all != NULL
           && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
  else
    new->icvs.teams_thread_limit
      = gomp_default_icv_values.teams_thread_limit_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev_x->icvs.default_device_var;
  else if (dev != NULL
           && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev->icvs.default_device_var;
  else if (all != NULL
           && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = all->icvs.default_device_var;
  else
    new->icvs.default_device = gomp_default_icv_values.default_device_var;

  gomp_offload_icv_list = new;
  return &new->icvs;
}
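
/* Configuration sketch (illustrative; the variable names assume GCC's
   device-specific ICV environment variables): the _DEV_42/_DEV/_ALL
   hierarchy above corresponds to settings such as

     OMP_NUM_TEAMS_DEV_42=8    # device 42 only, highest precedence
     OMP_NUM_TEAMS_DEV=4       # any device without a _DEV_<n> setting
     OMP_NUM_TEAMS_ALL=2       # host and all devices, lowest precedence

   where the most specific value wins when the per-device ICV block is
   built.  */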

/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compilers
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];
  void **host_ind_func_table = NULL;
  void **host_ind_funcs_end = NULL;

  if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
    {
      host_ind_func_table = ((void ***) host_table)[4];
      host_ind_funcs_end  = ((void ***) host_table)[5];
    }

  /* The func and ind_func tables contain only addresses, the var table
     contains addresses and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;
  int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  uint64_t *rev_target_fn_table = NULL;
  int i, num_target_entries;

  /* With reverse offload, insert also target-host addresses.  */
  bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table,
                                rev_lookup ? &rev_target_fn_table : NULL,
                                num_ind_funcs
                                ? (uint64_t *) host_ind_func_table : NULL);

  if (num_target_entries != num_funcs + num_vars
      /* "+1" due to the additional ICV struct.  */
      && num_target_entries != num_funcs + num_vars + 1)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  /* "+1" due to the additional ICV struct.  */
  tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
                            * sizeof (*tgt->array));
  if (rev_target_fn_table)
    tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
  else
    tgt->rev_array = NULL;
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;
  reverse_splay_tree_node rev_array = tgt->rev_array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      if (rev_target_fn_table)
        {
          reverse_splay_tree_key k2 = &rev_array->key;
          k2->dev = rev_target_fn_table[i];
          k2->k = k;
          rev_array->left = NULL;
          rev_array->right = NULL;
          if (k2->dev != 0)
            reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
          rev_array++;
        }
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var
          && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Last entry is for the ICVs variable.
     Tolerate case where plugin does not return those entries.  */
  if (num_funcs + num_vars < num_target_entries)
    {
      struct addr_pair *var = &target_table[num_funcs + num_vars];

      /* Start address will be non-zero for the ICVs variable if
         the variable was found in this image.  */
      if (var->start != 0)
        {
          /* The index of the devicep within devices[] is regarded as its
             'device number', which is different from the per-device type
             devicep->target_id.  */
          int dev_num = (int) (devicep - &devices[0]);
          struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
          size_t var_size = var->end - var->start;
          if (var_size != sizeof (struct gomp_offload_icvs))
            {
              gomp_mutex_unlock (&devicep->lock);
              if (is_register_lock)
                gomp_mutex_unlock (&register_lock);
              gomp_fatal ("offload plugin managed 'icv struct' not of "
                          "expected size");
            }
          /* Copy the ICVs variable to place on device memory, hereby
             actually designating its device number into effect.  */
          gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
                              var_size, false, NULL);
          splay_tree_key k = &array->key;
          k->host_start = (uintptr_t) icvs;
          k->host_end
            = k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
          k->tgt = tgt;
          k->tgt_offset = var->start;
          k->refcount = REFCOUNT_INFINITY;
          k->dynamic_refcount = 0;
          k->aux = NULL;
          array->left = NULL;
          array->right = NULL;
          splay_tree_insert (&devicep->mem_map, array);
          array++;
        }
    }

  free (target_table);
}
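
/* Layout sketch (illustrative; the __offload_* symbol names are assumptions
   based on GCC's crtoffload* start files): HOST_TABLE is an array of
   pointers delimiting the function and variable tables, optionally followed
   by the indirect-function table for newer image versions.  */
#if 0
extern void *__offload_func_table[], *__offload_funcs_end[];
extern void *__offload_var_table[], *__offload_vars_end[];

void *host_table_sketch[] = {
  __offload_func_table,   /* [0] first host function address  */
  __offload_funcs_end,    /* [1] one past the last function  */
  __offload_var_table,    /* [2] first (address, size) variable pair  */
  __offload_vars_end,     /* [3] one past the last variable  */
  /* [4] and [5] bound the indirect-function table when
     GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version) holds.  */
};
#endif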

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }
  if (devicep->mem_map_rev.root)
    {
      /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
         allocated memory.  */
      assert (node && node->tgt && node->tgt->rev_array);
      assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
      free (node->tgt->rev_array);
      devicep->mem_map_rev.root = NULL;
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          is_tgt_unmapped = gomp_remove_var (devicep, n);
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

static void
gomp_requires_to_name (char *buf, size_t size, int requires_mask)
{
  char *end = buf + size, *p = buf;
  if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
    p += snprintf (p, end - p, "unified_address");
  if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
    p += snprintf (p, end - p, "%sunified_shared_memory",
                   (p == buf ? "" : ", "));
  if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
    p += snprintf (p, end - p, "%sreverse_offload",
                   (p == buf ? "" : ", "));
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  int omp_req;
  const void *target_data;
  if (GOMP_VERSION_LIB (version) > 1)
    {
      omp_req = (int) (size_t) ((void **) data)[0];
      target_data = &((void **) data)[1];
    }
  else
    {
      omp_req = 0;
      target_data = data;
    }

  gomp_mutex_lock (&register_lock);

  if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
    {
      char buf1[sizeof ("unified_address, unified_shared_memory, "
                        "reverse_offload")];
      char buf2[sizeof ("unified_address, unified_shared_memory, "
                        "reverse_offload")];
      gomp_requires_to_name (buf2, sizeof (buf2),
                             omp_req != GOMP_REQUIRES_TARGET_USED
                             ? omp_req : omp_requires_mask);
      if (omp_req != GOMP_REQUIRES_TARGET_USED
          && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
        {
          gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
          gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
                      "in multiple compilation units: '%s' vs. '%s'",
                      buf1, buf2);
        }
      else
        gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
                    "some compilation units", buf2);
    }
  omp_requires_mask = omp_req;

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
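
/* User-level view (illustrative): the 'requires' consistency check above
   fires when translation units disagree, e.g.

     // tu1.c
     #pragma omp requires unified_shared_memory
     // tu2.c uses a target construct but has no matching 'requires'

   Such a program links, but image registration aborts at load time with the
   gomp_fatal call above.  */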

/* Legacy entry point.  */

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  const void *target_data;
  if (GOMP_VERSION_LIB (version) > 1)
    target_data = &((void **) data)[1];
  else
    target_data = data;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

/* Legacy entry point.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
                      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
                "be used for offloading");

  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  if (args)
    while (*args)
      {
        intptr_t id = (intptr_t) *args++, val;
        if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
          val = (intptr_t) *args++;
        else
          val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
        if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
          continue;
        id &= GOMP_TARGET_ARG_ID_MASK;
        if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
          continue;
        val = val > INT_MAX ? INT_MAX : val;
        if (val)
          gomp_icv (true)->thread_limit_var = val;
        break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
        if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
          {
            *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1])
              = (uintptr_t) hostaddrs[i];
            ++i;
          }
      }
}
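
/* Usage sketch (illustrative): the two helpers above are always used as a
   pair; an over-aligned alloca block is carved up by copy_firstprivate_data.
   GOMP_target_ext below does exactly this.  */
#if 0
  size_t tgt_align = 0, tgt_size = 0;
  calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                       &tgt_align, &tgt_size);
  if (tgt_align)
    {
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                              tgt_align, tgt_size);
    }
#endif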

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);

  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
                     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
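
/* User-level view (illustrative): a plain target construct such as

     #pragma omp target map(tofrom: a)
     a += 1;

   is compiled into an outlined host function plus a call to GOMP_target_ext
   (or GOMP_target, for binaries built against the old ABI) with matching
   hostaddrs/sizes/kinds arrays.  */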

static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}

static void
gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
{
  struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
  if (item != NULL)
    {
      void *host_ptr = &item->icvs;
      void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
      if (dev_ptr != NULL)
        gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
                            sizeof (struct gomp_offload_icvs));
    }
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, each of which can take
   one or two elements: the first specifies for which device it is intended,
   the type, and optionally also the value.  If the value is not present in
   the first element, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value GOMP_teams will have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  /* Obtain the original TEAMS and THREADS values from ARGS.  */
  intptr_t orig_teams = 1, orig_threads = 0;
  size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
  void **tmpargs = args;
  while (*tmpargs)
    {
      intptr_t id = (intptr_t) *tmpargs++, val;
      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
        {
          val = (intptr_t) *tmpargs++;
          len = 2;
        }
      else
        {
          val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
          len = 1;
        }
      num_args += len;
      if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
        continue;
      val = val > INT_MAX ? INT_MAX : val;
      if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
        {
          orig_teams = val;
          teams_len = len;
        }
      else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
        {
          orig_threads = val;
          threads_len = len;
        }
    }

  intptr_t new_teams = orig_teams, new_threads = orig_threads;
  /* ORIG_TEAMS == -2: No explicit teams construct specified.  Set to 1.
     ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
     value could not be determined.  No change.
     ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
     Set device-specific value.
     ORIG_TEAMS > 0: Value was already set through e.g. a NUM_TEAMS clause.
     No change.  */
  if (orig_teams == -2)
    new_teams = 1;
  else if (orig_teams == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
        new_teams = item->icvs.nteams;
    }
  /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
     region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
     e.g. a THREAD_LIMIT clause.  */
  if (orig_teams > -2 && orig_threads == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
        new_threads = item->icvs.teams_thread_limit;
    }

  /* Copy and change the arguments list only if TEAMS or THREADS need to be
     updated.  */
  void **new_args = args;
  if (orig_teams != new_teams || orig_threads != new_threads)
    {
      size_t tms_len = (orig_teams == new_teams
                        ? teams_len
                        : (new_teams > -(1 << 15) && new_teams < (1 << 15)
                           ? 1 : 2));
      size_t ths_len = (orig_threads == new_threads
                        ? threads_len
                        : (new_threads > -(1 << 15) && new_threads < (1 << 15)
                           ? 1 : 2));
      /* One additional item after the last arg must be NULL.  */
      size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
                            + ths_len + 1;
      new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));

      tmpargs = args;
      void **tmp_new_args = new_args;
      /* Copy all args except TEAMS and THREADS.  TEAMS and THREADS are copied
         too if they have not been changed and skipped otherwise.  */
      while (*tmpargs)
        {
          intptr_t id = (intptr_t) *tmpargs;
          if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
               && orig_teams != new_teams)
              || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
                  && orig_threads != new_threads))
            {
              tmpargs++;
              if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
                tmpargs++;
            }
          else
            {
              *tmp_new_args++ = *tmpargs++;
              if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
                *tmp_new_args++ = *tmpargs++;
            }
        }

      /* Add the new TEAMS arg to the new args list if it has been changed.  */
      if (orig_teams != new_teams)
        {
          intptr_t new_val = new_teams;
          if (tms_len == 1)
            {
              new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
                        | GOMP_TARGET_ARG_NUM_TEAMS;
              *tmp_new_args++ = (void *) new_val;
            }
          else
            {
              *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
                                          | GOMP_TARGET_ARG_NUM_TEAMS);
              *tmp_new_args++ = (void *) new_val;
            }
        }

      /* Add the new THREADS arg to the new args list if it has been
         changed.  */
      if (orig_threads != new_threads)
        {
          intptr_t new_val = new_threads;
          if (ths_len == 1)
            {
              new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
                        | GOMP_TARGET_ARG_THREAD_LIMIT;
              *tmp_new_args++ = (void *) new_val;
            }
          else
            {
              *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
                                          | GOMP_TARGET_ARG_THREAD_LIMIT);
              *tmp_new_args++ = (void *) new_val;
            }
        }

      *tmp_new_args = NULL;
    }

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task **implicit_task = &task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          while (*implicit_task
                 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
            implicit_task = &(*implicit_task)->parent;
          if (*implicit_task)
            {
              thr->task = *implicit_task;
              gomp_end_task ();
              free (*implicit_task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
          if (implicit_task != &task)
            {
              *implicit_task = thr->task;
              thr->task = task;
            }
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, new_args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs, devicep, new_args);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                                true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     new_args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);

  /* Copy back ICVs from device to host.
     HOST_PTR is expected to exist since it was added in
     gomp_load_image_to_device if not already available.  */
  gomp_copy_back_icvs (devicep, device);
}
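
/* User-level view (illustrative): 'nowait' plus 'depend' on a target
   construct drives the target-task path in GOMP_target_ext above.  */
#if 0
void
example (int *p)
{
  #pragma omp task depend(out: p[0])
  p[0] = 1;

  #pragma omp target nowait depend(inout: p[0]) map(tofrom: p[0])
  p[0] += 41;

  #pragma omp taskwait
}
#endif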

/* Reverse lookup (device addr -> host addr) for reverse offload.  We avoid
   keeping track of all variable handling - assuming that reverse offload
   occurs only very rarely.  Downside is that the reverse search is slow.  */

struct gomp_splay_tree_rev_lookup_data
{
  uintptr_t tgt_start;
  uintptr_t tgt_end;
  splay_tree_key key;
};

static int
gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
{
  struct gomp_splay_tree_rev_lookup_data *data;
  data = (struct gomp_splay_tree_rev_lookup_data *)d;
  uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;

  if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
    return 0;

  size_t j;
  for (j = 0; j < key->tgt->list_count; j++)
    if (key->tgt->list[j].key == key)
      break;
  assert (j < key->tgt->list_count);
  uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;

  if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
      || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
    {
      data->key = key;
      return 1;
    }
  return 0;
}

static inline splay_tree_key
gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
                     bool zero_len)
{
  struct gomp_splay_tree_rev_lookup_data data;
  data.key = NULL;
  data.tgt_start = tgt_start;
  data.tgt_end = tgt_end;

  if (tgt_start != tgt_end)
    {
      splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
      return data.key;
    }

  /* For zero-length lookups, try [addr, addr+1) first, then [addr-1, addr).  */
  data.tgt_end++;
  splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
  if (data.key != NULL || zero_len)
    return data.key;
  data.tgt_end--;

  data.tgt_start--;
  splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
  return data.key;
}

struct cpy_data
{
  uint64_t devaddr;
  bool present, aligned;
};

/* Search just mapped reverse-offload data; returns index if found,
   otherwise N.  */

static inline size_t
gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
                           unsigned short *kinds, uint64_t *sizes, size_t n,
                           uint64_t tgt_start, uint64_t tgt_end)
{
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  size_t i;
  for (i = 0; i < n; i++)
    {
      bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
                        == GOMP_MAP_STRUCT);
      uint64_t dev_end;
      if (!is_struct)
        dev_end = d[i].devaddr + sizes[i];
      else
        {
          if (i + sizes[i] < n)
            dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
          else
            dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
        }
      if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
          || (dev_end > tgt_start && d[i].devaddr < tgt_end))
        break;
      if (is_struct)
        i += sizes[i];
    }
  return i;
}

static inline size_t
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
                       unsigned short *kinds, uint64_t *sizes,
                       size_t n, uint64_t tgt_start, uint64_t tgt_end,
                       bool zero_len)
{
  size_t i;
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
                                      tgt_start, tgt_end);
  tgt_end++;
  i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
                                 tgt_start, tgt_end);
  if (i < n || zero_len)
    return i;
  tgt_end--;

  tgt_start--;
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
                                    tgt_start, tgt_end);
}

/* Handle reverse offload.  This is called by the device plugins for a
   reverse offload; it is not called if the outer target runs on the host.
   The mapping is simplified: device-affecting constructs (except for target
   with device(ancestor:1)) must not be encountered; in particular not
   target (enter/exit) data.  */

attribute_hidden void
gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
                 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
                 struct goacc_asyncqueue *aq)
{
  /* Return early if there is no offload code.  */
  if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
    return;
  /* Currently, this fails because of calculate_firstprivate_requirements
     below; it could be fixed but additional code needs to be updated to
     handle 32bit hosts - thus, it is not worthwhile.  */
  if (sizeof (void *) != sizeof (uint64_t))
    gomp_fatal ("Reverse offload of 32bit hosts not supported.");

  struct cpy_data *cdata = NULL;
  uint64_t *devaddrs;
  uint64_t *sizes;
  unsigned short *kinds;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct gomp_device_descr *devicep = resolve_device (dev_num, false);

  reverse_splay_tree_key n;
  struct reverse_splay_tree_key_s k;
  k.dev = fn_ptr;

  gomp_mutex_lock (&devicep->lock);
  n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
  gomp_mutex_unlock (&devicep->lock);

  if (n == NULL)
    gomp_fatal ("Cannot find reverse-offload function");
  void (*host_fn)() = (void (*)()) n->k->host_start;

  if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
    {
      devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
      sizes = (uint64_t *) (uintptr_t) sizes_ptr;
      kinds = (unsigned short *) (uintptr_t) kinds_ptr;
    }
  else
    {
      devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      kinds = (unsigned short *) gomp_malloc (mapnum
                                              * sizeof (unsigned short));
      gomp_copy_dev2host (devicep, aq, devaddrs,
                          (const void *) (uintptr_t) devaddrs_ptr,
                          mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, sizes,
                          (const void *) (uintptr_t) sizes_ptr,
                          mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, kinds,
                          (const void *) (uintptr_t) kinds_ptr,
                          mapnum * sizeof (unsigned short));
      if (aq && !devicep->openacc.async.synchronize_func (aq))
        exit (EXIT_FAILURE);
    }

  size_t tgt_align = 0, tgt_size = 0;

  /* If actually executed on 32bit systems, the casts lead to wrong code;
     but 32bit with offloading is not supported; see top of this function.  */
  calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
                                       (void *) (uintptr_t) kinds,
                                       &tgt_align, &tgt_size);

  if (tgt_align)
    {
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
        tgt += tgt_align - al;
      tgt_size = 0;
      for (uint64_t i = 0; i < mapnum; i++)
        if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
            && devaddrs[i] != 0)
          {
            size_t align = (size_t) 1 << (kinds[i] >> 8);
            tgt_size = (tgt_size + align - 1) & ~(align - 1);
            if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
              memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
                      (size_t) sizes[i]);
            else
              {
                gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
                                    (void *) (uintptr_t) devaddrs[i],
                                    (size_t) sizes[i]);
                if (aq && !devicep->openacc.async.synchronize_func (aq))
                  exit (EXIT_FAILURE);
              }
            devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
            tgt_size = tgt_size + sizes[i];
            if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
                && i + 1 < mapnum
                && ((get_kind (short_mapkind, kinds, i) & typemask)
                    == GOMP_MAP_ATTACH))
              {
                *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
                  = (uint64_t) devaddrs[i];
                ++i;
              }
          }
    }

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      size_t j, struct_cpy = 0;
      splay_tree_key n2;
      cdata = gomp_alloca (sizeof (*cdata) * mapnum);
      memset (cdata, '\0', sizeof (*cdata) * mapnum);
      gomp_mutex_lock (&devicep->lock);
      for (uint64_t i = 0; i < mapnum; i++)
        {
          if (devaddrs[i] == 0)
            continue;
          n = NULL;
          int kind = get_kind (short_mapkind, kinds, i) & typemask;
          switch (kind)
            {
              case GOMP_MAP_FIRSTPRIVATE:
              case GOMP_MAP_FIRSTPRIVATE_INT:
                continue;

              case GOMP_MAP_DELETE:
              case GOMP_MAP_RELEASE:
              case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
                /* Assume it is present; look it up - but ignore unless the
                   present clause is there.  */
              case GOMP_MAP_ALLOC:
              case GOMP_MAP_FROM:
              case GOMP_MAP_FORCE_ALLOC:
              case GOMP_MAP_FORCE_FROM:
              case GOMP_MAP_ALWAYS_FROM:
              case GOMP_MAP_TO:
              case GOMP_MAP_TOFROM:
              case GOMP_MAP_FORCE_TO:
              case GOMP_MAP_FORCE_TOFROM:
              case GOMP_MAP_ALWAYS_TO:
              case GOMP_MAP_ALWAYS_TOFROM:
              case GOMP_MAP_FORCE_PRESENT:
              case GOMP_MAP_ALWAYS_PRESENT_FROM:
              case GOMP_MAP_ALWAYS_PRESENT_TO:
              case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                cdata[i].devaddr = devaddrs[i];
                bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                                 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
                j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
                                           devaddrs[i],
                                           devaddrs[i] + sizes[i], zero_len);
                if (j < i)
                  {
                    n2 = NULL;
                    cdata[i].present = true;
                    devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
                  }
                else
                  {
                    n2 = gomp_map_rev_lookup (&devicep->mem_map,
                                              devaddrs[i],
                                              devaddrs[i] + sizes[i],
                                              zero_len);
                    cdata[i].present = n2 != NULL;
                  }
                if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
                  {
                    gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                    gomp_fatal ("present clause: no corresponding data on "
                                "parent device at %p with size %"PRIu64,
                                (void *) (uintptr_t) devaddrs[i],
                                (uint64_t) sizes[i]);
#else
                    gomp_fatal ("present clause: no corresponding data on "
                                "parent device at %p with size %lu",
                                (void *) (uintptr_t) devaddrs[i],
                                (unsigned long) sizes[i]);
#endif
                  }
                else if (!cdata[i].present
                         && kind != GOMP_MAP_DELETE
                         && kind != GOMP_MAP_RELEASE
                         && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
                  {
                    cdata[i].aligned = true;
                    size_t align = (size_t) 1 << (kinds[i] >> 8);
                    devaddrs[i]
                      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
                                                                   sizes[i]);
                  }
                else if (n2 != NULL)
                  devaddrs[i] = (n2->host_start + cdata[i].devaddr
                                 - (n2->tgt->tgt_start + n2->tgt_offset));
                if (((!cdata[i].present || struct_cpy)
                     && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
                    || kind == GOMP_MAP_FORCE_TO
                    || kind == GOMP_MAP_FORCE_TOFROM
                    || GOMP_MAP_ALWAYS_TO_P (kind))
                  {
                    gomp_copy_dev2host (devicep, aq,
                                        (void *) (uintptr_t) devaddrs[i],
                                        (void *) (uintptr_t) cdata[i].devaddr,
                                        sizes[i]);
                    if (aq && !devicep->openacc.async.synchronize_func (aq))
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        exit (EXIT_FAILURE);
                      }
                  }
                if (struct_cpy)
                  struct_cpy--;
                break;
              case GOMP_MAP_ATTACH:
              case GOMP_MAP_POINTER:
              case GOMP_MAP_ALWAYS_POINTER:
                n2 = gomp_map_rev_lookup (&devicep->mem_map,
                                          devaddrs[i] + sizes[i],
                                          devaddrs[i] + sizes[i]
                                          + sizeof (void*), false);
                cdata[i].present = n2 != NULL;
                cdata[i].devaddr = devaddrs[i];
                if (n2)
                  devaddrs[i] = (n2->host_start + cdata[i].devaddr
                                 - (n2->tgt->tgt_start + n2->tgt_offset));
                else
                  {
                    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes,
                                               i, devaddrs[i] + sizes[i],
                                               devaddrs[i] + sizes[i]
                                               + sizeof (void*), false);
                    if (j < i)
                      {
                        cdata[i].present = true;
                        devaddrs[i] = (devaddrs[j] + devaddrs[i]
                                       - cdata[j].devaddr);
                      }
                  }
                if (!cdata[i].present)
                  devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
                /* Assume that when present, the pointer is already
                   correct.  */
                if (!n2)
                  *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
                    = devaddrs[i-1];
                break;
              case GOMP_MAP_TO_PSET:
                /* Assume that when present, the pointers are fine and no 'to:'
                   is required.  */
                n2 = gomp_map_rev_lookup (&devicep->mem_map,
                                          devaddrs[i], devaddrs[i] + sizes[i],
                                          false);
                cdata[i].present = n2 != NULL;
                cdata[i].devaddr = devaddrs[i];
                if (n2)
                  devaddrs[i] = (n2->host_start + cdata[i].devaddr
                                 - (n2->tgt->tgt_start + n2->tgt_offset));
                else
                  {
                    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes,
                                               i, devaddrs[i],
                                               devaddrs[i] + sizes[i], false);
                    if (j < i)
                      {
                        cdata[i].present = true;
                        devaddrs[i] = (devaddrs[j] + devaddrs[i]
                                       - cdata[j].devaddr);
                      }
                  }
                if (!cdata[i].present)
                  {
                    cdata[i].aligned = true;
                    size_t align = (size_t) 1 << (kinds[i] >> 8);
                    devaddrs[i]
                      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
                                                                   sizes[i]);
                    gomp_copy_dev2host (devicep, aq,
                                        (void *) (uintptr_t) devaddrs[i],
                                        (void *) (uintptr_t) cdata[i].devaddr,
                                        sizes[i]);
                    if (aq && !devicep->openacc.async.synchronize_func (aq))
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        exit (EXIT_FAILURE);
                      }
                  }
                for (j = i + 1; j < mapnum; j++)
                  {
                    kind = get_kind (short_mapkind, kinds, j) & typemask;
                    if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
                        && !GOMP_MAP_POINTER_P (kind))
                      break;
                    if (devaddrs[j] < devaddrs[i])
                      break;
                    if (cdata[i].present)
                      continue;
                    if (devaddrs[j] == 0)
                      {
                        *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
                        continue;
                      }
                    size_t k;
                    n2 = NULL;
                    /* Dereference devaddrs[j] to get the device addr.  */
                    assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
                    devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
                                                             + sizes[j]);
                    cdata[j].present = true;
                    cdata[j].devaddr = devaddrs[j];
                    if (devaddrs[j] == 0)
                      continue;
                    k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes,
                                               j, devaddrs[j],
                                               devaddrs[j] + sizeof (void*),
                                               false);
                    if (k < j)
                      devaddrs[j] = (devaddrs[k] + devaddrs[j]
                                     - cdata[k].devaddr);
                    else
                      {
                        n2 = gomp_map_rev_lookup (&devicep->mem_map,
                                                  devaddrs[j],
                                                  devaddrs[j] + sizeof (void*),
                                                  false);
                        if (n2 == NULL)
                          {
                            gomp_mutex_unlock (&devicep->lock);
                            gomp_fatal ("Pointer target wasn't mapped");
                          }
                        devaddrs[j] = (n2->host_start + cdata[j].devaddr
                                       - (n2->tgt->tgt_start
                                          + n2->tgt_offset));
                      }
                    *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
                      = (void *) (uintptr_t) devaddrs[j];
                  }
                i = j - 1;
                break;
              case GOMP_MAP_STRUCT:
                n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
                                          devaddrs[i + sizes[i]]
                                          + sizes[i + sizes[i]], false);
                cdata[i].present = n2 != NULL;
                cdata[i].devaddr = devaddrs[i];
                struct_cpy = cdata[i].present ? 0 : sizes[i];
                if (!n2)
                  {
                    size_t sz = (size_t) (devaddrs[i + sizes[i]]
                                          - devaddrs[i+1]
                                          + sizes[i + sizes[i]]);
                    size_t align = (size_t) 1 << (kinds[i] >> 8);
                    cdata[i].aligned = true;
                    devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
                    devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
                  }
                else
                  devaddrs[i] = (n2->host_start + cdata[i].devaddr
                                 - (n2->tgt->tgt_start + n2->tgt_offset));
                break;
              default:
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
            }
        }
      gomp_mutex_unlock (&devicep->lock);
    }

  host_fn (devaddrs);

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      uint64_t struct_cpy = 0;
      bool clean_struct = false;
      for (uint64_t i = 0; i < mapnum; i++)
        {
          if (cdata[i].devaddr == 0)
            continue;
          int kind = get_kind (short_mapkind, kinds, i) & typemask;
          bool copy = !cdata[i].present || struct_cpy;
          switch (kind)
            {
              case GOMP_MAP_FORCE_FROM:
              case GOMP_MAP_FORCE_TOFROM:
              case GOMP_MAP_ALWAYS_FROM:
              case GOMP_MAP_ALWAYS_TOFROM:
              case GOMP_MAP_ALWAYS_PRESENT_FROM:
              case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
                copy = true;
                /* FALLTHRU */
              case GOMP_MAP_FROM:
              case GOMP_MAP_TOFROM:
                if (copy)
                  {
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (uintptr_t) cdata[i].devaddr,
                                        (void *) (uintptr_t) devaddrs[i],
                                        sizes[i], false, NULL);
                    if (aq && !devicep->openacc.async.synchronize_func (aq))
                      exit (EXIT_FAILURE);
                  }
              default:
                break;
            }
          if (struct_cpy)
            {
              struct_cpy--;
              continue;
            }
          if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
            {
              clean_struct = true;
              struct_cpy = sizes[i];
            }
          else if (!cdata[i].present && cdata[i].aligned)
            gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
          else if (!cdata[i].present)
            free ((void *) (uintptr_t) devaddrs[i]);
        }
      if (clean_struct)
        for (uint64_t i = 0; i < mapnum; i++)
          if (!cdata[i].present
              && ((get_kind (short_mapkind, kinds, i) & typemask)
                  == GOMP_MAP_STRUCT))
            {
              devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
              gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
            }

      free (devaddrs);
      free (sizes);
      free (kinds);
    }
}
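
/* User-level view (illustrative): reverse offload is a target region that
   runs on the host again from within device code; the device plugin funnels
   it into gomp_target_rev above.  */
#if 0
#pragma omp requires reverse_offload

void
example (void)
{
  #pragma omp target
  {
    /* Executed on the device...  */
    #pragma omp target device (ancestor: 1)
    {
      /* ...but this block runs back on the host.  */
    }
  }
}
#endif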

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
                "be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}
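
/* User-level view (illustrative): a structured 'target data' region brackets
   GOMP_target_data{,_ext} and GOMP_target_end_data.  */
#if 0
void
example (double *v, size_t n)
{
  #pragma omp target data map(tofrom: v[0:n])  /* GOMP_target_data_ext.  */
  {
    #pragma omp target  /* Reuses the mapping of the enclosing region.  */
    v[0] = 1.0;
  }                                            /* GOMP_target_end_data.  */
}
#endif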

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
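
/* Illustrative sketch: a directive such as

     #pragma omp target update to(a[0:n]) depend(in: x) nowait

   arrives here with GOMP_TARGET_FLAG_NOWAIT set and a non-NULL depend
   array, so the update is deferred through gomp_create_target_task;
   without nowait, gomp_task_maybe_wait_for_dependencies blocks the
   parent task first and the update then runs inline via gomp_update.  */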
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    {
	      if (k->aux && k->aux->attach_count)
		{
		  /* We have to be careful not to overwrite still attached
		     pointers during the copyback to host.  */
		  uintptr_t addr = k->host_start;
		  while (addr < k->host_end)
		    {
		      size_t i = (addr - k->host_start) / sizeof (void *);
		      if (k->aux->attach_count[i] == 0)
			gomp_copy_dev2host (devicep, NULL, (void *) addr,
					    (void *) (k->tgt->tgt_start
						      + k->tgt_offset
						      + addr - k->host_start),
					    sizeof (void *));
		      addr += sizeof (void *);
		    }
		}
	      else
		gomp_copy_dev2host (devicep, NULL,
				    (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	    }

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since they maintain an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start
	     with.  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
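
/* Illustrative sketch: an unstructured pairing such as

     #pragma omp target enter data map(to: a[0:n])
     ...
     #pragma omp target exit data map(from: a[0:n])

   reaches gomp_exit_data for the second directive.  The FROM copyback
   only happens when gomp_decrement_refcount reports that this was the
   last reference (or unconditionally for always_from), and removal of
   splay-tree entries is deferred to the remove_vars queue above so that
   sibling structure elements can still be copied back first.  */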
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT
	  || (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j)
					       & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum
	       && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
		   || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
		       && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
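
/* Usage sketch: GOMP_TARGET_FLAG_EXIT_DATA in FLAGS selects the
   direction, so conceptually

     enter data -> gomp_map_vars (..., GOMP_MAP_VARS_ENTER_DATA) per item
     exit data  -> gomp_exit_data (...)

   with each enter-data list item mapped on its own (structs, TO_PSET
   groups and attach operations grouped as above) so that each can later
   be released independently.  */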
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
				ttask->args);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  gomp_unmap_vars (ttask->tgt, true, NULL);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      NULL, GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
	for (i = 0; i < ttask->mapnum; i++)
	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT
	      || (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
	    {
	      gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			     NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	      i += ttask->sizes[i];
	    }
	  else
	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
			   &ttask->sizes[i], &ttask->kinds[i], true,
			   &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
      else
	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
			ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}
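
/* Illustrative sketch: `#pragma omp target nowait` is what produces these
   target tasks.  The state machine here is two-phase: on first entry the
   task maps its variables, sets GOMP_TARGET_TASK_READY_TO_RUN and hands
   the kernel to the plugin's async_run hook; once the plugin signals
   completion the task is re-entered with state GOMP_TARGET_TASK_FINISHED
   and only then is ttask->tgt unmapped.  */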
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
bool
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
	     unsigned int thread_limit, bool first)
{
  struct gomp_thread *thr = gomp_thread ();
  if (first)
    {
      if (thread_limit)
	{
	  struct gomp_task_icv *icv = gomp_icv (true);
	  icv->thread_limit_var
	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
	}
      (void) num_teams_high;
      if (num_teams_low == 0)
	num_teams_low = 1;
      thr->num_teams = num_teams_low - 1;
      thr->team_num = 0;
    }
  else if (thr->team_num == thr->num_teams)
    return false;
  else
    ++thr->team_num;
  return true;
}
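
/* Illustrative sketch: for host fallback of

     #pragma omp teams num_teams(lo:hi)

   the compiler emits a loop that first calls GOMP_teams4 (lo, hi, limit,
   true) and then re-calls it with FIRST == false after each team body;
   the function returns true while another sequential "team" should run,
   bumping thr->team_num until it reaches thr->num_teams.  */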
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return malloc (size);

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL || device_ptr == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
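
/* Usage sketch (user code, not part of the library): device memory
   obtained from omp_target_alloc is not mapped, so it is typically used
   with is_device_ptr:

     #include <omp.h>

     int dev = omp_get_default_device ();
     double *p = omp_target_alloc (1024 * sizeof (double), dev);
     if (p != NULL)
       {
         #pragma omp target device(dev) is_device_ptr(p)
         for (int i = 0; i < 1024; i++)
           p[i] = i;
         omp_target_free (p, dev);
       }

   As the code above shows, both functions degrade to plain malloc/free
   for the initial device and for shared-memory devices.  */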
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return 1;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return 0;

  if (ptr == NULL)
    return 1;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
static int
omp_target_memcpy_check (int dst_device_num, int src_device_num,
			 struct gomp_device_descr **dst_devicep,
			 struct gomp_device_descr **src_devicep)
{
  if (dst_device_num != gomp_get_num_devices ()
      /* Above gomp_get_num_devices has to be called unconditionally.  */
      && dst_device_num != omp_initial_device)
    {
      *dst_devicep = resolve_device (dst_device_num, false);
      if (*dst_devicep == NULL)
	return EINVAL;

      if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*dst_devicep = NULL;
    }

  if (src_device_num != num_devices_openmp
      && src_device_num != omp_initial_device)
    {
      *src_devicep = resolve_device (src_device_num, false);
      if (*src_devicep == NULL)
	return EINVAL;

      if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*src_devicep = NULL;
    }

  return 0;
}
static int
omp_target_memcpy_copy (void *dst, const void *src, size_t length,
			size_t dst_offset, size_t src_offset,
			struct gomp_device_descr *dst_devicep,
			struct gomp_device_descr *src_devicep)
{
  bool ret;
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);

  return ret;
}
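
/* Usage sketch: copying NBYTES from a host buffer HP into a device
   allocation DP made with omp_target_alloc:

     int host = omp_get_initial_device ();
     int dev = omp_get_default_device ();
     int err = omp_target_memcpy (dp, hp, nbytes, 0, 0, dev, host);

   A zero return indicates success; EINVAL indicates an invalid device
   number or a failed transfer, matching omp_target_memcpy_copy above.
   (dp, hp and nbytes are hypothetical caller variables.)  */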
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;

static void
omp_target_memcpy_async_helper (void *args)
{
  omp_target_memcpy_data *a = args;
  if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
			      a->src_offset, a->dst_devicep, a->src_devicep))
    gomp_fatal ("omp_target_memcpy failed");
}
int
omp_target_memcpy_async (void *dst, const void *src, size_t length,
			 size_t dst_offset, size_t src_offset,
			 int dst_device_num, int src_device_num,
			 int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  void *depend[depobj_count + 5];
  int i;
  int check = omp_target_memcpy_check (dst_device_num, src_device_num,
				       &dst_devicep, &src_devicep);
  if (check)
    return check;

  omp_target_memcpy_data s = {
    .dst = dst,
    .src = src,
    .length = length,
    .dst_offset = dst_offset,
    .src_offset = src_offset,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
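
/* Usage sketch: with no depend objects the copy still runs as a task, so
   the caller must synchronize explicitly:

     omp_target_memcpy_async (dp, hp, nbytes, 0, 0, dev, host, 0, NULL);
     #pragma omp taskwait

   With depobj_count > 0, the depend vector built above gives the task the
   same ordering guarantees as a depend clause on a target task.  (dp, hp,
   nbytes, dev and host are hypothetical caller variables.)  */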
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep,
			       size_t *tmp_size, void **tmp)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	{
	  if (*tmp_size == 0)
	    {
	      *tmp_size = length;
	      *tmp = malloc (length);
	      if (*tmp == NULL)
		return ENOMEM;
	    }
	  else if (*tmp_size < length)
	    {
	      *tmp_size = length;
	      free (*tmp);
	      *tmp = malloc (length);
	      if (*tmp == NULL)
		return ENOMEM;
	    }
	  ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
					    (const char *) src + src_off,
					    length);
	  if (ret == 1)
	    ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					      (char *) dst + dst_off, *tmp,
					      length);
	}
      return ret ? 0 : EINVAL;
    }

  /* host->device, device->host and intra device.  */
  if (num_dims == 2
      && ((src_devicep
	   && src_devicep == dst_devicep
	   && src_devicep->memcpy2d_func)
	  || (!src_devicep != !dst_devicep
	      && ((src_devicep && src_devicep->memcpy2d_func)
		  || (dst_devicep && dst_devicep->memcpy2d_func)))))
    {
      size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
      int dst_id = dst_devicep ? dst_devicep->target_id : -1;
      int src_id = src_devicep ? src_devicep->target_id : -1;
      struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;

      if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
	  || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
	  || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
	  || __builtin_mul_overflow (dst_offsets[1], element_size,
				     &dst_off_sz1)
	  || __builtin_mul_overflow (src_offsets[1], element_size,
				     &src_off_sz1))
	return EINVAL;
      ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
				 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
				 src, src_off_sz1, src_offsets[0], src_sz1);
      if (ret != -1)
	return ret ? 0 : EINVAL;
    }
  else if (num_dims == 3
	   && ((src_devicep
		&& src_devicep == dst_devicep
		&& src_devicep->memcpy3d_func)
	       || (!src_devicep != !dst_devicep
		   && ((src_devicep && src_devicep->memcpy3d_func)
		       || (dst_devicep && dst_devicep->memcpy3d_func)))))
    {
      size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
      int dst_id = dst_devicep ? dst_devicep->target_id : -1;
      int src_id = src_devicep ? src_devicep->target_id : -1;
      struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;

      if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
	  || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
	  || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
	  || __builtin_mul_overflow (dst_offsets[2], element_size,
				     &dst_off_sz2)
	  || __builtin_mul_overflow (src_offsets[2], element_size,
				     &src_off_sz2))
	return EINVAL;
      ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
				 dst, dst_off_sz2, dst_offsets[1],
				 dst_offsets[0], dst_sz2, dst_dimensions[1],
				 src, src_off_sz2, src_offsets[1],
				 src_offsets[0], src_sz2, src_dimensions[1]);
      if (ret != -1)
	return ret ? 0 : EINVAL;
    }

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep, tmp_size, tmp);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
static int
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
			      int src_device_num,
			      struct gomp_device_descr **dst_devicep,
			      struct gomp_device_descr **src_devicep)
{
  if (!dst && !src)
    return INT_MAX;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     dst_devicep, src_devicep);
  if (ret)
    return ret;

  return 0;
}
static int
omp_target_memcpy_rect_copy (void *dst, const void *src,
			     size_t element_size, int num_dims,
			     const size_t *volume, const size_t *dst_offsets,
			     const size_t *src_offsets,
			     const size_t *dst_dimensions,
			     const size_t *src_dimensions,
			     struct gomp_device_descr *dst_devicep,
			     struct gomp_device_descr *src_devicep)
{
  size_t tmp_size = 0;
  void *tmp = NULL;
  bool lock_src, lock_dst;

  lock_src = src_devicep != NULL;
  lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
  if (lock_src)
    gomp_mutex_lock (&src_devicep->lock);
  if (lock_dst)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep,
					   &tmp_size, &tmp);
  if (lock_src)
    gomp_mutex_unlock (&src_devicep->lock);
  if (lock_dst)
    gomp_mutex_unlock (&dst_devicep->lock);
  if (tmp)
    free (tmp);

  return ret;
}
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
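
/* Usage sketch: copy a 2x3 sub-block starting at row 1, column 2 of a
   4x5 host matrix of doubles into the origin of a 2x3 device matrix:

     size_t volume[2]  = { 2, 3 };
     size_t dst_off[2] = { 0, 0 };
     size_t src_off[2] = { 1, 2 };
     size_t dst_dim[2] = { 2, 3 };
     size_t src_dim[2] = { 4, 5 };
     omp_target_memcpy_rect (dmat, hmat, sizeof (double), 2, volume,
			     dst_off, src_off, dst_dim, src_dim,
			     dev, omp_get_initial_device ());

   Per the worker's recursion, row j of the source starts
   ((src_off[0] + j) * src_dim[1] + src_off[1]) elements past the base
   pointer.  (dmat, hmat and dev are hypothetical caller variables.)  */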
typedef struct
{
  void *dst;
  const void *src;
  size_t element_size;
  const size_t *volume;
  const size_t *dst_offsets;
  const size_t *src_offsets;
  const size_t *dst_dimensions;
  const size_t *src_dimensions;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
  int num_dims;
} omp_target_memcpy_rect_data;

static void
omp_target_memcpy_rect_async_helper (void *args)
{
  omp_target_memcpy_rect_data *a = args;
  int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
					 a->num_dims, a->volume, a->dst_offsets,
					 a->src_offsets, a->dst_dimensions,
					 a->src_dimensions, a->dst_devicep,
					 a->src_devicep);
  if (ret)
    gomp_fatal ("omp_target_memcpy_rect failed");
}
int
omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
			      int num_dims, const size_t *volume,
			      const size_t *dst_offsets,
			      const size_t *src_offsets,
			      const size_t *dst_dimensions,
			      const size_t *src_dimensions,
			      int dst_device_num, int src_device_num,
			      int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  int i;
  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  void *depend[depobj_count + 5];

  if (check)
    return check;

  omp_target_memcpy_rect_data s = {
    .dst = dst,
    .src = src,
    .element_size = element_size,
    .num_dims = num_dims,
    .volume = volume,
    .dst_offsets = dst_offsets,
    .src_offsets = src_offsets,
    .dst_dimensions = dst_dimensions,
    .src_dimensions = src_dimensions,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
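
/* Usage sketch: pre-associating a device buffer so that subsequent map
   clauses reuse it rather than allocating:

     void *dbuf = omp_target_alloc (n * sizeof (double), dev);
     omp_target_associate_ptr (harr, dbuf, n * sizeof (double), 0, dev);
     ...
     omp_target_disassociate_ptr (harr, dev);
     omp_target_free (dbuf, dev);

   The entry is created with REFCOUNT_INFINITY, so ordinary unmapping
   never frees it; it persists until explicitly disassociated.  (harr, n
   and dev are hypothetical caller variables.)  */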
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void *
omp_get_mapped_ptr (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == omp_get_initial_device ())
    return (void *) ptr;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return (void *) ptr;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  void *ret = NULL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);

  if (n)
    {
      uintptr_t offset = cur_node.host_start - n->host_start;
      ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
    }

  gomp_mutex_unlock (&devicep->lock);

  return ret;
}
int
omp_target_is_accessible (const void *ptr, size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return true;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return false;

  /* TODO: Unified shared memory must be handled when available.  */

  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
}
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return gomp_pause_host ();

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return -1;

  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  DLSYM_OPT (memcpy2d, memcpy2d);
  DLSYM_OPT (memcpy3d, memcpy3d);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
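
/* Illustrative sketch: a plugin is an ordinary shared object exporting
   "GOMP_OFFLOAD_"-prefixed symbols matching the DLSYM/DLSYM_OPT lookups
   above.  A hypothetical minimal OpenMP-capable plugin would provide,
   among others:

     const char *GOMP_OFFLOAD_get_name (void);
     unsigned int GOMP_OFFLOAD_get_caps (void);
     int GOMP_OFFLOAD_get_type (void);
     int GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask);
     bool GOMP_OFFLOAD_init_device (int n);
     bool GOMP_OFFLOAD_fini_device (int n);

   Symbols looked up with DLSYM_OPT (memcpy2d, memcpy3d, async_run,
   can_run, ...) may be absent; missing required ones are reported via
   the gomp_error calls above.  (The signatures shown are a sketch; the
   authoritative prototypes live in libgomp-plugin.h.)  */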
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
	    new_num_devs = current_device.get_num_devices_func (omp_req);
	    if (gomp_debug_var > 0 && new_num_devs < 0)
	      {
		bool found = false;
		int type = current_device.get_type_func ();
		for (int img = 0; img < num_offload_images; img++)
		  if (type == offload_images[img].type)
		    found = true;
		if (found)
		  {
		    char buf[sizeof ("unified_address, unified_shared_memory, "
				     "reverse_offload")];
		    gomp_requires_to_name (buf, sizeof (buf), omp_req);
		    char *name = (char *) malloc (cur_len + 1);
		    memcpy (name, cur, cur_len);
		    name[cur_len] = '\0';
		    gomp_debug (1,
				"%s devices present but 'omp requires %s' "
				"cannot be fulfilled\n", name, buf);
		    free (name);
		  }
	      }
	    else if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devs = realloc (devs, (num_devs + new_num_devs)
				      * sizeof (struct gomp_device_descr));
		if (!devs)
		  {
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.mem_map_rev.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }

  if (gomp_global_icv.default_device_var == INT_MIN)
    {
      /* This implies OMP_TARGET_OFFLOAD=mandatory.  */
      struct gomp_icv_list *none;
      none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
      gomp_global_icv.default_device_var = (num_devs_openmp
					    ? 0 : omp_invalid_device);
      none->icvs.default_device_var = gomp_global_icv.default_device_var;
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
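
/* Example of the name assembly above: with OFFLOAD_PLUGINS being e.g.
   "nvptx,gcn" and SONAME_SUFFIX (1) expanding to a target-dependent
   string such as ".so.1", the loop attempts to dlopen
   "libgomp-plugin-nvptx.so.1" and then "libgomp-plugin-gcn.so.1",
   appending every device a successfully loaded plugin reports to the
   DEVICES array.  */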
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */