/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
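
/* Note (editorial gloss): these helpers parameterize libgomp's generic hash
   table for pointer-sized keys.  The refcount-tracking code below
   (gomp_increment_refcount / gomp_decrement_refcount) uses such a table as a
   "refcount set" keyed on the address of a refcount word, so hashing and
   comparing the raw pointer values is sufficient.  */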

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr
{
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != GOMP_DEVICE_HOST_FALLBACK
	  && device_id != num_devices_openmp)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
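
/* For illustration: a device_id of GOMP_DEVICE_ICV means "use the
   default-device-var ICV", so e.g.

     #pragma omp target

   without a device() clause resolves through the ICV above, while an
   explicit device(2) passes 2 straight to the range check.  (Example
   values are illustrative only.)  */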

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;

  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
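
/* Note (editorial gloss): zero-length array sections are represented by
   keys with host_start == host_end, which a plain splay-tree lookup would
   never match.  The two helpers above therefore widen the key by one byte
   (and, for gomp_map_lookup, also probe one byte below host_start) before
   looking it up.  */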

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

attribute_hidden void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
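
/* Worked example (illustrative numbers): with MAX_COALESCE_BUF_GAP of
   4 KiB, copying a 1 KiB region at device offset 0 and a 2 KiB region at
   offset 3 KiB yields a single chunk [0, 5 KiB); the 2 KiB gap is simply
   transferred along with the payload.  A region at offset 40 KiB instead
   starts a new chunk, and any region larger than MAX_COALESCE_BUF_SIZE
   bypasses the buffer entirely.  */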

/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
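
/* Note (editorial gloss): because regions are added in increasing
   device-offset order (a region starting below the previous chunk's end
   disables coalescing outright), the chunks array stays sorted by start
   offset; gomp_copy_host2dev relies on this when it binary-searches for
   the chunk covering a given device offset.  */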

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_RELEASE:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  Make a
	     copy of the data into a temporary buffer in those cases.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free temporary buffer once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d,
		      sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
}
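
/* Illustrative example: for a construct such as

     #pragma omp target enter data map(to: x) map(to: x)

   both map clauses resolve to the same refcount word; with a refcount set
   in use the second increment is skipped, so the region's refcount still
   only rises by one for the whole construct.  */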

/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
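
/* Summary of the out-values (derived from the logic above): DO_COPY is
   true whenever this call observes the refcount at zero, either because we
   just set it to zero or because a sibling call on the same construct
   already did; DO_REMOVE is true only for the single call that actually
   performed the transition to zero.  So for two maps of the same structure
   element on one construct, both may request a copy-back, but only the
   first requests removal.  */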

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag, bool implicit,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
	  || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
			  && newn->host_start <= oldn->host_start
			  && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
	   /* Otherwise, new contained inside old is considered valid.  */
	   || (oldn->host_start <= newn->host_start
	       && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
	 address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
	{
	  /* We have to be careful not to overwrite still attached pointers
	     during the copyback to host.  */
	  uintptr_t addr = newn->host_start;
	  while (addr < newn->host_end)
	    {
	      size_t i = (addr - oldn->host_start) / sizeof (void *);
	      if (oldn->aux->attach_count[i] == 0)
		gomp_copy_host2dev (devicep, aq,
				    (void *) (oldn->tgt->tgt_start
					      + oldn->tgt_offset
					      + addr - oldn->host_start),
				    (void *) addr,
				    sizeof (void *), false, cbuf);
	      addr += sizeof (void *);
	    }
	}
      else
	gomp_copy_host2dev (devicep, aq,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}
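
/* Note (editorial gloss): the map-kind arrays pack two fields per entry.
   For short (16-bit) kinds, the low 8 bits hold the GOMP_MAP_* kind (hence
   the 0xff typemask used below) and the upper bits hold the log2 of the
   mapping's alignment; for the legacy 8-bit encoding it is 3 and 5 bits
   respectively.  gomp_map_vars_internal recovers the alignment as
   "(size_t) 1 << (kind >> rshift)".  */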

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = 0;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, false,
				      implicit, cbuf, refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       allow attaching to NULL pointers when the target region is not
	       mapped.  */
	    data = 0;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
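
/* Attach/detach bookkeeping in a nutshell (editorial gloss): each
   pointer-sized slot of an attached struct carries its own counter; only
   the 0 -> 1 attachment writes the translated device pointer, and only the
   1 -> 0 detachment restores the host value, while intermediate
   attach/detach pairs just adjust the counter.  A 'finalize' detach forces
   the counter to 1 first so the restore always happens.  */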

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
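
/* Note (editorial gloss): for entries without a splay-tree key,
   tgt->list[i].offset doubles as a discriminator in gomp_map_val above:
   OFFSET_INLINED passes the (possibly already translated) host address in
   hostaddrs[i] through, OFFSET_POINTER yields a null pointer,
   OFFSET_STRUCT defers to the following struct member entry, and any other
   value is a plain offset into this tgt's device block.  */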

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }
  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask)
		       == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind,
							  kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH
	       || ((kind & typemask)
		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     a statically assigned pointer, but then it should be to
		     the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier or if a
		 null pointer was there and a nonnull one has been found;
		 that permits transparent re-mapping for Fortran array
		 descriptors which were previously mapped unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j)
				 & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, implicit,
				  NULL, refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
					  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;
	    size_t j;
	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
						       - k->host_start),
				      sizes[j], cbufp, false);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    bool implicit = get_implicit (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily
		   test 'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp,
					    refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start
					    - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      bool zlas
			= ((kind & typemask)
			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i], sizes[i],
					   cbufp, zlas);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, false, implicit, cbufp,
				      refcount_set);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target
		       address of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		k->refcount = 0;
		k->dynamic_refcount = 0;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (openmp_p)
		      {
			k->refcount = REFCOUNT_STRUCTELEM;
			if (field_tgt_structelem_first == NULL)
			  {
			    /* Set to first structure element of sequence.  */
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
			    field_tgt_structelem_first = k;
			  }
			else
			  /* Point to refcount of leading element, but do not
			     increment again.  */
			  k->structelem_refcount_ptr
			    = &field_tgt_structelem_first->structelem_refcount;

			if (i == field_tgt_clear)
			  {
			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
			    field_tgt_structelem_first = NULL;
			  }
		      }
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		/* First increment, from 0 to 1.  gomp_increment_refcount
		   encapsulates the different increment cases, so use this
		   instead of directly setting 1 during initialization.  */
		gomp_increment_refcount (k, refcount_set);

		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
		    gomp_map_pointer
		      (tgt, aq, (uintptr_t) *(void **) k->host_start,
		       k->tgt_offset, sizes[i], cbufp,
		       ((kind & typemask)
			== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start,
					false, cbufp);
		    tgt->list[i].has_null_ptr_assoc = false;

		    for (j = i + 1; j < mapnum; j++)
		      {
			int ptr_kind = (get_kind (short_mapkind, kinds, j)
					& typemask);
			if (!GOMP_MAP_POINTER_P (ptr_kind)
			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
			  break;
			else if ((uintptr_t) hostaddrs[j] < k->host_start
				 || ((uintptr_t) hostaddrs[j]
				     + sizeof (void *)
				     > k->host_end))
			  break;
			else
			  {
			    tgt->list[j].key = k;
			    tgt->list[j].copy_from = false;
			    tgt->list[j].always_copy_from = false;
			    tgt->list[j].is_attach = false;
			    tgt->list[i].has_null_ptr_assoc
			      |= !(*(void **) hostaddrs[j]);
			    /* For OpenMP, the use of refcount_sets causes
			       errors if we set k->refcount = 1 above but also
			       increment it again here, for decrementing will
			       not properly match, since we decrement only
			       once for each key's refcount.  Therefore avoid
			       this increment for OpenMP constructs.  */
			    if (!openmp_p)
			      gomp_increment_refcount (k, refcount_set);
			    gomp_map_pointer (tgt, aq,
					      (uintptr_t)
					      *(void **) hostaddrs[j],
					      k->tgt_offset
					      + ((uintptr_t) hostaddrs[j]
						 - k->host_start),
					      sizes[j], cbufp, false);
			  }
		      }
		    i = j - 1;
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), false, cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's
		       not data allocated by the current call to this
		       function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), true,
					NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      true, cbufp);
	}
    }

  if (cbufp)
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!aq);

      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start,
			    true, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, htab_t *refcount_set,
	       enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}

attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
	     void **hostaddrs, size_t *sizes, void *kinds,
	     bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    if (n->aux && n->aux->attach_count)
	      {
		uintptr_t addr = cur_node.host_start;
		while (addr < cur_node.host_end)
		  {
		    /* We have to be careful not to overwrite still attached
		       pointers during host<->device updates.  */
		    size_t i = (addr - cur_node.host_start) / sizeof (void *);
		    if (n->aux->attach_count[i] == 0)
		      {
			void *devaddr = (void *) (n->tgt->tgt_start
						  + n->tgt_offset
						  + addr - n->host_start);
			if (GOMP_MAP_COPY_TO_P (kind & typemask))
			  gomp_copy_host2dev (devicep, NULL,
					      devaddr, (void *) addr,
					      sizeof (void *), false, NULL);
			if (GOMP_MAP_COPY_FROM_P (kind & typemask))
			  gomp_copy_dev2host (devicep, NULL,
					      (void *) addr, devaddr,
					      sizeof (void *));
		      }
		    addr += sizeof (void *);
		  }
	      }
	    else
	      {
		void *hostaddr = (void *) cur_node.host_start;
		void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
					  + cur_node.host_start
					  - n->host_start);
		size_t size = cur_node.host_end - cur_node.host_start;

		if (GOMP_MAP_COPY_TO_P (kind & typemask))
		  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				      false, NULL);
		if (GOMP_MAP_COPY_FROM_P (kind & typemask))
		  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	      }
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compiler
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Currently the only "other" entry is 'device_num'.  */
  int num_others = 1;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars
      /* Others (device_num) are included as trailing entries in pair list.  */
      && num_target_entries != num_funcs + num_vars + num_others)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
2151 for (i
= 0; i
< num_vars
; i
++)
2153 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2154 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2155 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2157 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2159 gomp_mutex_unlock (&devicep
->lock
);
2160 if (is_register_lock
)
2161 gomp_mutex_unlock (®ister_lock
);
2162 gomp_fatal ("Cannot map target variables (size mismatch)");
2165 splay_tree_key k
= &array
->key
;
2166 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2168 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2170 k
->tgt_offset
= target_var
->start
;
2171 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2172 k
->dynamic_refcount
= 0;
2175 array
->right
= NULL
;
2176 splay_tree_insert (&devicep
->mem_map
, array
);
2180 /* Last entry is for the on-device 'device_num' variable. Tolerate case
2181 where plugin does not return this entry. */
2182 if (num_funcs
+ num_vars
< num_target_entries
)
2184 struct addr_pair
*device_num_var
= &target_table
[num_funcs
+ num_vars
];
2185 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2186 was found in this image. */
2187 if (device_num_var
->start
!= 0)
2189 /* The index of the devicep within devices[] is regarded as its
2190 'device number', which is different from the per-device type
2191 devicep->target_id. */
2192 int device_num_val
= (int) (devicep
- &devices
[0]);
2193 if (device_num_var
->end
- device_num_var
->start
!= sizeof (int))
2195 gomp_mutex_unlock (&devicep
->lock
);
2196 if (is_register_lock
)
2197 gomp_mutex_unlock (®ister_lock
);
2198 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2202 /* Copy device_num value to place on device memory, hereby actually
2203 designating its device number into effect. */
2204 gomp_copy_host2dev (devicep
, NULL
, (void *) device_num_var
->start
,
2205 &device_num_val
, sizeof (int), false, NULL
);
2209 free (target_table
);
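
/* Illustration: the HOST_TABLE decoded above is an array of four pointers
   bracketing the host-side function and variable tables; the variable table
   interleaves addresses with (size | link_bit) words.  A hypothetical image
   with two functions and one variable could look like:

     void *func_table[2] = { &foo, &bar };
     void *var_table[2]  = { &global_var, (void *) sizeof (global_var) };
     void *host_table[4] = { &func_table[0], &func_table[2],
			     &var_table[0], &var_table[2] };

   giving num_funcs == 2 and num_vars == 1 by the pointer arithmetic above.  */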
/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
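
/* Illustration (not compiled): mkoffload-generated code is expected to
   register each offload image from a static constructor, roughly as
   sketched below.  The descriptor contents here are hypothetical; the
   actual tables are produced by the compiler.  */
#if 0
  static const void *host_table[4];   /* func/var table bounds.  */
  static const void *target_data;     /* Plugin-specific image blob.  */

  __attribute__ ((constructor))
  static void
  register_image (void)
  {
    GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION,
						  GOMP_VERSION_NVIDIA_PTX),
			       host_table, GOMP_DEVICE_NVIDIA_PTX,
			       target_data);
  }
#endif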
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  if (args)
    while (*args)
      {
	intptr_t id = (intptr_t) *args++, val;
	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	  val = (intptr_t) *args++;
	else
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	  continue;
	id &= GOMP_TARGET_ARG_ID_MASK;
	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
	  continue;
	val = val > INT_MAX ? INT_MAX : val;
	if (val)
	  gomp_icv (true)->thread_limit_var = val;
	break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
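
/* Worked example of the rounding above: with *tgt_size == 13 and a
   firstprivate item of align == 8 and size == 4, the item starts at
   (13 + 8 - 1) & ~(8 - 1) == 16, and *tgt_size becomes 16 + 4 == 20.
   *tgt_align tracks the largest alignment seen, so the whole block can be
   over-allocated once and aligned in copy_firstprivate_data below.  */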
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than last available
   hw device), use host fallback.  FN is address of host code, UNUSED is
   part of the current ABI, but we're not actually using it.  HOSTADDRS,
   SIZES and KINDS are arrays with MAPNUM entries, with addresses of the
   host objects, sizes of the host objects (resp. for pointer kind pointer
   bias and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);

  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
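
/* Illustration (not compiled): the user-level form that lowers to this
   entry point (older ABI; current compilers emit GOMP_target_ext below).  */
#if 0
  int a[1024], res;
  #pragma omp target map(to: a) map(from: res)
  res = a[0] + a[1023];   /* FN wraps this body; A, RES appear in HOSTADDRS.  */
#endif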
static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element contains the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.  A sketched
   encoding follows below.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value GOMP_teams will have on the device.  */
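
/* Illustration (not compiled): a hypothetical ARGS array encoding
   NUM_TEAMS = 4 in the id element itself and THREAD_LIMIT = 1000000000
   (too wide for the shifted form) in a subsequent element, terminated by
   a single NULL, matching the decoding loop in gomp_target_fallback.  */
#if 0
  void *args[] = {
    (void *) (GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS
	      | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
    (void *) (GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_THREAD_LIMIT
	      | GOMP_TARGET_ARG_SUBSEQUENT_PARAM),
    (void *) (intptr_t) 1000000000,
    NULL
  };
#endif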
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep, args);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
				true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);
}
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}
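
/* Illustration (not compiled): the user-level pairing that brackets
   GOMP_target_data{,_ext} and GOMP_target_end_data.  */
#if 0
  double v[1024];
  #pragma omp target data map(tofrom: v)   /* GOMP_target_data_ext.  */
  {
    #pragma omp target                     /* Reuses the mapping of V.  */
    v[0] += 1.0;
  }                                        /* GOMP_target_end_data.  */
#endif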
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    {
	      if (k->aux && k->aux->attach_count)
		{
		  /* We have to be careful not to overwrite still attached
		     pointers during the copyback to host.  */
		  uintptr_t addr = k->host_start;
		  while (addr < k->host_end)
		    {
		      size_t i = (addr - k->host_start) / sizeof (void *);
		      if (k->aux->attach_count[i] == 0)
			gomp_copy_dev2host (devicep, NULL, (void *) addr,
					    (void *) (k->tgt->tgt_start
						      + k->tgt_offset
						      + addr - k->host_start),
					    sizeof (void *));
		      addr += sizeof (void *);
		    }
		}
	      else
		gomp_copy_dev2host (devicep, NULL,
				    (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	    }

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since they maintain an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start
	     with.  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
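
/* Illustration (not compiled): the unstructured mappings that reach this
   entry point; the EXIT_DATA flag distinguishes the two forms.  */
#if 0
  double *p = malloc (1024 * sizeof (double));
  #pragma omp target enter data map(to: p[0:1024])    /* Maps vars.  */
  /* ... any number of target regions using P ... */
  #pragma omp target exit data map(from: p[0:1024])   /* gomp_exit_data.  */
#endif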
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
				ttask->args);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true, NULL);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      NULL, GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
	for (i = 0; i < ttask->mapnum; i++)
	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	    {
	      gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			     NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	      i += ttask->sizes[i];
	    }
	  else
	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
			   &ttask->sizes[i], &ttask->kinds[i], true,
			   &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
      else
	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
			ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
bool
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
	     unsigned int thread_limit, bool first)
{
  struct gomp_thread *thr = gomp_thread ();
  if (first)
    {
      if (thread_limit)
	{
	  struct gomp_task_icv *icv = gomp_icv (true);
	  icv->thread_limit_var
	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
	}
      (void) num_teams_high;
      if (num_teams_low == 0)
	num_teams_low = 1;
      thr->num_teams = num_teams_low - 1;
      thr->team_num = 0;
    }
  else if (thr->team_num == thr->num_teams)
    return false;
  else
    ++thr->team_num;
  return true;
}
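
/* Illustration (not compiled): a sketch of how host-fallback code is
   expected to drive GOMP_teams4, calling it once with FIRST set and then
   re-invoking until it returns false, so the body runs once per team;
   the exact compiler-generated shape may differ.  */
#if 0
  if (GOMP_teams4 (num_teams_lo, num_teams_hi, thread_limit, true))
    do
      body ();   /* omp_get_team_num () follows thr->team_num.  */
    while (GOMP_teams4 (num_teams_lo, num_teams_hi, thread_limit, false));
#endif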
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == gomp_get_num_devices ())
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
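
/* Illustration (not compiled): copying a 2x3 subrectangle out of a 4x5
   host matrix into the top-left corner of a 2x3 destination; with both
   device numbers naming the host, the worker recurses into row-wise
   memcpy calls.  */
#if 0
  double src[4][5], dst[2][3];
  size_t volume[2]   = { 2, 3 };   /* Rows, columns to copy.  */
  size_t src_offs[2] = { 1, 2 };   /* Start at src[1][2].  */
  size_t dst_offs[2] = { 0, 0 };
  size_t src_dims[2] = { 4, 5 };
  size_t dst_dims[2] = { 2, 3 };
  int host = omp_get_num_devices ();

  omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
			  dst_offs, src_offs, dst_dims, src_dims,
			  host, host);
#endif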
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
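
/* Illustration (not compiled): installing a user-managed device buffer
   into the mapping table so later target constructs treat HOST_BUF as
   already present, then tearing the association down again.  */
#if 0
  int dev = omp_get_default_device ();
  double host_buf[128];
  void *dev_buf = omp_target_alloc (sizeof host_buf, dev);

  if (omp_target_associate_ptr (host_buf, dev_buf, sizeof host_buf,
				0 /* device_offset */, dev) == 0)
    {
      /* ... target regions now reuse DEV_BUF instead of remapping ... */
      omp_target_disassociate_ptr (host_buf, dev);
    }
  omp_target_free (dev_buf, dev);
#endif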
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == gomp_get_num_devices ())
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= num_devices_openmp)
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
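
/* For reference, DLSYM (init_device) above expands (roughly) to:

     if (!(device->init_device_func = dlsym (plugin_handle,
					     "GOMP_OFFLOAD_init_device")))
       goto dl_fail;

   i.e. each plugin must export one GOMP_OFFLOAD_* symbol per hook.  */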
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devs = current_device.get_num_devices_func ();
	    if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devs = realloc (devs, (num_devs + new_num_devs)
				      * sizeof (struct gomp_device_descr));
		if (!devs)
		  {
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fallback to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */