1 /* Copyright (C) 2013-2021 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 typedef uintptr_t *hash_entry_type
;
/* Hash-table allocation callback: obtain SIZE bytes through libgomp's
   allocator.  */
static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}
/* Hash-table deallocation callback: release a block previously obtained
   from htab_alloc.  */
static inline void
htab_free (void *ptr)
{
  free (ptr);
}
52 static inline hashval_t
53 htab_hash (hash_entry_type element
)
55 return hash_pointer ((void *) element
);
59 htab_eq (hash_entry_type x
, hash_entry_type y
)
64 #define FIELD_TGT_EMPTY (~(size_t) 0)
66 static void gomp_target_init (void);
68 /* The whole initialization code for offloading plugins is only run once. */
69 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
71 /* Mutex for offload image registration. */
72 static gomp_mutex_t register_lock
;
74 /* This structure describes an offload image.
75 It contains type of the target device, pointer to host table descriptor, and
76 pointer to target data. */
77 struct offload_image_descr
{
79 enum offload_target_type type
;
80 const void *host_table
;
81 const void *target_data
;
84 /* Array of descriptors of offload images. */
85 static struct offload_image_descr
*offload_images
;
87 /* Total number of offload images. */
88 static int num_offload_images
;
90 /* Array of descriptors for all available devices. */
91 static struct gomp_device_descr
*devices
;
93 /* Total number of available devices. */
94 static int num_devices
;
96 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
97 static int num_devices_openmp
;
99 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
102 gomp_realloc_unlock (void *old
, size_t size
)
104 void *ret
= realloc (old
, size
);
107 gomp_mutex_unlock (®ister_lock
);
108 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
113 attribute_hidden
void
114 gomp_init_targets_once (void)
116 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
120 gomp_get_num_devices (void)
122 gomp_init_targets_once ();
123 return num_devices_openmp
;
126 static struct gomp_device_descr
*
127 resolve_device (int device_id
)
129 if (device_id
== GOMP_DEVICE_ICV
)
131 struct gomp_task_icv
*icv
= gomp_icv (false);
132 device_id
= icv
->default_device_var
;
135 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
137 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
138 && device_id
!= GOMP_DEVICE_HOST_FALLBACK
139 && device_id
!= num_devices_openmp
)
140 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
141 "but device not found");
146 gomp_mutex_lock (&devices
[device_id
].lock
);
147 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
148 gomp_init_device (&devices
[device_id
]);
149 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
151 gomp_mutex_unlock (&devices
[device_id
].lock
);
153 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
154 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
155 "but device is finalized");
159 gomp_mutex_unlock (&devices
[device_id
].lock
);
161 return &devices
[device_id
];
165 static inline splay_tree_key
166 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
168 if (key
->host_start
!= key
->host_end
)
169 return splay_tree_lookup (mem_map
, key
);
172 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
177 n
= splay_tree_lookup (mem_map
, key
);
181 return splay_tree_lookup (mem_map
, key
);
184 static inline splay_tree_key
185 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
187 if (key
->host_start
!= key
->host_end
)
188 return splay_tree_lookup (mem_map
, key
);
191 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
197 gomp_device_copy (struct gomp_device_descr
*devicep
,
198 bool (*copy_func
) (int, void *, const void *, size_t),
199 const char *dst
, void *dstaddr
,
200 const char *src
, const void *srcaddr
,
203 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
205 gomp_mutex_unlock (&devicep
->lock
);
206 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
207 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
212 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
213 bool (*copy_func
) (int, void *, const void *, size_t,
214 struct goacc_asyncqueue
*),
215 const char *dst
, void *dstaddr
,
216 const char *src
, const void *srcaddr
,
217 const void *srcaddr_orig
,
218 size_t size
, struct goacc_asyncqueue
*aq
)
220 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
222 gomp_mutex_unlock (&devicep
->lock
);
223 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
224 gomp_fatal ("Copying of %s object [%p..%p)"
225 " via buffer %s object [%p..%p)"
226 " to %s object [%p..%p) failed",
227 src
, srcaddr_orig
, srcaddr_orig
+ size
,
228 src
, srcaddr
, srcaddr
+ size
,
229 dst
, dstaddr
, dstaddr
+ size
);
231 gomp_fatal ("Copying of %s object [%p..%p)"
232 " to %s object [%p..%p) failed",
233 src
, srcaddr
, srcaddr
+ size
,
234 dst
, dstaddr
, dstaddr
+ size
);
238 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
239 host to device memory transfers. */
241 struct gomp_coalesce_chunk
243 /* The starting and ending point of a coalesced chunk of memory. */
247 struct gomp_coalesce_buf
249 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
250 it will be copied to the device. */
252 struct target_mem_desc
*tgt
;
253 /* Array with offsets, chunks[i].start is the starting offset and
254 chunks[i].end ending offset relative to tgt->tgt_start device address
255 of chunks which are to be copied to buf and later copied to device. */
256 struct gomp_coalesce_chunk
*chunks
;
257 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
260 /* During construction of chunks array, how many memory regions are within
261 the last chunk. If there is just one memory region for a chunk, we copy
262 it directly to device rather than going through buf. */
266 /* Maximum size of memory region considered for coalescing. Larger copies
267 are performed directly. */
268 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
270 /* Maximum size of a gap in between regions to consider them being copied
271 within the same chunk. All the device offsets considered are within
272 newly allocated device memory, so it isn't fatal if we copy some padding
273 in between from host to device. The gaps come either from alignment
274 padding or from memory regions which are not supposed to be copied from
275 host to device (e.g. map(alloc:), map(from:) etc.). */
276 #define MAX_COALESCE_BUF_GAP (4 * 1024)
278 /* Add region with device tgt_start relative offset and length to CBUF.
280 This must not be used for asynchronous copies, because the host data might
281 not be computed yet (by an earlier asynchronous compute region, for
283 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
284 is it more performant to use libgomp CBUF buffering or individual device
285 asynchronous copying?) */
288 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
290 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
294 if (cbuf
->chunk_cnt
< 0)
296 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
298 cbuf
->chunk_cnt
= -1;
301 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
303 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
307 /* If the last chunk is only used by one mapping, discard it,
308 as it will be one host to device copy anyway and
309 memcpying it around will only waste cycles. */
310 if (cbuf
->use_cnt
== 1)
313 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
314 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
319 /* Return true for mapping kinds which need to copy data from the
320 host to device for regions that weren't previously mapped. */
323 gomp_to_device_kind_p (int kind
)
329 case GOMP_MAP_FORCE_ALLOC
:
330 case GOMP_MAP_FORCE_FROM
:
331 case GOMP_MAP_ALWAYS_FROM
:
338 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
339 non-NULL), when the source data is stack or may otherwise be deallocated
340 before the asynchronous copy takes place, EPHEMERAL must be passed as
343 attribute_hidden
void
344 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
345 struct goacc_asyncqueue
*aq
,
346 void *d
, const void *h
, size_t sz
,
347 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
349 if (__builtin_expect (aq
!= NULL
, 0))
351 /* See 'gomp_coalesce_buf_add'. */
354 void *h_buf
= (void *) h
;
357 /* We're queueing up an asynchronous copy from data that may
358 disappear before the transfer takes place (i.e. because it is a
359 stack local in a function that is no longer executing). Make a
360 copy of the data into a temporary buffer in those cases. */
361 h_buf
= gomp_malloc (sz
);
362 memcpy (h_buf
, h
, sz
);
364 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
365 "dev", d
, "host", h_buf
, h
, sz
, aq
);
367 /* Free temporary buffer once the transfer has completed. */
368 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
375 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
376 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
379 long last
= cbuf
->chunk_cnt
- 1;
380 while (first
<= last
)
382 long middle
= (first
+ last
) >> 1;
383 if (cbuf
->chunks
[middle
].end
<= doff
)
385 else if (cbuf
->chunks
[middle
].start
<= doff
)
387 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
388 gomp_fatal ("internal libgomp cbuf error");
389 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
399 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
402 attribute_hidden
void
403 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
404 struct goacc_asyncqueue
*aq
,
405 void *h
, const void *d
, size_t sz
)
407 if (__builtin_expect (aq
!= NULL
, 0))
408 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
409 "host", h
, "dev", d
, NULL
, sz
, aq
);
411 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
415 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
417 if (!devicep
->free_func (devicep
->target_id
, devptr
))
419 gomp_mutex_unlock (&devicep
->lock
);
420 gomp_fatal ("error in freeing device memory block at %p", devptr
);
424 /* Increment reference count of a splay_tree_key region K by 1.
425 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
426 increment the value if refcount is not yet contained in the set (used for
427 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
428 once for each construct). */
431 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
433 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
436 uintptr_t *refcount_ptr
= &k
->refcount
;
438 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
439 refcount_ptr
= &k
->structelem_refcount
;
440 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
441 refcount_ptr
= k
->structelem_refcount_ptr
;
445 if (htab_find (*refcount_set
, refcount_ptr
))
447 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
448 *slot
= refcount_ptr
;
455 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
456 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
457 track already seen refcounts, and only adjust the value if refcount is not
458 yet contained in the set (like gomp_increment_refcount).
460 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
461 it is already zero and we know we decremented it earlier. This signals that
462 associated maps should be copied back to host.
464 *DO_REMOVE is set to true when this is the first handling of this refcount
465 and we are setting it to zero. This signals a removal of this key from the
468 Copy and removal are separated due to cases like handling of structure
469 elements, e.g. each map of a structure element representing a possible copy
470 out of a structure field has to be handled individually, but we only signal
471 removal for one (the first encountered) sibling map. */
474 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
475 bool *do_copy
, bool *do_remove
)
477 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
479 *do_copy
= *do_remove
= false;
483 uintptr_t *refcount_ptr
= &k
->refcount
;
485 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
486 refcount_ptr
= &k
->structelem_refcount
;
487 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
488 refcount_ptr
= k
->structelem_refcount_ptr
;
490 bool new_encountered_refcount
;
491 bool set_to_zero
= false;
492 bool is_zero
= false;
494 uintptr_t orig_refcount
= *refcount_ptr
;
498 if (htab_find (*refcount_set
, refcount_ptr
))
500 new_encountered_refcount
= false;
504 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
505 *slot
= refcount_ptr
;
506 new_encountered_refcount
= true;
509 /* If no refcount_set being used, assume all keys are being decremented
510 for the first time. */
511 new_encountered_refcount
= true;
515 else if (*refcount_ptr
> 0)
519 if (*refcount_ptr
== 0)
521 if (orig_refcount
> 0)
527 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
528 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
531 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
532 gomp_map_0len_lookup found oldn for newn.
533 Helper function of gomp_map_vars. */
536 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
537 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
538 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
539 unsigned char kind
, bool always_to_flag
,
540 struct gomp_coalesce_buf
*cbuf
,
541 htab_t
*refcount_set
)
543 assert (kind
!= GOMP_MAP_ATTACH
);
546 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
547 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
548 tgt_var
->is_attach
= false;
549 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
550 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
552 if ((kind
& GOMP_MAP_FLAG_FORCE
)
553 || oldn
->host_start
> newn
->host_start
554 || oldn
->host_end
< newn
->host_end
)
556 gomp_mutex_unlock (&devicep
->lock
);
557 gomp_fatal ("Trying to map into device [%p..%p) object when "
558 "[%p..%p) is already mapped",
559 (void *) newn
->host_start
, (void *) newn
->host_end
,
560 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
563 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
564 gomp_copy_host2dev (devicep
, aq
,
565 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
566 + newn
->host_start
- oldn
->host_start
),
567 (void *) newn
->host_start
,
568 newn
->host_end
- newn
->host_start
, false, cbuf
);
570 gomp_increment_refcount (oldn
, refcount_set
);
/* Fetch the mapping kind of entry IDX from the KINDS array: the array
   holds 16-bit entries when SHORT_MAPKIND is true and 8-bit entries
   otherwise.  NOTE(review): the 'static int' line and braces were lost in
   extraction and are restored here; the visible ternary body is
   unchanged.  */
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
581 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
582 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
583 struct gomp_coalesce_buf
*cbuf
)
585 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
586 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
587 struct splay_tree_key_s cur_node
;
589 cur_node
.host_start
= host_ptr
;
590 if (cur_node
.host_start
== (uintptr_t) NULL
)
592 cur_node
.tgt_offset
= (uintptr_t) NULL
;
593 gomp_copy_host2dev (devicep
, aq
,
594 (void *) (tgt
->tgt_start
+ target_offset
),
595 (void *) &cur_node
.tgt_offset
, sizeof (void *),
599 /* Add bias to the pointer value. */
600 cur_node
.host_start
+= bias
;
601 cur_node
.host_end
= cur_node
.host_start
;
602 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
605 gomp_mutex_unlock (&devicep
->lock
);
606 gomp_fatal ("Pointer target of array section wasn't mapped");
608 cur_node
.host_start
-= n
->host_start
;
610 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
611 /* At this point tgt_offset is target address of the
612 array section. Now subtract bias to get what we want
613 to initialize the pointer with. */
614 cur_node
.tgt_offset
-= bias
;
615 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
616 (void *) &cur_node
.tgt_offset
, sizeof (void *),
621 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
622 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
623 size_t first
, size_t i
, void **hostaddrs
,
624 size_t *sizes
, void *kinds
,
625 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
627 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
628 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
629 struct splay_tree_key_s cur_node
;
631 const bool short_mapkind
= true;
632 const int typemask
= short_mapkind
? 0xff : 0x7;
634 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
635 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
636 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
637 kind
= get_kind (short_mapkind
, kinds
, i
);
640 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
642 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
643 kind
& typemask
, false, cbuf
, refcount_set
);
648 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
650 cur_node
.host_start
--;
651 n2
= splay_tree_lookup (mem_map
, &cur_node
);
652 cur_node
.host_start
++;
655 && n2
->host_start
- n
->host_start
656 == n2
->tgt_offset
- n
->tgt_offset
)
658 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
659 kind
& typemask
, false, cbuf
, refcount_set
);
664 n2
= splay_tree_lookup (mem_map
, &cur_node
);
668 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
670 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
671 kind
& typemask
, false, cbuf
, refcount_set
);
675 gomp_mutex_unlock (&devicep
->lock
);
676 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
677 "other mapped elements from the same structure weren't mapped "
678 "together with it", (void *) cur_node
.host_start
,
679 (void *) cur_node
.host_end
);
682 attribute_hidden
void
683 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
684 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
685 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
686 struct gomp_coalesce_buf
*cbufp
)
688 struct splay_tree_key_s s
;
693 gomp_mutex_unlock (&devicep
->lock
);
694 gomp_fatal ("enclosing struct not mapped for attach");
697 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
698 /* We might have a pointer in a packed struct: however we cannot have more
699 than one such pointer in each pointer-sized portion of the struct, so
701 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
704 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
706 if (!n
->aux
->attach_count
)
708 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
710 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
711 n
->aux
->attach_count
[idx
]++;
714 gomp_mutex_unlock (&devicep
->lock
);
715 gomp_fatal ("attach count overflow");
718 if (n
->aux
->attach_count
[idx
] == 1)
720 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
722 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
726 if ((void *) target
== NULL
)
728 gomp_mutex_unlock (&devicep
->lock
);
729 gomp_fatal ("attempt to attach null pointer");
732 s
.host_start
= target
+ bias
;
733 s
.host_end
= s
.host_start
+ 1;
734 tn
= splay_tree_lookup (mem_map
, &s
);
738 gomp_mutex_unlock (&devicep
->lock
);
739 gomp_fatal ("pointer target not mapped for attach");
742 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
745 "%s: attaching host %p, target %p (struct base %p) to %p\n",
746 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
747 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
749 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
750 sizeof (void *), true, cbufp
);
753 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
754 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
757 attribute_hidden
void
758 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
759 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
760 uintptr_t detach_from
, bool finalize
,
761 struct gomp_coalesce_buf
*cbufp
)
767 gomp_mutex_unlock (&devicep
->lock
);
768 gomp_fatal ("enclosing struct not mapped for detach");
771 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
773 if (!n
->aux
|| !n
->aux
->attach_count
)
775 gomp_mutex_unlock (&devicep
->lock
);
776 gomp_fatal ("no attachment counters for struct");
780 n
->aux
->attach_count
[idx
] = 1;
782 if (n
->aux
->attach_count
[idx
] == 0)
784 gomp_mutex_unlock (&devicep
->lock
);
785 gomp_fatal ("attach count underflow");
788 n
->aux
->attach_count
[idx
]--;
790 if (n
->aux
->attach_count
[idx
] == 0)
792 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
794 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
797 "%s: detaching host %p, target %p (struct base %p) to %p\n",
798 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
799 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
802 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
803 sizeof (void *), true, cbufp
);
806 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
807 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
810 attribute_hidden
uintptr_t
811 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
813 if (tgt
->list
[i
].key
!= NULL
)
814 return tgt
->list
[i
].key
->tgt
->tgt_start
815 + tgt
->list
[i
].key
->tgt_offset
816 + tgt
->list
[i
].offset
;
818 switch (tgt
->list
[i
].offset
)
821 return (uintptr_t) hostaddrs
[i
];
827 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
828 + tgt
->list
[i
+ 1].key
->tgt_offset
829 + tgt
->list
[i
+ 1].offset
830 + (uintptr_t) hostaddrs
[i
]
831 - (uintptr_t) hostaddrs
[i
+ 1];
834 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
838 static inline __attribute__((always_inline
)) struct target_mem_desc
*
839 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
840 struct goacc_asyncqueue
*aq
, size_t mapnum
,
841 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
842 void *kinds
, bool short_mapkind
,
843 htab_t
*refcount_set
,
844 enum gomp_map_vars_kind pragma_kind
)
846 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
847 bool has_firstprivate
= false;
848 bool has_always_ptrset
= false;
849 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
850 const int rshift
= short_mapkind
? 8 : 3;
851 const int typemask
= short_mapkind
? 0xff : 0x7;
852 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
853 struct splay_tree_key_s cur_node
;
854 struct target_mem_desc
*tgt
855 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
856 tgt
->list_count
= mapnum
;
857 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
858 tgt
->device_descr
= devicep
;
860 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
869 tgt_align
= sizeof (void *);
875 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
877 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
878 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
881 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
883 size_t align
= 4 * sizeof (void *);
885 tgt_size
= mapnum
* sizeof (void *);
887 cbuf
.use_cnt
= 1 + (mapnum
> 1);
888 cbuf
.chunks
[0].start
= 0;
889 cbuf
.chunks
[0].end
= tgt_size
;
892 gomp_mutex_lock (&devicep
->lock
);
893 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
895 gomp_mutex_unlock (&devicep
->lock
);
900 for (i
= 0; i
< mapnum
; i
++)
902 int kind
= get_kind (short_mapkind
, kinds
, i
);
903 if (hostaddrs
[i
] == NULL
904 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
906 tgt
->list
[i
].key
= NULL
;
907 tgt
->list
[i
].offset
= OFFSET_INLINED
;
910 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
911 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
913 tgt
->list
[i
].key
= NULL
;
916 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
917 on a separate construct prior to using use_device_{addr,ptr}.
918 In OpenMP 5.0, map directives need to be ordered by the
919 middle-end before the use_device_* clauses. If
920 !not_found_cnt, all mappings requested (if any) are already
921 mapped, so use_device_{addr,ptr} can be resolved right away.
922 Otherwise, if not_found_cnt, gomp_map_lookup might fail
923 now but would succeed after performing the mappings in the
924 following loop. We can't defer this always to the second
925 loop, because it is not even invoked when !not_found_cnt
926 after the first loop. */
927 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
928 cur_node
.host_end
= cur_node
.host_start
;
929 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
932 cur_node
.host_start
-= n
->host_start
;
934 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
935 + cur_node
.host_start
);
937 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
939 gomp_mutex_unlock (&devicep
->lock
);
940 gomp_fatal ("use_device_ptr pointer wasn't mapped");
942 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
943 /* If not present, continue using the host address. */
946 __builtin_unreachable ();
947 tgt
->list
[i
].offset
= OFFSET_INLINED
;
950 tgt
->list
[i
].offset
= 0;
953 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
955 size_t first
= i
+ 1;
956 size_t last
= i
+ sizes
[i
];
957 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
958 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
960 tgt
->list
[i
].key
= NULL
;
961 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
962 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
965 size_t align
= (size_t) 1 << (kind
>> rshift
);
966 if (tgt_align
< align
)
968 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
969 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
970 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
971 not_found_cnt
+= last
- i
;
972 for (i
= first
; i
<= last
; i
++)
974 tgt
->list
[i
].key
= NULL
;
976 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
978 gomp_coalesce_buf_add (&cbuf
,
979 tgt_size
- cur_node
.host_end
980 + (uintptr_t) hostaddrs
[i
],
986 for (i
= first
; i
<= last
; i
++)
987 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
988 sizes
, kinds
, NULL
, refcount_set
);
992 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
994 tgt
->list
[i
].key
= NULL
;
995 tgt
->list
[i
].offset
= OFFSET_POINTER
;
996 has_firstprivate
= true;
999 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
)
1001 tgt
->list
[i
].key
= NULL
;
1002 has_firstprivate
= true;
1005 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1006 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1007 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1009 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1010 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1012 tgt
->list
[i
].key
= NULL
;
1014 size_t align
= (size_t) 1 << (kind
>> rshift
);
1015 if (tgt_align
< align
)
1017 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1019 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1020 cur_node
.host_end
- cur_node
.host_start
);
1021 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1022 has_firstprivate
= true;
1026 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1028 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1031 tgt
->list
[i
].key
= NULL
;
1032 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1037 n
= splay_tree_lookup (mem_map
, &cur_node
);
1038 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1040 int always_to_cnt
= 0;
1041 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1043 bool has_nullptr
= false;
1045 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1046 if (n
->tgt
->list
[j
].key
== n
)
1048 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1051 if (n
->tgt
->list_count
== 0)
1053 /* 'declare target'; assume has_nullptr; it could also be
1054 statically assigned pointer, but that it should be to
1055 the equivalent variable on the host. */
1056 assert (n
->refcount
== REFCOUNT_INFINITY
);
1060 assert (j
< n
->tgt
->list_count
);
1061 /* Re-map the data if there is an 'always' modifier or if it a
1062 null pointer was there and non a nonnull has been found; that
1063 permits transparent re-mapping for Fortran array descriptors
1064 which were previously mapped unallocated. */
1065 for (j
= i
+ 1; j
< mapnum
; j
++)
1067 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1068 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1070 || !GOMP_MAP_POINTER_P (ptr_kind
)
1071 || *(void **) hostaddrs
[j
] == NULL
))
1073 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1074 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1075 > cur_node
.host_end
))
1079 has_always_ptrset
= true;
1084 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1085 kind
& typemask
, always_to_cnt
> 0, NULL
,
1091 tgt
->list
[i
].key
= NULL
;
1093 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1095 /* Not present, hence, skip entry - including its MAP_POINTER,
1097 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1099 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1100 == GOMP_MAP_POINTER
))
1103 tgt
->list
[i
].key
= NULL
;
1104 tgt
->list
[i
].offset
= 0;
1108 size_t align
= (size_t) 1 << (kind
>> rshift
);
1110 if (tgt_align
< align
)
1112 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1114 && gomp_to_device_kind_p (kind
& typemask
))
1115 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1116 cur_node
.host_end
- cur_node
.host_start
);
1117 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1118 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1122 for (j
= i
+ 1; j
< mapnum
; j
++)
1123 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1124 kinds
, j
)) & typemask
))
1125 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1127 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1128 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1129 > cur_node
.host_end
))
1133 tgt
->list
[j
].key
= NULL
;
1144 gomp_mutex_unlock (&devicep
->lock
);
1145 gomp_fatal ("unexpected aggregation");
1147 tgt
->to_free
= devaddrs
[0];
1148 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1149 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1151 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
1153 /* Allocate tgt_align aligned tgt_size block of memory. */
1154 /* FIXME: Perhaps change interface to allocate properly aligned
1156 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1157 tgt_size
+ tgt_align
- 1);
1160 gomp_mutex_unlock (&devicep
->lock
);
1161 gomp_fatal ("device memory allocation fail");
1164 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1165 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1166 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1168 if (cbuf
.use_cnt
== 1)
1170 if (cbuf
.chunk_cnt
> 0)
1173 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1183 tgt
->to_free
= NULL
;
1189 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1190 tgt_size
= mapnum
* sizeof (void *);
1193 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1196 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1197 splay_tree_node array
= tgt
->array
;
1198 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1199 uintptr_t field_tgt_base
= 0;
1200 splay_tree_key field_tgt_structelem_first
= NULL
;
1202 for (i
= 0; i
< mapnum
; i
++)
1203 if (has_always_ptrset
1205 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1206 == GOMP_MAP_TO_PSET
)
1208 splay_tree_key k
= tgt
->list
[i
].key
;
1209 bool has_nullptr
= false;
1211 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1212 if (k
->tgt
->list
[j
].key
== k
)
1214 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1217 if (k
->tgt
->list_count
== 0)
1220 assert (j
< k
->tgt
->list_count
);
1222 tgt
->list
[i
].has_null_ptr_assoc
= false;
1223 for (j
= i
+ 1; j
< mapnum
; j
++)
1225 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1226 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1228 || !GOMP_MAP_POINTER_P (ptr_kind
)
1229 || *(void **) hostaddrs
[j
] == NULL
))
1231 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1232 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1237 if (*(void **) hostaddrs
[j
] == NULL
)
1238 tgt
->list
[i
].has_null_ptr_assoc
= true;
1239 tgt
->list
[j
].key
= k
;
1240 tgt
->list
[j
].copy_from
= false;
1241 tgt
->list
[j
].always_copy_from
= false;
1242 tgt
->list
[j
].is_attach
= false;
1243 gomp_increment_refcount (k
, refcount_set
);
1244 gomp_map_pointer (k
->tgt
, aq
,
1245 (uintptr_t) *(void **) hostaddrs
[j
],
1246 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1253 else if (tgt
->list
[i
].key
== NULL
)
1255 int kind
= get_kind (short_mapkind
, kinds
, i
);
1256 if (hostaddrs
[i
] == NULL
)
1258 switch (kind
& typemask
)
1260 size_t align
, len
, first
, last
;
1262 case GOMP_MAP_FIRSTPRIVATE
:
1263 align
= (size_t) 1 << (kind
>> rshift
);
1264 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1265 tgt
->list
[i
].offset
= tgt_size
;
1267 gomp_copy_host2dev (devicep
, aq
,
1268 (void *) (tgt
->tgt_start
+ tgt_size
),
1269 (void *) hostaddrs
[i
], len
, false, cbufp
);
1272 case GOMP_MAP_FIRSTPRIVATE_INT
:
1273 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1275 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1276 /* The OpenACC 'host_data' construct only allows 'use_device'
1277 "mapping" clauses, so in the first loop, 'not_found_cnt'
1278 must always have been zero, so all OpenACC 'use_device'
1279 clauses have already been handled. (We can only easily test
1280 'use_device' with 'if_present' clause here.) */
1281 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1282 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1283 code conceptually simple, similar to the first loop. */
1284 case GOMP_MAP_USE_DEVICE_PTR
:
1285 if (tgt
->list
[i
].offset
== 0)
1287 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1288 cur_node
.host_end
= cur_node
.host_start
;
1289 n
= gomp_map_lookup (mem_map
, &cur_node
);
1292 cur_node
.host_start
-= n
->host_start
;
1294 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1295 + cur_node
.host_start
);
1297 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1299 gomp_mutex_unlock (&devicep
->lock
);
1300 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1302 else if ((kind
& typemask
)
1303 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1304 /* If not present, continue using the host address. */
1307 __builtin_unreachable ();
1308 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1311 case GOMP_MAP_STRUCT
:
1313 last
= i
+ sizes
[i
];
1314 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1315 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1317 if (tgt
->list
[first
].key
!= NULL
)
1319 n
= splay_tree_lookup (mem_map
, &cur_node
);
1322 size_t align
= (size_t) 1 << (kind
>> rshift
);
1323 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1324 - (uintptr_t) hostaddrs
[i
];
1325 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1326 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1327 - (uintptr_t) hostaddrs
[i
];
1328 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1329 field_tgt_offset
= tgt_size
;
1330 field_tgt_clear
= last
;
1331 field_tgt_structelem_first
= NULL
;
1332 tgt_size
+= cur_node
.host_end
1333 - (uintptr_t) hostaddrs
[first
];
1336 for (i
= first
; i
<= last
; i
++)
1337 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1338 sizes
, kinds
, cbufp
, refcount_set
);
1341 case GOMP_MAP_ALWAYS_POINTER
:
1342 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1343 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1344 n
= splay_tree_lookup (mem_map
, &cur_node
);
1346 || n
->host_start
> cur_node
.host_start
1347 || n
->host_end
< cur_node
.host_end
)
1349 gomp_mutex_unlock (&devicep
->lock
);
1350 gomp_fatal ("always pointer not mapped");
1352 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1353 != GOMP_MAP_ALWAYS_POINTER
)
1354 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1355 if (cur_node
.tgt_offset
)
1356 cur_node
.tgt_offset
-= sizes
[i
];
1357 gomp_copy_host2dev (devicep
, aq
,
1358 (void *) (n
->tgt
->tgt_start
1360 + cur_node
.host_start
1362 (void *) &cur_node
.tgt_offset
,
1363 sizeof (void *), true, cbufp
);
1364 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1365 + cur_node
.host_start
- n
->host_start
;
1367 case GOMP_MAP_IF_PRESENT
:
1368 /* Not present - otherwise handled above. Skip over its
1369 MAP_POINTER as well. */
1371 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1372 == GOMP_MAP_POINTER
))
1375 case GOMP_MAP_ATTACH
:
1377 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1378 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1379 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1382 tgt
->list
[i
].key
= n
;
1383 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1384 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1385 tgt
->list
[i
].copy_from
= false;
1386 tgt
->list
[i
].always_copy_from
= false;
1387 tgt
->list
[i
].is_attach
= true;
1388 /* OpenACC 'attach'/'detach' doesn't affect
1389 structured/dynamic reference counts ('n->refcount',
1390 'n->dynamic_refcount'). */
1392 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1393 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1396 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1398 gomp_mutex_unlock (&devicep
->lock
);
1399 gomp_fatal ("outer struct not mapped for attach");
1406 splay_tree_key k
= &array
->key
;
1407 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1408 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1409 k
->host_end
= k
->host_start
+ sizes
[i
];
1411 k
->host_end
= k
->host_start
+ sizeof (void *);
1412 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1413 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1414 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1415 kind
& typemask
, false, cbufp
,
1420 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1422 /* Replace target address of the pointer with target address
1423 of mapped object in the splay tree. */
1424 splay_tree_remove (mem_map
, n
);
1426 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1427 k
->aux
->link_key
= n
;
1429 size_t align
= (size_t) 1 << (kind
>> rshift
);
1430 tgt
->list
[i
].key
= k
;
1433 k
->dynamic_refcount
= 0;
1434 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1436 k
->tgt_offset
= k
->host_start
- field_tgt_base
1440 k
->refcount
= REFCOUNT_STRUCTELEM
;
1441 if (field_tgt_structelem_first
== NULL
)
1443 /* Set to first structure element of sequence. */
1444 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1445 field_tgt_structelem_first
= k
;
1448 /* Point to refcount of leading element, but do not
1450 k
->structelem_refcount_ptr
1451 = &field_tgt_structelem_first
->structelem_refcount
;
1453 if (i
== field_tgt_clear
)
1455 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1456 field_tgt_structelem_first
= NULL
;
1459 if (i
== field_tgt_clear
)
1460 field_tgt_clear
= FIELD_TGT_EMPTY
;
1464 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1465 k
->tgt_offset
= tgt_size
;
1466 tgt_size
+= k
->host_end
- k
->host_start
;
1468 /* First increment, from 0 to 1. gomp_increment_refcount
1469 encapsulates the different increment cases, so use this
1470 instead of directly setting 1 during initialization. */
1471 gomp_increment_refcount (k
, refcount_set
);
1473 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1474 tgt
->list
[i
].always_copy_from
1475 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1476 tgt
->list
[i
].is_attach
= false;
1477 tgt
->list
[i
].offset
= 0;
1478 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1481 array
->right
= NULL
;
1482 splay_tree_insert (mem_map
, array
);
1483 switch (kind
& typemask
)
1485 case GOMP_MAP_ALLOC
:
1487 case GOMP_MAP_FORCE_ALLOC
:
1488 case GOMP_MAP_FORCE_FROM
:
1489 case GOMP_MAP_ALWAYS_FROM
:
1492 case GOMP_MAP_TOFROM
:
1493 case GOMP_MAP_FORCE_TO
:
1494 case GOMP_MAP_FORCE_TOFROM
:
1495 case GOMP_MAP_ALWAYS_TO
:
1496 case GOMP_MAP_ALWAYS_TOFROM
:
1497 gomp_copy_host2dev (devicep
, aq
,
1498 (void *) (tgt
->tgt_start
1500 (void *) k
->host_start
,
1501 k
->host_end
- k
->host_start
,
1504 case GOMP_MAP_POINTER
:
1505 gomp_map_pointer (tgt
, aq
,
1506 (uintptr_t) *(void **) k
->host_start
,
1507 k
->tgt_offset
, sizes
[i
], cbufp
);
1509 case GOMP_MAP_TO_PSET
:
1510 gomp_copy_host2dev (devicep
, aq
,
1511 (void *) (tgt
->tgt_start
1513 (void *) k
->host_start
,
1514 k
->host_end
- k
->host_start
,
1516 tgt
->list
[i
].has_null_ptr_assoc
= false;
1518 for (j
= i
+ 1; j
< mapnum
; j
++)
1520 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1522 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1523 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1525 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1526 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1531 tgt
->list
[j
].key
= k
;
1532 tgt
->list
[j
].copy_from
= false;
1533 tgt
->list
[j
].always_copy_from
= false;
1534 tgt
->list
[j
].is_attach
= false;
1535 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1536 /* For OpenMP, the use of refcount_sets causes
1537 errors if we set k->refcount = 1 above but also
1538 increment it again here, for decrementing will
1539 not properly match, since we decrement only once
1540 for each key's refcount. Therefore avoid this
1541 increment for OpenMP constructs. */
1543 gomp_increment_refcount (k
, refcount_set
);
1544 gomp_map_pointer (tgt
, aq
,
1545 (uintptr_t) *(void **) hostaddrs
[j
],
1547 + ((uintptr_t) hostaddrs
[j
]
1554 case GOMP_MAP_FORCE_PRESENT
:
1556 /* We already looked up the memory region above and it
1558 size_t size
= k
->host_end
- k
->host_start
;
1559 gomp_mutex_unlock (&devicep
->lock
);
1560 #ifdef HAVE_INTTYPES_H
1561 gomp_fatal ("present clause: !acc_is_present (%p, "
1562 "%"PRIu64
" (0x%"PRIx64
"))",
1563 (void *) k
->host_start
,
1564 (uint64_t) size
, (uint64_t) size
);
1566 gomp_fatal ("present clause: !acc_is_present (%p, "
1567 "%lu (0x%lx))", (void *) k
->host_start
,
1568 (unsigned long) size
, (unsigned long) size
);
1572 case GOMP_MAP_FORCE_DEVICEPTR
:
1573 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1574 gomp_copy_host2dev (devicep
, aq
,
1575 (void *) (tgt
->tgt_start
1577 (void *) k
->host_start
,
1578 sizeof (void *), false, cbufp
);
1581 gomp_mutex_unlock (&devicep
->lock
);
1582 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1586 if (k
->aux
&& k
->aux
->link_key
)
1588 /* Set link pointer on target to the device address of the
1590 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1591 /* We intentionally do not use coalescing here, as it's not
1592 data allocated by the current call to this function. */
1593 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1594 &tgt_addr
, sizeof (void *), true, NULL
);
1601 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1603 for (i
= 0; i
< mapnum
; i
++)
1605 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1606 gomp_copy_host2dev (devicep
, aq
,
1607 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1608 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1615 /* See 'gomp_coalesce_buf_add'. */
1619 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1620 gomp_copy_host2dev (devicep
, aq
,
1621 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1622 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1623 - cbuf
.chunks
[0].start
),
1624 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1631 /* If the variable from "omp target enter data" map-list was already mapped,
1632 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1634 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1640 gomp_mutex_unlock (&devicep
->lock
);
1644 static struct target_mem_desc
*
1645 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1646 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1647 bool short_mapkind
, htab_t
*refcount_set
,
1648 enum gomp_map_vars_kind pragma_kind
)
1650 /* This management of a local refcount_set is for convenience of callers
1651 who do not share a refcount_set over multiple map/unmap uses. */
1652 htab_t local_refcount_set
= NULL
;
1653 if (refcount_set
== NULL
)
1655 local_refcount_set
= htab_create (mapnum
);
1656 refcount_set
= &local_refcount_set
;
1659 struct target_mem_desc
*tgt
;
1660 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1661 sizes
, kinds
, short_mapkind
, refcount_set
,
1663 if (local_refcount_set
)
1664 htab_free (local_refcount_set
);
1669 attribute_hidden
struct target_mem_desc
*
1670 goacc_map_vars (struct gomp_device_descr
*devicep
,
1671 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1672 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1673 void *kinds
, bool short_mapkind
,
1674 enum gomp_map_vars_kind pragma_kind
)
1676 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1677 sizes
, kinds
, short_mapkind
, NULL
,
1678 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1682 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1684 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1686 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1693 gomp_unref_tgt (void *ptr
)
1695 bool is_tgt_unmapped
= false;
1697 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1699 if (tgt
->refcount
> 1)
1703 gomp_unmap_tgt (tgt
);
1704 is_tgt_unmapped
= true;
1707 return is_tgt_unmapped
;
1711 gomp_unref_tgt_void (void *ptr
)
1713 (void) gomp_unref_tgt (ptr
);
1717 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1719 splay_tree_remove (sp
, k
);
1722 if (k
->aux
->link_key
)
1723 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1724 if (k
->aux
->attach_count
)
1725 free (k
->aux
->attach_count
);
1731 static inline __attribute__((always_inline
)) bool
1732 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1733 struct goacc_asyncqueue
*aq
)
1735 bool is_tgt_unmapped
= false;
1737 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
1739 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1740 /* Infer the splay_tree_key of the first structelem key using the
1741 pointer to the first structleme_refcount. */
1742 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1743 - offsetof (struct splay_tree_key_s
,
1744 structelem_refcount
));
1745 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1747 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1748 with the splay_tree_keys embedded inside. */
1749 splay_tree_node node
=
1750 (splay_tree_node
) ((char *) k
1751 - offsetof (struct splay_tree_node_s
, key
));
1754 /* Starting from the _FIRST key, and continue for all following
1756 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1757 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
1764 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1767 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1770 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1771 return is_tgt_unmapped
;
1774 attribute_hidden
bool
1775 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1777 return gomp_remove_var_internal (devicep
, k
, NULL
);
1780 /* Remove a variable asynchronously. This actually removes the variable
1781 mapping immediately, but retains the linked target_mem_desc until the
1782 asynchronous operation has completed (as it may still refer to target
1783 memory). The device lock must be held before entry, and remains locked on
1786 attribute_hidden
void
1787 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1788 struct goacc_asyncqueue
*aq
)
1790 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1793 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1794 variables back from device to host: if it is false, it is assumed that this
1795 has been done already. */
1797 static inline __attribute__((always_inline
)) void
1798 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1799 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
1801 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1803 if (tgt
->list_count
== 0)
1809 gomp_mutex_lock (&devicep
->lock
);
1810 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1812 gomp_mutex_unlock (&devicep
->lock
);
1820 /* We must perform detachments before any copies back to the host. */
1821 for (i
= 0; i
< tgt
->list_count
; i
++)
1823 splay_tree_key k
= tgt
->list
[i
].key
;
1825 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1826 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1827 + tgt
->list
[i
].offset
,
1831 for (i
= 0; i
< tgt
->list_count
; i
++)
1833 splay_tree_key k
= tgt
->list
[i
].key
;
1837 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1838 counts ('n->refcount', 'n->dynamic_refcount'). */
1839 if (tgt
->list
[i
].is_attach
)
1842 bool do_copy
, do_remove
;
1843 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
1845 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1846 || tgt
->list
[i
].always_copy_from
)
1847 gomp_copy_dev2host (devicep
, aq
,
1848 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1849 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1850 + tgt
->list
[i
].offset
),
1851 tgt
->list
[i
].length
);
1854 struct target_mem_desc
*k_tgt
= k
->tgt
;
1855 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1856 /* It would be bad if TGT got unmapped while we're still iterating
1857 over its LIST_COUNT, and also expect to use it in the following
1859 assert (!is_tgt_unmapped
1865 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1868 gomp_unref_tgt ((void *) tgt
);
1870 gomp_mutex_unlock (&devicep
->lock
);
1874 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1875 htab_t
*refcount_set
)
1877 /* This management of a local refcount_set is for convenience of callers
1878 who do not share a refcount_set over multiple map/unmap uses. */
1879 htab_t local_refcount_set
= NULL
;
1880 if (refcount_set
== NULL
)
1882 local_refcount_set
= htab_create (tgt
->list_count
);
1883 refcount_set
= &local_refcount_set
;
1886 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
1888 if (local_refcount_set
)
1889 htab_free (local_refcount_set
);
1892 attribute_hidden
void
1893 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1894 struct goacc_asyncqueue
*aq
)
1896 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
1900 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
1901 size_t *sizes
, void *kinds
, bool short_mapkind
)
1904 struct splay_tree_key_s cur_node
;
1905 const int typemask
= short_mapkind
? 0xff : 0x7;
1913 gomp_mutex_lock (&devicep
->lock
);
1914 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1916 gomp_mutex_unlock (&devicep
->lock
);
1920 for (i
= 0; i
< mapnum
; i
++)
1923 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1924 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1925 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1928 int kind
= get_kind (short_mapkind
, kinds
, i
);
1929 if (n
->host_start
> cur_node
.host_start
1930 || n
->host_end
< cur_node
.host_end
)
1932 gomp_mutex_unlock (&devicep
->lock
);
1933 gomp_fatal ("Trying to update [%p..%p) object when "
1934 "only [%p..%p) is mapped",
1935 (void *) cur_node
.host_start
,
1936 (void *) cur_node
.host_end
,
1937 (void *) n
->host_start
,
1938 (void *) n
->host_end
);
1942 void *hostaddr
= (void *) cur_node
.host_start
;
1943 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1944 + cur_node
.host_start
- n
->host_start
);
1945 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
1947 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1948 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
1950 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
1951 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
1954 gomp_mutex_unlock (&devicep
->lock
);
1957 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1958 And insert to splay tree the mapping between addresses from HOST_TABLE and
1959 from loaded target image. We rely in the host and device compiler
1960 emitting variable and functions in the same order. */
1963 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
1964 const void *host_table
, const void *target_data
,
1965 bool is_register_lock
)
1967 void **host_func_table
= ((void ***) host_table
)[0];
1968 void **host_funcs_end
= ((void ***) host_table
)[1];
1969 void **host_var_table
= ((void ***) host_table
)[2];
1970 void **host_vars_end
= ((void ***) host_table
)[3];
1972 /* The func table contains only addresses, the var table contains addresses
1973 and corresponding sizes. */
1974 int num_funcs
= host_funcs_end
- host_func_table
;
1975 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1977 /* Others currently is only 'device_num' */
1980 /* Load image to device and get target addresses for the image. */
1981 struct addr_pair
*target_table
= NULL
;
1982 int i
, num_target_entries
;
1985 = devicep
->load_image_func (devicep
->target_id
, version
,
1986 target_data
, &target_table
);
1988 if (num_target_entries
!= num_funcs
+ num_vars
1989 /* Others (device_num) are included as trailing entries in pair list. */
1990 && num_target_entries
!= num_funcs
+ num_vars
+ num_others
)
1992 gomp_mutex_unlock (&devicep
->lock
);
1993 if (is_register_lock
)
1994 gomp_mutex_unlock (®ister_lock
);
1995 gomp_fatal ("Cannot map target functions or variables"
1996 " (expected %u, have %u)", num_funcs
+ num_vars
,
1997 num_target_entries
);
2000 /* Insert host-target address mapping into splay tree. */
2001 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2002 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
2003 tgt
->refcount
= REFCOUNT_INFINITY
;
2006 tgt
->to_free
= NULL
;
2008 tgt
->list_count
= 0;
2009 tgt
->device_descr
= devicep
;
2010 splay_tree_node array
= tgt
->array
;
2012 for (i
= 0; i
< num_funcs
; i
++)
2014 splay_tree_key k
= &array
->key
;
2015 k
->host_start
= (uintptr_t) host_func_table
[i
];
2016 k
->host_end
= k
->host_start
+ 1;
2018 k
->tgt_offset
= target_table
[i
].start
;
2019 k
->refcount
= REFCOUNT_INFINITY
;
2020 k
->dynamic_refcount
= 0;
2023 array
->right
= NULL
;
2024 splay_tree_insert (&devicep
->mem_map
, array
);
2028 /* Most significant bit of the size in host and target tables marks
2029 "omp declare target link" variables. */
2030 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2031 const uintptr_t size_mask
= ~link_bit
;
2033 for (i
= 0; i
< num_vars
; i
++)
2035 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2036 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2037 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2039 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2041 gomp_mutex_unlock (&devicep
->lock
);
2042 if (is_register_lock
)
2043 gomp_mutex_unlock (®ister_lock
);
2044 gomp_fatal ("Cannot map target variables (size mismatch)");
2047 splay_tree_key k
= &array
->key
;
2048 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2050 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2052 k
->tgt_offset
= target_var
->start
;
2053 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2054 k
->dynamic_refcount
= 0;
2057 array
->right
= NULL
;
2058 splay_tree_insert (&devicep
->mem_map
, array
);
2062 /* Last entry is for the on-device 'device_num' variable. Tolerate case
2063 where plugin does not return this entry. */
2064 if (num_funcs
+ num_vars
< num_target_entries
)
2066 struct addr_pair
*device_num_var
= &target_table
[num_funcs
+ num_vars
];
2067 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2068 was found in this image. */
2069 if (device_num_var
->start
!= 0)
2071 /* The index of the devicep within devices[] is regarded as its
2072 'device number', which is different from the per-device type
2073 devicep->target_id. */
2074 int device_num_val
= (int) (devicep
- &devices
[0]);
2075 if (device_num_var
->end
- device_num_var
->start
!= sizeof (int))
2077 gomp_mutex_unlock (&devicep
->lock
);
2078 if (is_register_lock
)
2079 gomp_mutex_unlock (®ister_lock
);
2080 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2084 /* Copy device_num value to place on device memory, hereby actually
2085 designating its device number into effect. */
2086 gomp_copy_host2dev (devicep
, NULL
, (void *) device_num_var
->start
,
2087 &device_num_val
, sizeof (int), false, NULL
);
2091 free (target_table
);
2094 /* Unload the mappings described by target_data from device DEVICE_P.
2095 The device must be locked. */
2098 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2100 const void *host_table
, const void *target_data
)
2102 void **host_func_table
= ((void ***) host_table
)[0];
2103 void **host_funcs_end
= ((void ***) host_table
)[1];
2104 void **host_var_table
= ((void ***) host_table
)[2];
2105 void **host_vars_end
= ((void ***) host_table
)[3];
2107 /* The func table contains only addresses, the var table contains addresses
2108 and corresponding sizes. */
2109 int num_funcs
= host_funcs_end
- host_func_table
;
2110 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2112 struct splay_tree_key_s k
;
2113 splay_tree_key node
= NULL
;
2115 /* Find mapping at start of node array */
2116 if (num_funcs
|| num_vars
)
2118 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2119 : (uintptr_t) host_var_table
[0]);
2120 k
.host_end
= k
.host_start
+ 1;
2121 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2124 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2126 gomp_mutex_unlock (&devicep
->lock
);
2127 gomp_fatal ("image unload fail");
2130 /* Remove mappings from splay tree. */
2132 for (i
= 0; i
< num_funcs
; i
++)
2134 k
.host_start
= (uintptr_t) host_func_table
[i
];
2135 k
.host_end
= k
.host_start
+ 1;
2136 splay_tree_remove (&devicep
->mem_map
, &k
);
2139 /* Most significant bit of the size in host and target tables marks
2140 "omp declare target link" variables. */
2141 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2142 const uintptr_t size_mask
= ~link_bit
;
2143 bool is_tgt_unmapped
= false;
2145 for (i
= 0; i
< num_vars
; i
++)
2147 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2149 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2151 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2152 splay_tree_remove (&devicep
->mem_map
, &k
);
2155 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2156 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2160 if (node
&& !is_tgt_unmapped
)
2167 /* This function should be called from every offload image while loading.
2168 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2169 the target, and TARGET_DATA needed by target plugin. */
2172 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2173 int target_type
, const void *target_data
)
2177 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2178 gomp_fatal ("Library too old for offload (version %u < %u)",
2179 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2181 gomp_mutex_lock (®ister_lock
);
2183 /* Load image to all initialized devices. */
2184 for (i
= 0; i
< num_devices
; i
++)
2186 struct gomp_device_descr
*devicep
= &devices
[i
];
2187 gomp_mutex_lock (&devicep
->lock
);
2188 if (devicep
->type
== target_type
2189 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2190 gomp_load_image_to_device (devicep
, version
,
2191 host_table
, target_data
, true);
2192 gomp_mutex_unlock (&devicep
->lock
);
2195 /* Insert image to array of pending images. */
2197 = gomp_realloc_unlock (offload_images
,
2198 (num_offload_images
+ 1)
2199 * sizeof (struct offload_image_descr
));
2200 offload_images
[num_offload_images
].version
= version
;
2201 offload_images
[num_offload_images
].type
= target_type
;
2202 offload_images
[num_offload_images
].host_table
= host_table
;
2203 offload_images
[num_offload_images
].target_data
= target_data
;
2205 num_offload_images
++;
2206 gomp_mutex_unlock (®ister_lock
);
2210 GOMP_offload_register (const void *host_table
, int target_type
,
2211 const void *target_data
)
2213 GOMP_offload_register_ver (0, host_table
, target_type
, target_data
);
2216 /* This function should be called from every offload image while unloading.
2217 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2218 the target, and TARGET_DATA needed by target plugin. */
2221 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2222 int target_type
, const void *target_data
)
2226 gomp_mutex_lock (®ister_lock
);
2228 /* Unload image from all initialized devices. */
2229 for (i
= 0; i
< num_devices
; i
++)
2231 struct gomp_device_descr
*devicep
= &devices
[i
];
2232 gomp_mutex_lock (&devicep
->lock
);
2233 if (devicep
->type
== target_type
2234 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2235 gomp_unload_image_from_device (devicep
, version
,
2236 host_table
, target_data
);
2237 gomp_mutex_unlock (&devicep
->lock
);
2240 /* Remove image from array of pending images. */
2241 for (i
= 0; i
< num_offload_images
; i
++)
2242 if (offload_images
[i
].target_data
== target_data
)
2244 offload_images
[i
] = offload_images
[--num_offload_images
];
2248 gomp_mutex_unlock (®ister_lock
);
2252 GOMP_offload_unregister (const void *host_table
, int target_type
,
2253 const void *target_data
)
2255 GOMP_offload_unregister_ver (0, host_table
, target_type
, target_data
);
2258 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2259 must be locked on entry, and remains locked on return. */
2261 attribute_hidden
void
2262 gomp_init_device (struct gomp_device_descr
*devicep
)
2265 if (!devicep
->init_device_func (devicep
->target_id
))
2267 gomp_mutex_unlock (&devicep
->lock
);
2268 gomp_fatal ("device initialization failed");
2271 /* Load to device all images registered by the moment. */
2272 for (i
= 0; i
< num_offload_images
; i
++)
2274 struct offload_image_descr
*image
= &offload_images
[i
];
2275 if (image
->type
== devicep
->type
)
2276 gomp_load_image_to_device (devicep
, image
->version
,
2277 image
->host_table
, image
->target_data
,
2281 /* Initialize OpenACC asynchronous queues. */
2282 goacc_init_asyncqueues (devicep
);
2284 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2287 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2288 must be locked on entry, and remains locked on return. */
2290 attribute_hidden
bool
2291 gomp_fini_device (struct gomp_device_descr
*devicep
)
2293 bool ret
= goacc_fini_asyncqueues (devicep
);
2294 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2295 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2299 attribute_hidden
void
2300 gomp_unload_device (struct gomp_device_descr
*devicep
)
2302 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2306 /* Unload from device all images registered at the moment. */
2307 for (i
= 0; i
< num_offload_images
; i
++)
2309 struct offload_image_descr
*image
= &offload_images
[i
];
2310 if (image
->type
== devicep
->type
)
2311 gomp_unload_image_from_device (devicep
, image
->version
,
2313 image
->target_data
);
2318 /* Host fallback for GOMP_target{,_ext} routines. */
2321 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2322 struct gomp_device_descr
*devicep
)
2324 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2326 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2328 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2329 "be used for offloading");
2332 memset (thr
, '\0', sizeof (*thr
));
2333 if (gomp_places_list
)
2335 thr
->place
= old_thr
.place
;
2336 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2339 gomp_free_thread (thr
);
2343 /* Calculate alignment and size requirements of a private copy of data shared
2344 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2347 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2348 unsigned short *kinds
, size_t *tgt_align
,
2352 for (i
= 0; i
< mapnum
; i
++)
2353 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2355 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2356 if (*tgt_align
< align
)
2358 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2359 *tgt_size
+= sizes
[i
];
2363 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2366 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2367 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2370 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2372 tgt
+= tgt_align
- al
;
2375 for (i
= 0; i
< mapnum
; i
++)
2376 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2378 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2379 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2380 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2381 hostaddrs
[i
] = tgt
+ tgt_size
;
2382 tgt_size
= tgt_size
+ sizes
[i
];
2386 /* Helper function of GOMP_target{,_ext} routines. */
2389 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2390 void (*host_fn
) (void *))
2392 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2393 return (void *) host_fn
;
2396 gomp_mutex_lock (&devicep
->lock
);
2397 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2399 gomp_mutex_unlock (&devicep
->lock
);
2403 struct splay_tree_key_s k
;
2404 k
.host_start
= (uintptr_t) host_fn
;
2405 k
.host_end
= k
.host_start
+ 1;
2406 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2407 gomp_mutex_unlock (&devicep
->lock
);
2411 return (void *) tgt_fn
->tgt_offset
;
2415 /* Called when encountering a target directive. If DEVICE
2416 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2417 GOMP_DEVICE_HOST_FALLBACK (or any value
2418 larger than last available hw device), use host fallback.
2419 FN is address of host code, UNUSED is part of the current ABI, but
2420 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2421 with MAPNUM entries, with addresses of the host objects,
2422 sizes of the host objects (resp. for pointer kind pointer bias
2423 and assumed sizeof (void *) size) and kinds. */
2426 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2427 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2428 unsigned char *kinds
)
2430 struct gomp_device_descr
*devicep
= resolve_device (device
);
2434 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2435 /* All shared memory devices should use the GOMP_target_ext function. */
2436 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2437 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2438 return gomp_target_fallback (fn
, hostaddrs
, devicep
);
2440 htab_t refcount_set
= htab_create (mapnum
);
2441 struct target_mem_desc
*tgt_vars
2442 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2443 &refcount_set
, GOMP_MAP_VARS_TARGET
);
2444 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2446 htab_clear (refcount_set
);
2447 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2448 htab_free (refcount_set
);
2451 static inline unsigned int
2452 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2454 /* If we cannot run asynchronously, simply ignore nowait. */
2455 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2456 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2461 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2462 and several arguments have been added:
2463 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2464 DEPEND is array of dependencies, see GOMP_task for details.
2466 ARGS is a pointer to an array consisting of a variable number of both
2467 device-independent and device-specific arguments, which can take one two
2468 elements where the first specifies for which device it is intended, the type
2469 and optionally also the value. If the value is not present in the first
2470 one, the whole second element the actual value. The last element of the
2471 array is a single NULL. Among the device independent can be for example
2472 NUM_TEAMS and THREAD_LIMIT.
2474 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2475 that value, or 1 if teams construct is not present, or 0, if
2476 teams construct does not have num_teams clause and so the choice is
2477 implementation defined, and -1 if it can't be determined on the host
2478 what value will GOMP_teams have on the device.
2479 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2480 body with that value, or 0, if teams construct does not have thread_limit
2481 clause or the teams construct is not present, or -1 if it can't be
2482 determined on the host what value will GOMP_teams have on the device. */
2485 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2486 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2487 unsigned int flags
, void **depend
, void **args
)
2489 struct gomp_device_descr
*devicep
= resolve_device (device
);
2490 size_t tgt_align
= 0, tgt_size
= 0;
2491 bool fpc_done
= false;
2493 flags
= clear_unsupported_flags (devicep
, flags
);
2495 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
2497 struct gomp_thread
*thr
= gomp_thread ();
2498 /* Create a team if we don't have any around, as nowait
2499 target tasks make sense to run asynchronously even when
2500 outside of any parallel. */
2501 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
2503 struct gomp_team
*team
= gomp_new_team (1);
2504 struct gomp_task
*task
= thr
->task
;
2505 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
2506 team
->prev_ts
= thr
->ts
;
2507 thr
->ts
.team
= team
;
2508 thr
->ts
.team_id
= 0;
2509 thr
->ts
.work_share
= &team
->work_shares
[0];
2510 thr
->ts
.last_work_share
= NULL
;
2511 #ifdef HAVE_SYNC_BUILTINS
2512 thr
->ts
.single_count
= 0;
2514 thr
->ts
.static_trip
= 0;
2515 thr
->task
= &team
->implicit_task
[0];
2516 gomp_init_task (thr
->task
, NULL
, icv
);
2522 thr
->task
= &team
->implicit_task
[0];
2525 pthread_setspecific (gomp_thread_destructor
, thr
);
2528 && !thr
->task
->final_task
)
2530 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
2531 sizes
, kinds
, flags
, depend
, args
,
2532 GOMP_TARGET_TASK_BEFORE_MAP
);
2537 /* If there are depend clauses, but nowait is not present
2538 (or we are in a final task), block the parent task until the
2539 dependencies are resolved and then just continue with the rest
2540 of the function as if it is a merged task. */
2543 struct gomp_thread
*thr
= gomp_thread ();
2544 if (thr
->task
&& thr
->task
->depend_hash
)
2546 /* If we might need to wait, copy firstprivate now. */
2547 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2548 &tgt_align
, &tgt_size
);
2551 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2552 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2553 tgt_align
, tgt_size
);
2556 gomp_task_maybe_wait_for_dependencies (depend
);
2562 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2563 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
2564 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2568 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2569 &tgt_align
, &tgt_size
);
2572 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2573 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2574 tgt_align
, tgt_size
);
2577 gomp_target_fallback (fn
, hostaddrs
, devicep
);
2581 struct target_mem_desc
*tgt_vars
;
2582 htab_t refcount_set
= NULL
;
2584 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2588 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2589 &tgt_align
, &tgt_size
);
2592 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2593 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2594 tgt_align
, tgt_size
);
2601 refcount_set
= htab_create (mapnum
);
2602 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
2603 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
2605 devicep
->run_func (devicep
->target_id
, fn_addr
,
2606 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
2610 htab_clear (refcount_set
);
2611 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2614 htab_free (refcount_set
);
2617 /* Host fallback for GOMP_target_data{,_ext} routines. */
2620 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
2622 struct gomp_task_icv
*icv
= gomp_icv (false);
2624 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2626 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2627 "be used for offloading");
2629 if (icv
->target_data
)
2631 /* Even when doing a host fallback, if there are any active
2632 #pragma omp target data constructs, need to remember the
2633 new #pragma omp target data, otherwise GOMP_target_end_data
2634 would get out of sync. */
2635 struct target_mem_desc
*tgt
2636 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
2637 NULL
, GOMP_MAP_VARS_DATA
);
2638 tgt
->prev
= icv
->target_data
;
2639 icv
->target_data
= tgt
;
2644 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
2645 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2647 struct gomp_device_descr
*devicep
= resolve_device (device
);
2650 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2651 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
2652 return gomp_target_data_fallback (devicep
);
2654 struct target_mem_desc
*tgt
2655 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2656 NULL
, GOMP_MAP_VARS_DATA
);
2657 struct gomp_task_icv
*icv
= gomp_icv (true);
2658 tgt
->prev
= icv
->target_data
;
2659 icv
->target_data
= tgt
;
2663 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2664 size_t *sizes
, unsigned short *kinds
)
2666 struct gomp_device_descr
*devicep
= resolve_device (device
);
2669 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2670 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2671 return gomp_target_data_fallback (devicep
);
2673 struct target_mem_desc
*tgt
2674 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2675 NULL
, GOMP_MAP_VARS_DATA
);
2676 struct gomp_task_icv
*icv
= gomp_icv (true);
2677 tgt
->prev
= icv
->target_data
;
2678 icv
->target_data
= tgt
;
2682 GOMP_target_end_data (void)
2684 struct gomp_task_icv
*icv
= gomp_icv (false);
2685 if (icv
->target_data
)
2687 struct target_mem_desc
*tgt
= icv
->target_data
;
2688 icv
->target_data
= tgt
->prev
;
2689 gomp_unmap_vars (tgt
, true, NULL
);
2694 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2695 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2697 struct gomp_device_descr
*devicep
= resolve_device (device
);
2700 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2701 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2704 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2708 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2709 size_t *sizes
, unsigned short *kinds
,
2710 unsigned int flags
, void **depend
)
2712 struct gomp_device_descr
*devicep
= resolve_device (device
);
2714 /* If there are depend clauses, but nowait is not present,
2715 block the parent task until the dependencies are resolved
2716 and then just continue with the rest of the function as if it
2717 is a merged task. Until we are able to schedule task during
2718 variable mapping or unmapping, ignore nowait if depend clauses
2722 struct gomp_thread
*thr
= gomp_thread ();
2723 if (thr
->task
&& thr
->task
->depend_hash
)
2725 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2727 && !thr
->task
->final_task
)
2729 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2730 mapnum
, hostaddrs
, sizes
, kinds
,
2731 flags
| GOMP_TARGET_FLAG_UPDATE
,
2732 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2737 struct gomp_team
*team
= thr
->ts
.team
;
2738 /* If parallel or taskgroup has been cancelled, don't start new
2740 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2742 if (gomp_team_barrier_cancelled (&team
->barrier
))
2744 if (thr
->task
->taskgroup
)
2746 if (thr
->task
->taskgroup
->cancelled
)
2748 if (thr
->task
->taskgroup
->workshare
2749 && thr
->task
->taskgroup
->prev
2750 && thr
->task
->taskgroup
->prev
->cancelled
)
2755 gomp_task_maybe_wait_for_dependencies (depend
);
2761 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2762 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2765 struct gomp_thread
*thr
= gomp_thread ();
2766 struct gomp_team
*team
= thr
->ts
.team
;
2767 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2768 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2770 if (gomp_team_barrier_cancelled (&team
->barrier
))
2772 if (thr
->task
->taskgroup
)
2774 if (thr
->task
->taskgroup
->cancelled
)
2776 if (thr
->task
->taskgroup
->workshare
2777 && thr
->task
->taskgroup
->prev
2778 && thr
->task
->taskgroup
->prev
->cancelled
)
2783 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2787 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2788 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2789 htab_t
*refcount_set
)
2791 const int typemask
= 0xff;
2793 gomp_mutex_lock (&devicep
->lock
);
2794 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2796 gomp_mutex_unlock (&devicep
->lock
);
2800 for (i
= 0; i
< mapnum
; i
++)
2801 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
2803 struct splay_tree_key_s cur_node
;
2804 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2805 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
2806 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2809 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
2814 splay_tree_key remove_vars
[mapnum
];
2816 for (i
= 0; i
< mapnum
; i
++)
2818 struct splay_tree_key_s cur_node
;
2819 unsigned char kind
= kinds
[i
] & typemask
;
2823 case GOMP_MAP_ALWAYS_FROM
:
2824 case GOMP_MAP_DELETE
:
2825 case GOMP_MAP_RELEASE
:
2826 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2827 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2828 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2829 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2830 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2831 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2832 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2833 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2837 bool delete_p
= (kind
== GOMP_MAP_DELETE
2838 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
2839 bool do_copy
, do_remove
;
2840 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
2843 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
2844 || kind
== GOMP_MAP_ALWAYS_FROM
)
2845 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
2846 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2847 + cur_node
.host_start
2849 cur_node
.host_end
- cur_node
.host_start
);
2851 /* Structure elements lists are removed altogether at once, which
2852 may cause immediate deallocation of the target_mem_desc, causing
2853 errors if we still have following element siblings to copy back.
2854 While we're at it, it also seems more disciplined to simply
2855 queue all removals together for processing below.
2857 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
2858 not have this problem, since they maintain an additional
2859 tgt->refcount = 1 reference to the target_mem_desc to start with.
2862 remove_vars
[nrmvars
++] = k
;
2865 case GOMP_MAP_DETACH
:
2868 gomp_mutex_unlock (&devicep
->lock
);
2869 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2874 for (int i
= 0; i
< nrmvars
; i
++)
2875 gomp_remove_var (devicep
, remove_vars
[i
]);
2877 gomp_mutex_unlock (&devicep
->lock
);
2881 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
2882 size_t *sizes
, unsigned short *kinds
,
2883 unsigned int flags
, void **depend
)
2885 struct gomp_device_descr
*devicep
= resolve_device (device
);
2887 /* If there are depend clauses, but nowait is not present,
2888 block the parent task until the dependencies are resolved
2889 and then just continue with the rest of the function as if it
2890 is a merged task. Until we are able to schedule task during
2891 variable mapping or unmapping, ignore nowait if depend clauses
2895 struct gomp_thread
*thr
= gomp_thread ();
2896 if (thr
->task
&& thr
->task
->depend_hash
)
2898 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2900 && !thr
->task
->final_task
)
2902 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2903 mapnum
, hostaddrs
, sizes
, kinds
,
2904 flags
, depend
, NULL
,
2905 GOMP_TARGET_TASK_DATA
))
2910 struct gomp_team
*team
= thr
->ts
.team
;
2911 /* If parallel or taskgroup has been cancelled, don't start new
2913 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2915 if (gomp_team_barrier_cancelled (&team
->barrier
))
2917 if (thr
->task
->taskgroup
)
2919 if (thr
->task
->taskgroup
->cancelled
)
2921 if (thr
->task
->taskgroup
->workshare
2922 && thr
->task
->taskgroup
->prev
2923 && thr
->task
->taskgroup
->prev
->cancelled
)
2928 gomp_task_maybe_wait_for_dependencies (depend
);
2934 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2935 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2938 struct gomp_thread
*thr
= gomp_thread ();
2939 struct gomp_team
*team
= thr
->ts
.team
;
2940 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2941 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2943 if (gomp_team_barrier_cancelled (&team
->barrier
))
2945 if (thr
->task
->taskgroup
)
2947 if (thr
->task
->taskgroup
->cancelled
)
2949 if (thr
->task
->taskgroup
->workshare
2950 && thr
->task
->taskgroup
->prev
2951 && thr
->task
->taskgroup
->prev
->cancelled
)
2956 htab_t refcount_set
= htab_create (mapnum
);
2958 /* The variables are mapped separately such that they can be released
2961 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2962 for (i
= 0; i
< mapnum
; i
++)
2963 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2965 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
2966 &kinds
[i
], true, &refcount_set
,
2967 GOMP_MAP_VARS_ENTER_DATA
);
2970 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
2972 for (j
= i
+ 1; j
< mapnum
; j
++)
2973 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
2974 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
2976 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
2977 &kinds
[i
], true, &refcount_set
,
2978 GOMP_MAP_VARS_ENTER_DATA
);
2981 else if (i
+ 1 < mapnum
&& (kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
2983 /* An attach operation must be processed together with the mapped
2984 base-pointer list item. */
2985 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2986 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
2990 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2991 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
2993 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
2994 htab_free (refcount_set
);
2998 gomp_target_task_fn (void *data
)
3000 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
3001 struct gomp_device_descr
*devicep
= ttask
->devicep
;
3003 if (ttask
->fn
!= NULL
)
3007 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3008 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
3009 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3011 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
3012 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
);
3016 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
3019 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
3023 void *actual_arguments
;
3024 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3027 actual_arguments
= ttask
->hostaddrs
;
3031 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
3032 NULL
, ttask
->sizes
, ttask
->kinds
, true,
3033 NULL
, GOMP_MAP_VARS_TARGET
);
3034 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
3036 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
3038 assert (devicep
->async_run_func
);
3039 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
3040 ttask
->args
, (void *) ttask
);
3043 else if (devicep
== NULL
3044 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3045 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3049 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
3050 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3051 ttask
->kinds
, true);
3054 htab_t refcount_set
= htab_create (ttask
->mapnum
);
3055 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
3056 for (i
= 0; i
< ttask
->mapnum
; i
++)
3057 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
3059 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
3060 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
3061 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3062 i
+= ttask
->sizes
[i
];
3065 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
3066 &ttask
->kinds
[i
], true, &refcount_set
,
3067 GOMP_MAP_VARS_ENTER_DATA
);
3069 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3070 ttask
->kinds
, &refcount_set
);
3071 htab_free (refcount_set
);
3077 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
3081 struct gomp_task_icv
*icv
= gomp_icv (true);
3082 icv
->thread_limit_var
3083 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
3089 omp_target_alloc (size_t size
, int device_num
)
3091 if (device_num
== gomp_get_num_devices ())
3092 return malloc (size
);
3097 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3098 if (devicep
== NULL
)
3101 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3102 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3103 return malloc (size
);
3105 gomp_mutex_lock (&devicep
->lock
);
3106 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
3107 gomp_mutex_unlock (&devicep
->lock
);
3112 omp_target_free (void *device_ptr
, int device_num
)
3114 if (device_ptr
== NULL
)
3117 if (device_num
== gomp_get_num_devices ())
3126 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3127 if (devicep
== NULL
)
3130 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3131 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3137 gomp_mutex_lock (&devicep
->lock
);
3138 gomp_free_device_memory (devicep
, device_ptr
);
3139 gomp_mutex_unlock (&devicep
->lock
);
3143 omp_target_is_present (const void *ptr
, int device_num
)
3148 if (device_num
== gomp_get_num_devices ())
3154 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3155 if (devicep
== NULL
)
3158 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3159 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3162 gomp_mutex_lock (&devicep
->lock
);
3163 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3164 struct splay_tree_key_s cur_node
;
3166 cur_node
.host_start
= (uintptr_t) ptr
;
3167 cur_node
.host_end
= cur_node
.host_start
;
3168 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
3169 int ret
= n
!= NULL
;
3170 gomp_mutex_unlock (&devicep
->lock
);
3175 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
3176 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
3179 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3182 if (dst_device_num
!= gomp_get_num_devices ())
3184 if (dst_device_num
< 0)
3187 dst_devicep
= resolve_device (dst_device_num
);
3188 if (dst_devicep
== NULL
)
3191 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3192 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3195 if (src_device_num
!= num_devices_openmp
)
3197 if (src_device_num
< 0)
3200 src_devicep
= resolve_device (src_device_num
);
3201 if (src_devicep
== NULL
)
3204 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3205 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3208 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
3210 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
3213 if (src_devicep
== NULL
)
3215 gomp_mutex_lock (&dst_devicep
->lock
);
3216 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
3217 (char *) dst
+ dst_offset
,
3218 (char *) src
+ src_offset
, length
);
3219 gomp_mutex_unlock (&dst_devicep
->lock
);
3220 return (ret
? 0 : EINVAL
);
3222 if (dst_devicep
== NULL
)
3224 gomp_mutex_lock (&src_devicep
->lock
);
3225 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
3226 (char *) dst
+ dst_offset
,
3227 (char *) src
+ src_offset
, length
);
3228 gomp_mutex_unlock (&src_devicep
->lock
);
3229 return (ret
? 0 : EINVAL
);
3231 if (src_devicep
== dst_devicep
)
3233 gomp_mutex_lock (&src_devicep
->lock
);
3234 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
3235 (char *) dst
+ dst_offset
,
3236 (char *) src
+ src_offset
, length
);
3237 gomp_mutex_unlock (&src_devicep
->lock
);
3238 return (ret
? 0 : EINVAL
);
3244 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
3245 int num_dims
, const size_t *volume
,
3246 const size_t *dst_offsets
,
3247 const size_t *src_offsets
,
3248 const size_t *dst_dimensions
,
3249 const size_t *src_dimensions
,
3250 struct gomp_device_descr
*dst_devicep
,
3251 struct gomp_device_descr
*src_devicep
)
3253 size_t dst_slice
= element_size
;
3254 size_t src_slice
= element_size
;
3255 size_t j
, dst_off
, src_off
, length
;
3260 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
3261 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
3262 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
3264 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
3266 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
3270 else if (src_devicep
== NULL
)
3271 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
3272 (char *) dst
+ dst_off
,
3273 (const char *) src
+ src_off
,
3275 else if (dst_devicep
== NULL
)
3276 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
3277 (char *) dst
+ dst_off
,
3278 (const char *) src
+ src_off
,
3280 else if (src_devicep
== dst_devicep
)
3281 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
3282 (char *) dst
+ dst_off
,
3283 (const char *) src
+ src_off
,
3287 return ret
? 0 : EINVAL
;
3290 /* FIXME: it would be nice to have some plugin function to handle
3291 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3292 be handled in the generic recursion below, and for host-host it
3293 should be used even for any num_dims >= 2. */
3295 for (i
= 1; i
< num_dims
; i
++)
3296 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
3297 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
3299 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
3300 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
3302 for (j
= 0; j
< volume
[0]; j
++)
3304 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
3305 (const char *) src
+ src_off
,
3306 element_size
, num_dims
- 1,
3307 volume
+ 1, dst_offsets
+ 1,
3308 src_offsets
+ 1, dst_dimensions
+ 1,
3309 src_dimensions
+ 1, dst_devicep
,
3313 dst_off
+= dst_slice
;
3314 src_off
+= src_slice
;
3320 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
3321 int num_dims
, const size_t *volume
,
3322 const size_t *dst_offsets
,
3323 const size_t *src_offsets
,
3324 const size_t *dst_dimensions
,
3325 const size_t *src_dimensions
,
3326 int dst_device_num
, int src_device_num
)
3328 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3333 if (dst_device_num
!= gomp_get_num_devices ())
3335 if (dst_device_num
< 0)
3338 dst_devicep
= resolve_device (dst_device_num
);
3339 if (dst_devicep
== NULL
)
3342 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3343 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3346 if (src_device_num
!= num_devices_openmp
)
3348 if (src_device_num
< 0)
3351 src_devicep
= resolve_device (src_device_num
);
3352 if (src_devicep
== NULL
)
3355 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3356 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3360 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
3364 gomp_mutex_lock (&src_devicep
->lock
);
3365 else if (dst_devicep
)
3366 gomp_mutex_lock (&dst_devicep
->lock
);
3367 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
3368 volume
, dst_offsets
, src_offsets
,
3369 dst_dimensions
, src_dimensions
,
3370 dst_devicep
, src_devicep
);
3372 gomp_mutex_unlock (&src_devicep
->lock
);
3373 else if (dst_devicep
)
3374 gomp_mutex_unlock (&dst_devicep
->lock
);
3379 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
3380 size_t size
, size_t device_offset
, int device_num
)
3382 if (device_num
== gomp_get_num_devices ())
3388 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3389 if (devicep
== NULL
)
3392 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3393 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3396 gomp_mutex_lock (&devicep
->lock
);
3398 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3399 struct splay_tree_key_s cur_node
;
3402 cur_node
.host_start
= (uintptr_t) host_ptr
;
3403 cur_node
.host_end
= cur_node
.host_start
+ size
;
3404 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3407 if (n
->tgt
->tgt_start
+ n
->tgt_offset
3408 == (uintptr_t) device_ptr
+ device_offset
3409 && n
->host_start
<= cur_node
.host_start
3410 && n
->host_end
>= cur_node
.host_end
)
3415 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
3416 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
3420 tgt
->to_free
= NULL
;
3422 tgt
->list_count
= 0;
3423 tgt
->device_descr
= devicep
;
3424 splay_tree_node array
= tgt
->array
;
3425 splay_tree_key k
= &array
->key
;
3426 k
->host_start
= cur_node
.host_start
;
3427 k
->host_end
= cur_node
.host_end
;
3429 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
3430 k
->refcount
= REFCOUNT_INFINITY
;
3431 k
->dynamic_refcount
= 0;
3434 array
->right
= NULL
;
3435 splay_tree_insert (&devicep
->mem_map
, array
);
3438 gomp_mutex_unlock (&devicep
->lock
);
3443 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
3445 if (device_num
== gomp_get_num_devices ())
3451 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3452 if (devicep
== NULL
)
3455 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3458 gomp_mutex_lock (&devicep
->lock
);
3460 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3461 struct splay_tree_key_s cur_node
;
3464 cur_node
.host_start
= (uintptr_t) ptr
;
3465 cur_node
.host_end
= cur_node
.host_start
;
3466 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3468 && n
->host_start
== cur_node
.host_start
3469 && n
->refcount
== REFCOUNT_INFINITY
3470 && n
->tgt
->tgt_start
== 0
3471 && n
->tgt
->to_free
== NULL
3472 && n
->tgt
->refcount
== 1
3473 && n
->tgt
->list_count
== 0)
3475 splay_tree_remove (&devicep
->mem_map
, n
);
3476 gomp_unmap_tgt (n
->tgt
);
3480 gomp_mutex_unlock (&devicep
->lock
);
3485 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
3488 if (device_num
== gomp_get_num_devices ())
3489 return gomp_pause_host ();
3490 if (device_num
< 0 || device_num
>= num_devices_openmp
)
3492 /* Do nothing for target devices for now. */
3497 omp_pause_resource_all (omp_pause_resource_t kind
)
3500 if (gomp_pause_host ())
3502 /* Do nothing for target devices for now. */
3506 ialias (omp_pause_resource
)
3507 ialias (omp_pause_resource_all
)
3509 #ifdef PLUGIN_SUPPORT
3511 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
3513 The handles of the found functions are stored in the corresponding fields
3514 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3517 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
3518 const char *plugin_name
)
3520 const char *err
= NULL
, *last_missing
= NULL
;
3522 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
3524 #if OFFLOAD_DEFAULTED
3530 /* Check if all required functions are available in the plugin and store
3531 their handlers. None of the symbols can legitimately be NULL,
3532 so we don't need to check dlerror all the time. */
3534 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3536 /* Similar, but missing functions are not an error. Return false if
3537 failed, true otherwise. */
3538 #define DLSYM_OPT(f, n) \
3539 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3540 || (last_missing = #n, 0))
3543 if (device
->version_func () != GOMP_VERSION
)
3545 err
= "plugin version mismatch";
3552 DLSYM (get_num_devices
);
3553 DLSYM (init_device
);
3554 DLSYM (fini_device
);
3556 DLSYM (unload_image
);
3561 device
->capabilities
= device
->get_caps_func ();
3562 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3565 DLSYM_OPT (async_run
, async_run
);
3566 DLSYM_OPT (can_run
, can_run
);
3569 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3571 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
3572 || !DLSYM_OPT (openacc
.create_thread_data
,
3573 openacc_create_thread_data
)
3574 || !DLSYM_OPT (openacc
.destroy_thread_data
,
3575 openacc_destroy_thread_data
)
3576 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
3577 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
3578 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
3579 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
3580 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
3581 || !DLSYM_OPT (openacc
.async
.queue_callback
,
3582 openacc_async_queue_callback
)
3583 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
3584 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
3585 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
3586 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
3588 /* Require all the OpenACC handlers if we have
3589 GOMP_OFFLOAD_CAP_OPENACC_200. */
3590 err
= "plugin missing OpenACC handler function";
3595 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
3596 openacc_cuda_get_current_device
);
3597 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
3598 openacc_cuda_get_current_context
);
3599 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
3600 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
3601 if (cuda
&& cuda
!= 4)
3603 /* Make sure all the CUDA functions are there if any of them are. */
3604 err
= "plugin missing OpenACC CUDA handler function";
3616 gomp_error ("while loading %s: %s", plugin_name
, err
);
3618 gomp_error ("missing function was %s", last_missing
);
3620 dlclose (plugin_handle
);
3625 /* This function finalizes all initialized devices. */
3628 gomp_target_fini (void)
3631 for (i
= 0; i
< num_devices
; i
++)
3634 struct gomp_device_descr
*devicep
= &devices
[i
];
3635 gomp_mutex_lock (&devicep
->lock
);
3636 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3637 ret
= gomp_fini_device (devicep
);
3638 gomp_mutex_unlock (&devicep
->lock
);
3640 gomp_fatal ("device finalization failed");
3644 /* This function initializes the runtime for offloading.
3645 It parses the list of offload plugins, and tries to load these.
3646 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3647 will be set, and the array DEVICES initialized, containing descriptors for
3648 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3652 gomp_target_init (void)
3654 const char *prefix
="libgomp-plugin-";
3655 const char *suffix
= SONAME_SUFFIX (1);
3656 const char *cur
, *next
;
3658 int i
, new_num_devs
;
3659 int num_devs
= 0, num_devs_openmp
;
3660 struct gomp_device_descr
*devs
= NULL
;
3662 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
3665 cur
= OFFLOAD_PLUGINS
;
3669 struct gomp_device_descr current_device
;
3670 size_t prefix_len
, suffix_len
, cur_len
;
3672 next
= strchr (cur
, ',');
3674 prefix_len
= strlen (prefix
);
3675 cur_len
= next
? next
- cur
: strlen (cur
);
3676 suffix_len
= strlen (suffix
);
3678 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
3685 memcpy (plugin_name
, prefix
, prefix_len
);
3686 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
3687 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
3689 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
3691 new_num_devs
= current_device
.get_num_devices_func ();
3692 if (new_num_devs
>= 1)
3694 /* Augment DEVICES and NUM_DEVICES. */
3696 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
3697 * sizeof (struct gomp_device_descr
));
3705 current_device
.name
= current_device
.get_name_func ();
3706 /* current_device.capabilities has already been set. */
3707 current_device
.type
= current_device
.get_type_func ();
3708 current_device
.mem_map
.root
= NULL
;
3709 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
3710 for (i
= 0; i
< new_num_devs
; i
++)
3712 current_device
.target_id
= i
;
3713 devs
[num_devs
] = current_device
;
3714 gomp_mutex_init (&devs
[num_devs
].lock
);
3725 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3726 NUM_DEVICES_OPENMP. */
3727 struct gomp_device_descr
*devs_s
3728 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
3735 num_devs_openmp
= 0;
3736 for (i
= 0; i
< num_devs
; i
++)
3737 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3738 devs_s
[num_devs_openmp
++] = devs
[i
];
3739 int num_devs_after_openmp
= num_devs_openmp
;
3740 for (i
= 0; i
< num_devs
; i
++)
3741 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3742 devs_s
[num_devs_after_openmp
++] = devs
[i
];
3746 for (i
= 0; i
< num_devs
; i
++)
3748 /* The 'devices' array can be moved (by the realloc call) until we have
3749 found all the plugins, so registering with the OpenACC runtime (which
3750 takes a copy of the pointer argument) must be delayed until now. */
3751 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3752 goacc_register (&devs
[i
]);
3755 num_devices
= num_devs
;
3756 num_devices_openmp
= num_devs_openmp
;
3758 if (atexit (gomp_target_fini
) != 0)
3759 gomp_fatal ("atexit failed");
3762 #else /* PLUGIN_SUPPORT */
3763 /* If dlfcn.h is unavailable we always fallback to host execution.
3764 GOMP_target* routines are just stubs for this case. */
3766 gomp_target_init (void)
3769 #endif /* PLUGIN_SUPPORT */