1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
49 static void gomp_target_init (void);
51 /* The whole initialization code for offloading plugins is only run one. */
52 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock
;
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr
{
62 enum offload_target_type type
;
63 const void *host_table
;
64 const void *target_data
;
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr
*offload_images
;
70 /* Total number of offload images. */
71 static int num_offload_images
;
73 /* Array of descriptors for all available devices. */
74 static struct gomp_device_descr
*devices
;
76 /* Total number of available devices. */
77 static int num_devices
;
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 static int num_devices_openmp
;
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
85 gomp_realloc_unlock (void *old
, size_t size
)
87 void *ret
= realloc (old
, size
);
90 gomp_mutex_unlock (®ister_lock
);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
97 gomp_init_targets_once (void)
99 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
103 gomp_get_num_devices (void)
105 gomp_init_targets_once ();
106 return num_devices_openmp
;
109 static struct gomp_device_descr
*
110 resolve_device (int device_id
)
112 if (device_id
== GOMP_DEVICE_ICV
)
114 struct gomp_task_icv
*icv
= gomp_icv (false);
115 device_id
= icv
->default_device_var
;
118 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
120 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
121 && device_id
!= GOMP_DEVICE_HOST_FALLBACK
122 && device_id
!= num_devices_openmp
)
123 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
124 "but device not found");
129 gomp_mutex_lock (&devices
[device_id
].lock
);
130 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
131 gomp_init_device (&devices
[device_id
]);
132 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
134 gomp_mutex_unlock (&devices
[device_id
].lock
);
136 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
137 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
138 "but device is finalized");
142 gomp_mutex_unlock (&devices
[device_id
].lock
);
144 return &devices
[device_id
];
148 static inline splay_tree_key
149 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
151 if (key
->host_start
!= key
->host_end
)
152 return splay_tree_lookup (mem_map
, key
);
155 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
160 n
= splay_tree_lookup (mem_map
, key
);
164 return splay_tree_lookup (mem_map
, key
);
167 static inline splay_tree_key
168 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
170 if (key
->host_start
!= key
->host_end
)
171 return splay_tree_lookup (mem_map
, key
);
174 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
180 gomp_device_copy (struct gomp_device_descr
*devicep
,
181 bool (*copy_func
) (int, void *, const void *, size_t),
182 const char *dst
, void *dstaddr
,
183 const char *src
, const void *srcaddr
,
186 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
188 gomp_mutex_unlock (&devicep
->lock
);
189 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
190 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
195 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
196 bool (*copy_func
) (int, void *, const void *, size_t,
197 struct goacc_asyncqueue
*),
198 const char *dst
, void *dstaddr
,
199 const char *src
, const void *srcaddr
,
200 size_t size
, struct goacc_asyncqueue
*aq
)
202 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
204 gomp_mutex_unlock (&devicep
->lock
);
205 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
206 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
210 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
211 host to device memory transfers. */
struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
238 /* Maximum size of memory region considered for coalescing. Larger copies
239 are performed directly. */
240 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
242 /* Maximum size of a gap in between regions to consider them being copied
243 within the same chunk. All the device offsets considered are within
244 newly allocated device memory, so it isn't fatal if we copy some padding
245 in between from host to device. The gaps come either from alignment
246 padding or from memory regions which are not supposed to be copied from
247 host to device (e.g. map(alloc:), map(from:) etc.). */
248 #define MAX_COALESCE_BUF_GAP (4 * 1024)
250 /* Add region with device tgt_start relative offset and length to CBUF. */
253 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
255 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
259 if (cbuf
->chunk_cnt
< 0)
261 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
263 cbuf
->chunk_cnt
= -1;
266 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
268 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
272 /* If the last chunk is only used by one mapping, discard it,
273 as it will be one host to device copy anyway and
274 memcpying it around will only waste cycles. */
275 if (cbuf
->use_cnt
== 1)
278 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
279 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
284 /* Return true for mapping kinds which need to copy data from the
285 host to device for regions that weren't previously mapped. */
288 gomp_to_device_kind_p (int kind
)
294 case GOMP_MAP_FORCE_ALLOC
:
295 case GOMP_MAP_FORCE_FROM
:
296 case GOMP_MAP_ALWAYS_FROM
:
303 attribute_hidden
void
304 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
305 struct goacc_asyncqueue
*aq
,
306 void *d
, const void *h
, size_t sz
,
307 struct gomp_coalesce_buf
*cbuf
)
311 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
312 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
315 long last
= cbuf
->chunk_cnt
- 1;
316 while (first
<= last
)
318 long middle
= (first
+ last
) >> 1;
319 if (cbuf
->chunks
[middle
].end
<= doff
)
321 else if (cbuf
->chunks
[middle
].start
<= doff
)
323 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
324 gomp_fatal ("internal libgomp cbuf error");
325 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
334 if (__builtin_expect (aq
!= NULL
, 0))
335 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
336 "dev", d
, "host", h
, sz
, aq
);
338 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
341 attribute_hidden
void
342 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
343 struct goacc_asyncqueue
*aq
,
344 void *h
, const void *d
, size_t sz
)
346 if (__builtin_expect (aq
!= NULL
, 0))
347 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
348 "host", h
, "dev", d
, sz
, aq
);
350 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
354 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
356 if (!devicep
->free_func (devicep
->target_id
, devptr
))
358 gomp_mutex_unlock (&devicep
->lock
);
359 gomp_fatal ("error in freeing device memory block at %p", devptr
);
363 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
364 gomp_map_0len_lookup found oldn for newn.
365 Helper function of gomp_map_vars. */
368 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
369 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
370 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
371 unsigned char kind
, bool always_to_flag
,
372 struct gomp_coalesce_buf
*cbuf
)
374 assert (kind
!= GOMP_MAP_ATTACH
);
377 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
378 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
379 tgt_var
->is_attach
= false;
380 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
381 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
383 if ((kind
& GOMP_MAP_FLAG_FORCE
)
384 || oldn
->host_start
> newn
->host_start
385 || oldn
->host_end
< newn
->host_end
)
387 gomp_mutex_unlock (&devicep
->lock
);
388 gomp_fatal ("Trying to map into device [%p..%p) object when "
389 "[%p..%p) is already mapped",
390 (void *) newn
->host_start
, (void *) newn
->host_end
,
391 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
394 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
395 gomp_copy_host2dev (devicep
, aq
,
396 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
397 + newn
->host_start
- oldn
->host_start
),
398 (void *) newn
->host_start
,
399 newn
->host_end
- newn
->host_start
, cbuf
);
401 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Fetch map kind number IDX from the KINDS array: entries are 16-bit when
   SHORT_MAPKIND (OpenMP 4.5+ encoding with alignment in the high bits),
   8-bit otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
413 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
414 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
415 struct gomp_coalesce_buf
*cbuf
)
417 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
418 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
419 struct splay_tree_key_s cur_node
;
421 cur_node
.host_start
= host_ptr
;
422 if (cur_node
.host_start
== (uintptr_t) NULL
)
424 cur_node
.tgt_offset
= (uintptr_t) NULL
;
425 gomp_copy_host2dev (devicep
, aq
,
426 (void *) (tgt
->tgt_start
+ target_offset
),
427 (void *) &cur_node
.tgt_offset
,
428 sizeof (void *), cbuf
);
431 /* Add bias to the pointer value. */
432 cur_node
.host_start
+= bias
;
433 cur_node
.host_end
= cur_node
.host_start
;
434 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
437 gomp_mutex_unlock (&devicep
->lock
);
438 gomp_fatal ("Pointer target of array section wasn't mapped");
440 cur_node
.host_start
-= n
->host_start
;
442 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
443 /* At this point tgt_offset is target address of the
444 array section. Now subtract bias to get what we want
445 to initialize the pointer with. */
446 cur_node
.tgt_offset
-= bias
;
447 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
448 (void *) &cur_node
.tgt_offset
, sizeof (void *), cbuf
);
452 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
453 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
454 size_t first
, size_t i
, void **hostaddrs
,
455 size_t *sizes
, void *kinds
,
456 struct gomp_coalesce_buf
*cbuf
)
458 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
459 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
460 struct splay_tree_key_s cur_node
;
462 const bool short_mapkind
= true;
463 const int typemask
= short_mapkind
? 0xff : 0x7;
465 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
466 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
467 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
468 kind
= get_kind (short_mapkind
, kinds
, i
);
471 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
473 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
474 kind
& typemask
, false, cbuf
);
479 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
481 cur_node
.host_start
--;
482 n2
= splay_tree_lookup (mem_map
, &cur_node
);
483 cur_node
.host_start
++;
486 && n2
->host_start
- n
->host_start
487 == n2
->tgt_offset
- n
->tgt_offset
)
489 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
490 kind
& typemask
, false, cbuf
);
495 n2
= splay_tree_lookup (mem_map
, &cur_node
);
499 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
501 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
502 kind
& typemask
, false, cbuf
);
506 gomp_mutex_unlock (&devicep
->lock
);
507 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
508 "other mapped elements from the same structure weren't mapped "
509 "together with it", (void *) cur_node
.host_start
,
510 (void *) cur_node
.host_end
);
513 attribute_hidden
void
514 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
515 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
516 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
517 struct gomp_coalesce_buf
*cbufp
)
519 struct splay_tree_key_s s
;
524 gomp_mutex_unlock (&devicep
->lock
);
525 gomp_fatal ("enclosing struct not mapped for attach");
528 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
529 /* We might have a pointer in a packed struct: however we cannot have more
530 than one such pointer in each pointer-sized portion of the struct, so
532 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
535 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
537 if (!n
->aux
->attach_count
)
539 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
541 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
542 n
->aux
->attach_count
[idx
]++;
545 gomp_mutex_unlock (&devicep
->lock
);
546 gomp_fatal ("attach count overflow");
549 if (n
->aux
->attach_count
[idx
] == 1)
551 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
553 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
557 if ((void *) target
== NULL
)
559 gomp_mutex_unlock (&devicep
->lock
);
560 gomp_fatal ("attempt to attach null pointer");
563 s
.host_start
= target
+ bias
;
564 s
.host_end
= s
.host_start
+ 1;
565 tn
= splay_tree_lookup (mem_map
, &s
);
569 gomp_mutex_unlock (&devicep
->lock
);
570 gomp_fatal ("pointer target not mapped for attach");
573 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
576 "%s: attaching host %p, target %p (struct base %p) to %p\n",
577 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
578 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
580 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
581 sizeof (void *), cbufp
);
584 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
585 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
588 attribute_hidden
void
589 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
590 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
591 uintptr_t detach_from
, bool finalize
,
592 struct gomp_coalesce_buf
*cbufp
)
598 gomp_mutex_unlock (&devicep
->lock
);
599 gomp_fatal ("enclosing struct not mapped for detach");
602 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
604 if (!n
->aux
|| !n
->aux
->attach_count
)
606 gomp_mutex_unlock (&devicep
->lock
);
607 gomp_fatal ("no attachment counters for struct");
611 n
->aux
->attach_count
[idx
] = 1;
613 if (n
->aux
->attach_count
[idx
] == 0)
615 gomp_mutex_unlock (&devicep
->lock
);
616 gomp_fatal ("attach count underflow");
619 n
->aux
->attach_count
[idx
]--;
621 if (n
->aux
->attach_count
[idx
] == 0)
623 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
625 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
628 "%s: detaching host %p, target %p (struct base %p) to %p\n",
629 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
630 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
633 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
634 sizeof (void *), cbufp
);
637 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
638 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
641 attribute_hidden
uintptr_t
642 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
644 if (tgt
->list
[i
].key
!= NULL
)
645 return tgt
->list
[i
].key
->tgt
->tgt_start
646 + tgt
->list
[i
].key
->tgt_offset
647 + tgt
->list
[i
].offset
;
649 switch (tgt
->list
[i
].offset
)
652 return (uintptr_t) hostaddrs
[i
];
658 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
659 + tgt
->list
[i
+ 1].key
->tgt_offset
660 + tgt
->list
[i
+ 1].offset
661 + (uintptr_t) hostaddrs
[i
]
662 - (uintptr_t) hostaddrs
[i
+ 1];
665 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
669 static inline __attribute__((always_inline
)) struct target_mem_desc
*
670 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
671 struct goacc_asyncqueue
*aq
, size_t mapnum
,
672 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
673 void *kinds
, bool short_mapkind
,
674 enum gomp_map_vars_kind pragma_kind
)
676 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
677 bool has_firstprivate
= false;
678 bool has_always_ptrset
= false;
679 const int rshift
= short_mapkind
? 8 : 3;
680 const int typemask
= short_mapkind
? 0xff : 0x7;
681 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
682 struct splay_tree_key_s cur_node
;
683 struct target_mem_desc
*tgt
684 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
685 tgt
->list_count
= mapnum
;
686 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
687 tgt
->device_descr
= devicep
;
689 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
698 tgt_align
= sizeof (void *);
704 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
706 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
707 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
710 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
712 size_t align
= 4 * sizeof (void *);
714 tgt_size
= mapnum
* sizeof (void *);
716 cbuf
.use_cnt
= 1 + (mapnum
> 1);
717 cbuf
.chunks
[0].start
= 0;
718 cbuf
.chunks
[0].end
= tgt_size
;
721 gomp_mutex_lock (&devicep
->lock
);
722 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
724 gomp_mutex_unlock (&devicep
->lock
);
729 for (i
= 0; i
< mapnum
; i
++)
731 int kind
= get_kind (short_mapkind
, kinds
, i
);
732 if (hostaddrs
[i
] == NULL
733 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
735 tgt
->list
[i
].key
= NULL
;
736 tgt
->list
[i
].offset
= OFFSET_INLINED
;
739 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
740 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
742 tgt
->list
[i
].key
= NULL
;
745 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
746 on a separate construct prior to using use_device_{addr,ptr}.
747 In OpenMP 5.0, map directives need to be ordered by the
748 middle-end before the use_device_* clauses. If
749 !not_found_cnt, all mappings requested (if any) are already
750 mapped, so use_device_{addr,ptr} can be resolved right away.
751 Otherwise, if not_found_cnt, gomp_map_lookup might fail
752 now but would succeed after performing the mappings in the
753 following loop. We can't defer this always to the second
754 loop, because it is not even invoked when !not_found_cnt
755 after the first loop. */
756 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
757 cur_node
.host_end
= cur_node
.host_start
;
758 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
761 cur_node
.host_start
-= n
->host_start
;
763 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
764 + cur_node
.host_start
);
766 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
768 gomp_mutex_unlock (&devicep
->lock
);
769 gomp_fatal ("use_device_ptr pointer wasn't mapped");
771 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
772 /* If not present, continue using the host address. */
775 __builtin_unreachable ();
776 tgt
->list
[i
].offset
= OFFSET_INLINED
;
779 tgt
->list
[i
].offset
= 0;
782 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
784 size_t first
= i
+ 1;
785 size_t last
= i
+ sizes
[i
];
786 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
787 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
789 tgt
->list
[i
].key
= NULL
;
790 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
791 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
794 size_t align
= (size_t) 1 << (kind
>> rshift
);
795 if (tgt_align
< align
)
797 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
798 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
799 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
800 not_found_cnt
+= last
- i
;
801 for (i
= first
; i
<= last
; i
++)
803 tgt
->list
[i
].key
= NULL
;
804 if (gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
806 gomp_coalesce_buf_add (&cbuf
,
807 tgt_size
- cur_node
.host_end
808 + (uintptr_t) hostaddrs
[i
],
814 for (i
= first
; i
<= last
; i
++)
815 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
820 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
822 tgt
->list
[i
].key
= NULL
;
823 tgt
->list
[i
].offset
= OFFSET_POINTER
;
824 has_firstprivate
= true;
827 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
)
829 tgt
->list
[i
].key
= NULL
;
830 has_firstprivate
= true;
833 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
834 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
835 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
837 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
838 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
840 tgt
->list
[i
].key
= NULL
;
842 size_t align
= (size_t) 1 << (kind
>> rshift
);
843 if (tgt_align
< align
)
845 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
846 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
847 cur_node
.host_end
- cur_node
.host_start
);
848 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
849 has_firstprivate
= true;
853 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
855 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
858 tgt
->list
[i
].key
= NULL
;
859 tgt
->list
[i
].offset
= OFFSET_POINTER
;
864 n
= splay_tree_lookup (mem_map
, &cur_node
);
865 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
867 int always_to_cnt
= 0;
868 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
870 bool has_nullptr
= false;
872 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
873 if (n
->tgt
->list
[j
].key
== n
)
875 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
878 if (n
->tgt
->list_count
== 0)
880 /* 'declare target'; assume has_nullptr; it could also be
881 statically assigned pointer, but that it should be to
882 the equivalent variable on the host. */
883 assert (n
->refcount
== REFCOUNT_INFINITY
);
887 assert (j
< n
->tgt
->list_count
);
888 /* Re-map the data if there is an 'always' modifier or if it a
889 null pointer was there and non a nonnull has been found; that
890 permits transparent re-mapping for Fortran array descriptors
891 which were previously mapped unallocated. */
892 for (j
= i
+ 1; j
< mapnum
; j
++)
894 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
895 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
897 || !GOMP_MAP_POINTER_P (ptr_kind
)
898 || *(void **) hostaddrs
[j
] == NULL
))
900 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
901 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
902 > cur_node
.host_end
))
906 has_always_ptrset
= true;
911 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
912 kind
& typemask
, always_to_cnt
> 0, NULL
);
917 tgt
->list
[i
].key
= NULL
;
919 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
921 /* Not present, hence, skip entry - including its MAP_POINTER,
923 tgt
->list
[i
].offset
= OFFSET_POINTER
;
925 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
926 == GOMP_MAP_POINTER
))
929 tgt
->list
[i
].key
= NULL
;
930 tgt
->list
[i
].offset
= 0;
934 size_t align
= (size_t) 1 << (kind
>> rshift
);
936 if (tgt_align
< align
)
938 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
939 if (gomp_to_device_kind_p (kind
& typemask
))
940 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
941 cur_node
.host_end
- cur_node
.host_start
);
942 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
943 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
947 for (j
= i
+ 1; j
< mapnum
; j
++)
948 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
949 kinds
, j
)) & typemask
))
950 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
952 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
953 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
954 > cur_node
.host_end
))
958 tgt
->list
[j
].key
= NULL
;
969 gomp_mutex_unlock (&devicep
->lock
);
970 gomp_fatal ("unexpected aggregation");
972 tgt
->to_free
= devaddrs
[0];
973 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
974 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
976 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
978 /* Allocate tgt_align aligned tgt_size block of memory. */
979 /* FIXME: Perhaps change interface to allocate properly aligned
981 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
982 tgt_size
+ tgt_align
- 1);
985 gomp_mutex_unlock (&devicep
->lock
);
986 gomp_fatal ("device memory allocation fail");
989 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
990 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
991 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
993 if (cbuf
.use_cnt
== 1)
995 if (cbuf
.chunk_cnt
> 0)
998 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1008 tgt
->to_free
= NULL
;
1014 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1015 tgt_size
= mapnum
* sizeof (void *);
1018 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1021 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1022 splay_tree_node array
= tgt
->array
;
1023 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
1024 uintptr_t field_tgt_base
= 0;
1026 for (i
= 0; i
< mapnum
; i
++)
1027 if (has_always_ptrset
1029 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1030 == GOMP_MAP_TO_PSET
)
1032 splay_tree_key k
= tgt
->list
[i
].key
;
1033 bool has_nullptr
= false;
1035 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1036 if (k
->tgt
->list
[j
].key
== k
)
1038 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1041 if (k
->tgt
->list_count
== 0)
1044 assert (j
< k
->tgt
->list_count
);
1046 tgt
->list
[i
].has_null_ptr_assoc
= false;
1047 for (j
= i
+ 1; j
< mapnum
; j
++)
1049 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1050 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1052 || !GOMP_MAP_POINTER_P (ptr_kind
)
1053 || *(void **) hostaddrs
[j
] == NULL
))
1055 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1056 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1061 if (*(void **) hostaddrs
[j
] == NULL
)
1062 tgt
->list
[i
].has_null_ptr_assoc
= true;
1063 tgt
->list
[j
].key
= k
;
1064 tgt
->list
[j
].copy_from
= false;
1065 tgt
->list
[j
].always_copy_from
= false;
1066 tgt
->list
[j
].is_attach
= false;
1067 if (k
->refcount
!= REFCOUNT_INFINITY
)
1069 gomp_map_pointer (k
->tgt
, aq
,
1070 (uintptr_t) *(void **) hostaddrs
[j
],
1071 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1078 else if (tgt
->list
[i
].key
== NULL
)
1080 int kind
= get_kind (short_mapkind
, kinds
, i
);
1081 if (hostaddrs
[i
] == NULL
)
1083 switch (kind
& typemask
)
1085 size_t align
, len
, first
, last
;
1087 case GOMP_MAP_FIRSTPRIVATE
:
1088 align
= (size_t) 1 << (kind
>> rshift
);
1089 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1090 tgt
->list
[i
].offset
= tgt_size
;
1092 gomp_copy_host2dev (devicep
, aq
,
1093 (void *) (tgt
->tgt_start
+ tgt_size
),
1094 (void *) hostaddrs
[i
], len
, cbufp
);
1097 case GOMP_MAP_FIRSTPRIVATE_INT
:
1098 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1100 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1101 /* The OpenACC 'host_data' construct only allows 'use_device'
1102 "mapping" clauses, so in the first loop, 'not_found_cnt'
1103 must always have been zero, so all OpenACC 'use_device'
1104 clauses have already been handled. (We can only easily test
1105 'use_device' with 'if_present' clause here.) */
1106 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1107 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1108 code conceptually simple, similar to the first loop. */
1109 case GOMP_MAP_USE_DEVICE_PTR
:
1110 if (tgt
->list
[i
].offset
== 0)
1112 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1113 cur_node
.host_end
= cur_node
.host_start
;
1114 n
= gomp_map_lookup (mem_map
, &cur_node
);
1117 cur_node
.host_start
-= n
->host_start
;
1119 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1120 + cur_node
.host_start
);
1122 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1124 gomp_mutex_unlock (&devicep
->lock
);
1125 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1127 else if ((kind
& typemask
)
1128 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1129 /* If not present, continue using the host address. */
1132 __builtin_unreachable ();
1133 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1136 case GOMP_MAP_STRUCT
:
1138 last
= i
+ sizes
[i
];
1139 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1140 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1142 if (tgt
->list
[first
].key
!= NULL
)
1144 n
= splay_tree_lookup (mem_map
, &cur_node
);
1147 size_t align
= (size_t) 1 << (kind
>> rshift
);
1148 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1149 - (uintptr_t) hostaddrs
[i
];
1150 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1151 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1152 - (uintptr_t) hostaddrs
[i
];
1153 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1154 field_tgt_offset
= tgt_size
;
1155 field_tgt_clear
= last
;
1156 tgt_size
+= cur_node
.host_end
1157 - (uintptr_t) hostaddrs
[first
];
1160 for (i
= first
; i
<= last
; i
++)
1161 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1162 sizes
, kinds
, cbufp
);
1165 case GOMP_MAP_ALWAYS_POINTER
:
1166 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1167 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1168 n
= splay_tree_lookup (mem_map
, &cur_node
);
1170 || n
->host_start
> cur_node
.host_start
1171 || n
->host_end
< cur_node
.host_end
)
1173 gomp_mutex_unlock (&devicep
->lock
);
1174 gomp_fatal ("always pointer not mapped");
1176 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1177 != GOMP_MAP_ALWAYS_POINTER
)
1178 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1179 if (cur_node
.tgt_offset
)
1180 cur_node
.tgt_offset
-= sizes
[i
];
1181 gomp_copy_host2dev (devicep
, aq
,
1182 (void *) (n
->tgt
->tgt_start
1184 + cur_node
.host_start
1186 (void *) &cur_node
.tgt_offset
,
1187 sizeof (void *), cbufp
);
1188 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1189 + cur_node
.host_start
- n
->host_start
;
1191 case GOMP_MAP_IF_PRESENT
:
1192 /* Not present - otherwise handled above. Skip over its
1193 MAP_POINTER as well. */
1195 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1196 == GOMP_MAP_POINTER
))
1199 case GOMP_MAP_ATTACH
:
1201 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1202 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1203 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1206 tgt
->list
[i
].key
= n
;
1207 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1208 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1209 tgt
->list
[i
].copy_from
= false;
1210 tgt
->list
[i
].always_copy_from
= false;
1211 tgt
->list
[i
].is_attach
= true;
1212 /* OpenACC 'attach'/'detach' doesn't affect
1213 structured/dynamic reference counts ('n->refcount',
1214 'n->dynamic_refcount'). */
1218 gomp_mutex_unlock (&devicep
->lock
);
1219 gomp_fatal ("outer struct not mapped for attach");
1221 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1222 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1229 splay_tree_key k
= &array
->key
;
1230 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1231 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1232 k
->host_end
= k
->host_start
+ sizes
[i
];
1234 k
->host_end
= k
->host_start
+ sizeof (void *);
1235 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1236 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1237 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1238 kind
& typemask
, false, cbufp
);
1242 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1244 /* Replace target address of the pointer with target address
1245 of mapped object in the splay tree. */
1246 splay_tree_remove (mem_map
, n
);
1248 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1249 k
->aux
->link_key
= n
;
1251 size_t align
= (size_t) 1 << (kind
>> rshift
);
1252 tgt
->list
[i
].key
= k
;
1254 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1256 k
->tgt_offset
= k
->host_start
- field_tgt_base
1258 if (i
== field_tgt_clear
)
1259 field_tgt_clear
= FIELD_TGT_EMPTY
;
1263 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1264 k
->tgt_offset
= tgt_size
;
1265 tgt_size
+= k
->host_end
- k
->host_start
;
1267 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1268 tgt
->list
[i
].always_copy_from
1269 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1270 tgt
->list
[i
].is_attach
= false;
1271 tgt
->list
[i
].offset
= 0;
1272 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1274 k
->dynamic_refcount
= 0;
1277 array
->right
= NULL
;
1278 splay_tree_insert (mem_map
, array
);
1279 switch (kind
& typemask
)
1281 case GOMP_MAP_ALLOC
:
1283 case GOMP_MAP_FORCE_ALLOC
:
1284 case GOMP_MAP_FORCE_FROM
:
1285 case GOMP_MAP_ALWAYS_FROM
:
1288 case GOMP_MAP_TOFROM
:
1289 case GOMP_MAP_FORCE_TO
:
1290 case GOMP_MAP_FORCE_TOFROM
:
1291 case GOMP_MAP_ALWAYS_TO
:
1292 case GOMP_MAP_ALWAYS_TOFROM
:
1293 gomp_copy_host2dev (devicep
, aq
,
1294 (void *) (tgt
->tgt_start
1296 (void *) k
->host_start
,
1297 k
->host_end
- k
->host_start
, cbufp
);
1299 case GOMP_MAP_POINTER
:
1300 gomp_map_pointer (tgt
, aq
,
1301 (uintptr_t) *(void **) k
->host_start
,
1302 k
->tgt_offset
, sizes
[i
], cbufp
);
1304 case GOMP_MAP_TO_PSET
:
1305 gomp_copy_host2dev (devicep
, aq
,
1306 (void *) (tgt
->tgt_start
1308 (void *) k
->host_start
,
1309 k
->host_end
- k
->host_start
, cbufp
);
1310 tgt
->list
[i
].has_null_ptr_assoc
= false;
1312 for (j
= i
+ 1; j
< mapnum
; j
++)
1314 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1316 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1317 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1319 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1320 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1325 tgt
->list
[j
].key
= k
;
1326 tgt
->list
[j
].copy_from
= false;
1327 tgt
->list
[j
].always_copy_from
= false;
1328 tgt
->list
[j
].is_attach
= false;
1329 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1330 if (k
->refcount
!= REFCOUNT_INFINITY
)
1332 gomp_map_pointer (tgt
, aq
,
1333 (uintptr_t) *(void **) hostaddrs
[j
],
1335 + ((uintptr_t) hostaddrs
[j
]
1342 case GOMP_MAP_FORCE_PRESENT
:
1344 /* We already looked up the memory region above and it
1346 size_t size
= k
->host_end
- k
->host_start
;
1347 gomp_mutex_unlock (&devicep
->lock
);
1348 #ifdef HAVE_INTTYPES_H
1349 gomp_fatal ("present clause: !acc_is_present (%p, "
1350 "%"PRIu64
" (0x%"PRIx64
"))",
1351 (void *) k
->host_start
,
1352 (uint64_t) size
, (uint64_t) size
);
1354 gomp_fatal ("present clause: !acc_is_present (%p, "
1355 "%lu (0x%lx))", (void *) k
->host_start
,
1356 (unsigned long) size
, (unsigned long) size
);
1360 case GOMP_MAP_FORCE_DEVICEPTR
:
1361 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1362 gomp_copy_host2dev (devicep
, aq
,
1363 (void *) (tgt
->tgt_start
1365 (void *) k
->host_start
,
1366 sizeof (void *), cbufp
);
1369 gomp_mutex_unlock (&devicep
->lock
);
1370 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1374 if (k
->aux
&& k
->aux
->link_key
)
1376 /* Set link pointer on target to the device address of the
1378 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1379 /* We intentionally do not use coalescing here, as it's not
1380 data allocated by the current call to this function. */
1381 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1382 &tgt_addr
, sizeof (void *), NULL
);
1389 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1391 for (i
= 0; i
< mapnum
; i
++)
1393 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1394 gomp_copy_host2dev (devicep
, aq
,
1395 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1396 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1404 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1405 gomp_copy_host2dev (devicep
, aq
,
1406 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1407 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1408 - cbuf
.chunks
[0].start
),
1409 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
, NULL
);
1415 /* If the variable from "omp target enter data" map-list was already mapped,
1416 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1418 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
1424 gomp_mutex_unlock (&devicep
->lock
);
1428 attribute_hidden
struct target_mem_desc
*
1429 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1430 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1431 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
1433 return gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1434 sizes
, kinds
, short_mapkind
, pragma_kind
);
/* Asynchronous mapping entry point: same as gomp_map_vars but threads an
   explicit goacc_asyncqueue AQ through to gomp_map_vars_internal.
   NOTE(review): numeric prefixes are original line numbers from a mangled
   extraction; missing lines (braces etc.) are not reconstructed.  */
1437 attribute_hidden struct target_mem_desc *
1438 gomp_map_vars_async (struct gomp_device_descr *devicep,
1439 struct goacc_asyncqueue *aq, size_t mapnum,
1440 void **hostaddrs, void **devaddrs, size_t *sizes,
1441 void *kinds, bool short_mapkind,
1442 enum gomp_map_vars_kind pragma_kind)
1444 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1445 sizes, kinds, short_mapkind, pragma_kind);
/* Release the device memory backing TGT.  NOTE(review): the return-type line
   and the trailing host-side free()s of this function are missing from the
   extraction; only the visible tokens are kept.  */
1449 gomp_unmap_tgt (struct target_mem_desc *tgt)
1451 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1453 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
/* Drop one reference from the target_mem_desc passed as PTR (callback-style
   void* signature); unmaps it once no references remain and reports whether
   the unmap happened.  NOTE(review): the refcount-decrement line between 1466
   and 1470 is missing from the extraction — presumably it decrements
   tgt->refcount; confirm against the original file.  */
1460 gomp_unref_tgt (void *ptr)
1462 bool is_tgt_unmapped = false;
1464 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1466 if (tgt->refcount > 1)
/* Last reference: free the device region.  */
1470 gomp_unmap_tgt (tgt);
1471 is_tgt_unmapped = true;
1474 return is_tgt_unmapped;
/* void-returning wrapper around gomp_unref_tgt, suitable as an asyncqueue
   callback (the unmapped/not-unmapped result is deliberately discarded).  */
1478 gomp_unref_tgt_void (void *ptr)
1480 (void) gomp_unref_tgt (ptr);
/* Remove mapping K from DEVICEP's splay tree.  If K shadowed an
   "omp declare target link" key, reinsert that original key; free any
   attach-count bookkeeping.  With a non-NULL AQ the target_mem_desc is
   released via an asyncqueue callback, otherwise synchronously.  Returns
   whether the owning tgt was unmapped (false in the async case — the
   callback result is not observable here).  NOTE(review): numeric prefixes
   are original line numbers; several lines (braces, the aux!=NULL guard,
   the callback's last argument) are missing from the extraction.  */
1483 static inline __attribute__((always_inline)) bool
1484 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1485 struct goacc_asyncqueue *aq)
1487 bool is_tgt_unmapped = false;
1488 splay_tree_remove (&devicep->mem_map, k);
1491 if (k->aux->link_key)
/* Restore the link key this mapping had replaced.  */
1492 splay_tree_insert (&devicep->mem_map,
1493 (splay_tree_node) k->aux->link_key);
1494 if (k->aux->attach_count)
1495 free (k->aux->attach_count);
/* Async path: defer the unref until queued operations complete.  */
1500 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1503 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1504 return is_tgt_unmapped;
/* Synchronous variable removal: gomp_remove_var_internal with no asyncqueue.
   Device lock must be held by the caller (see callers in this file).  */
1507 attribute_hidden bool
1508 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1510 return gomp_remove_var_internal (devicep, k, NULL);
1513 /* Remove a variable asynchronously. This actually removes the variable
1514 mapping immediately, but retains the linked target_mem_desc until the
1515 asynchronous operation has completed (as it may still refer to target
1516 memory). The device lock must be held before entry, and remains locked on
/* Asynchronous variable removal; see the comment block immediately above
   (original lines 1513-1516) for the contract.  Result intentionally
   discarded — the unref is deferred to the asyncqueue.  */
1519 attribute_hidden void
1520 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1521 struct goacc_asyncqueue *aq)
1523 (void) gomp_remove_var_internal (devicep, k, aq);
1526 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1527 variables back from device to host: if it is false, it is assumed that this
1528 has been done already. */
/* Core unmap routine: under the device lock, first detach any OpenACC
   attachments, then walk TGT's list copying data back to the host where
   requested (DO_COPYFROM / always_copy_from) and removing mappings whose
   refcount drops, finally unref TGT itself (via AQ callback if async).
   NOTE(review): numeric prefixes are original line numbers from a mangled
   extraction; many lines (braces, refcount decrements, early-return body,
   the continue on k == NULL, assert operands) are missing — tokens kept
   verbatim, do not treat this text as compilable.  */
1530 static inline __attribute__((always_inline)) void
1531 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1532 struct goacc_asyncqueue *aq)
1534 struct gomp_device_descr *devicep = tgt->device_descr;
/* Empty list: nothing to copy back or remove (early-out body missing).  */
1536 if (tgt->list_count == 0)
1542 gomp_mutex_lock (&devicep->lock);
1543 if (devicep->state == GOMP_DEVICE_FINALIZED)
1545 gomp_mutex_unlock (&devicep->lock);
1553 /* We must perform detachments before any copies back to the host. */
1554 for (i = 0; i < tgt->list_count; i++)
1556 splay_tree_key k = tgt->list[i].key;
1558 if (k != NULL && tgt->list[i].is_attach)
1559 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1560 + tgt->list[i].offset,
/* Second pass: copy-back and refcount-driven removal.  */
1564 for (i = 0; i < tgt->list_count; i++)
1566 splay_tree_key k = tgt->list[i].key;
1570 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1571 counts ('n->refcount', 'n->dynamic_refcount'). */
1572 if (tgt->list[i].is_attach)
1575 bool do_unmap = false;
1576 if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1578 else if (k->refcount == 1)
1584 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1585 || tgt->list[i].always_copy_from)
1586 gomp_copy_dev2host (devicep, aq,
1587 (void *) (k->host_start + tgt->list[i].offset),
1588 (void *) (k->tgt->tgt_start + k->tgt_offset
1589 + tgt->list[i].offset),
1590 tgt->list[i].length);
1593 struct target_mem_desc *k_tgt = k->tgt;
1594 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1595 /* It would be bad if TGT got unmapped while we're still iterating
1596 over its LIST_COUNT, and also expect to use it in the following
1598 assert (!is_tgt_unmapped
/* Finally drop TGT's own reference — deferred to AQ when async.  */
1604 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1607 gomp_unref_tgt ((void *) tgt);
1609 gomp_mutex_unlock (&devicep->lock);
/* Synchronous unmap: gomp_unmap_vars_internal with no asyncqueue.  */
1612 attribute_hidden void
1613 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1615 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
/* Asynchronous unmap: gomp_unmap_vars_internal with explicit asyncqueue AQ.  */
1618 attribute_hidden void
1619 gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
1620 struct goacc_asyncqueue *aq)
1622 gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
/* Implement 'target update': for each of MAPNUM host address ranges, look up
   its mapping on DEVICEP and copy to and/or from the device according to the
   map kind.  Fails hard if a requested range is not fully covered by an
   existing mapping.  NOTE(review): numeric prefixes are original line
   numbers; braces, the !devicep early return, and the n == NULL branch are
   missing from the extraction.  */
1626 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1627 size_t *sizes, void *kinds, bool short_mapkind)
1630 struct splay_tree_key_s cur_node;
/* short_mapkind selects 16-bit kinds (low byte is the map type).  */
1631 const int typemask = short_mapkind ? 0xff : 0x7;
1639 gomp_mutex_lock (&devicep->lock);
1640 if (devicep->state == GOMP_DEVICE_FINALIZED)
1642 gomp_mutex_unlock (&devicep->lock);
1646 for (i = 0; i < mapnum; i++)
1649 cur_node.host_start = (uintptr_t) hostaddrs[i];
1650 cur_node.host_end = cur_node.host_start + sizes[i];
1651 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1654 int kind = get_kind (short_mapkind, kinds, i);
/* The mapped region must fully contain the requested update range.  */
1655 if (n->host_start > cur_node.host_start
1656 || n->host_end < cur_node.host_end)
1658 gomp_mutex_unlock (&devicep->lock);
1659 gomp_fatal ("Trying to update [%p..%p) object when "
1660 "only [%p..%p) is mapped",
1661 (void *) cur_node.host_start,
1662 (void *) cur_node.host_end,
1663 (void *) n->host_start,
1664 (void *) n->host_end);
1668 void *hostaddr = (void *) cur_node.host_start;
1669 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1670 + cur_node.host_start - n->host_start);
1671 size_t size = cur_node.host_end - cur_node.host_start;
1673 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1674 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1676 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1677 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1680 gomp_mutex_unlock (&devicep->lock);
1683 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1684 And insert to splay tree the mapping between addresses from HOST_TABLE and
1685 from loaded target image. We rely in the host and device compiler
1686 emitting variable and functions in the same order. */
/* Load TARGET_DATA onto DEVICEP via the plugin's load_image_func and insert
   host->target address mappings for all functions and variables into the
   device splay tree (see the comment block preceding this function,
   original lines 1683-1686).  NOTE(review): numeric prefixes are original
   line numbers from a mangled extraction; braces, the left-hand sides of
   some assignments, and several list fields are missing.  "®ister_lock"
   below is an HTML-entity mangling of "&register_lock" — repair when
   restoring the file; tokens kept verbatim here.  */
1689 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1690 const void *host_table, const void *target_data,
1691 bool is_register_lock)
1693 void **host_func_table = ((void ***) host_table)[0];
1694 void **host_funcs_end = ((void ***) host_table)[1];
1695 void **host_var_table = ((void ***) host_table)[2];
1696 void **host_vars_end = ((void ***) host_table)[3];
1698 /* The func table contains only addresses, the var table contains addresses
1699 and corresponding sizes. */
1700 int num_funcs = host_funcs_end - host_func_table;
1701 int num_vars = (host_vars_end - host_var_table) / 2;
1703 /* Load image to device and get target addresses for the image. */
1704 struct addr_pair *target_table = NULL;
1705 int i, num_target_entries;
/* NOTE(review): LHS of this assignment (num_target_entries) is on a line
   lost in extraction.  */
1708 = devicep->load_image_func (devicep->target_id, version,
1709 target_data, &target_table);
1711 if (num_target_entries != num_funcs + num_vars)
1713 gomp_mutex_unlock (&devicep->lock);
1714 if (is_register_lock)
1715 gomp_mutex_unlock (®ister_lock);
1716 gomp_fatal ("Cannot map target functions or variables"
1717 " (expected %u, have %u)", num_funcs + num_vars,
1718 num_target_entries);
1721 /* Insert host-target address mapping into splay tree. */
1722 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1723 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
/* Image-backed mappings are never unmapped by refcounting.  */
1724 tgt->refcount = REFCOUNT_INFINITY;
1727 tgt->to_free = NULL;
1729 tgt->list_count = 0;
1730 tgt->device_descr = devicep;
1731 splay_tree_node array = tgt->array;
/* One splay-tree node per function; host "range" is a 1-byte stand-in.  */
1733 for (i = 0; i < num_funcs; i++)
1735 splay_tree_key k = &array->key;
1736 k->host_start = (uintptr_t) host_func_table[i];
1737 k->host_end = k->host_start + 1;
1739 k->tgt_offset = target_table[i].start;
1740 k->refcount = REFCOUNT_INFINITY;
1741 k->dynamic_refcount = 0;
1744 array->right = NULL;
1745 splay_tree_insert (&devicep->mem_map, array);
1749 /* Most significant bit of the size in host and target tables marks
1750 "omp declare target link" variables. */
1751 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1752 const uintptr_t size_mask = ~link_bit;
1754 for (i = 0; i < num_vars; i++)
1756 struct addr_pair *target_var = &target_table[num_funcs + i];
1757 uintptr_t target_size = target_var->end - target_var->start;
1758 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
/* Non-link variables must have identical host and device sizes.  */
1760 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
1762 gomp_mutex_unlock (&devicep->lock);
1763 if (is_register_lock)
1764 gomp_mutex_unlock (®ister_lock);
1765 gomp_fatal ("Cannot map target variables (size mismatch)");
1768 splay_tree_key k = &array->key;
1769 k->host_start = (uintptr_t) host_var_table[i * 2];
/* NOTE(review): LHS (k->host_end) lost in extraction.  */
1771 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1773 k->tgt_offset = target_var->start;
1774 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1775 k->dynamic_refcount = 0;
1778 array->right = NULL;
1779 splay_tree_insert (&devicep->mem_map, array);
/* Plugin-allocated table is ours to free per the load_image contract.  */
1783 free (target_table);
1786 /* Unload the mappings described by target_data from device DEVICE_P.
1787 The device must be locked. */
/* Reverse of gomp_load_image_to_device: call the plugin's unload hook and
   remove the image's function/variable mappings from DEVICEP's splay tree.
   Device lock held by caller (see comment at original lines 1786-1787).
   NOTE(review): numeric prefixes are original line numbers; the 'unsigned
   version' parameter line (1791), braces, and the tail of the function
   (freeing the initial node's tgt) are missing from the extraction.  */
1790 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1792 const void *host_table, const void *target_data)
1794 void **host_func_table = ((void ***) host_table)[0];
1795 void **host_funcs_end = ((void ***) host_table)[1];
1796 void **host_var_table = ((void ***) host_table)[2];
1797 void **host_vars_end = ((void ***) host_table)[3];
1799 /* The func table contains only addresses, the var table contains addresses
1800 and corresponding sizes. */
1801 int num_funcs = host_funcs_end - host_func_table;
1802 int num_vars = (host_vars_end - host_var_table) / 2;
1804 struct splay_tree_key_s k;
1805 splay_tree_key node = NULL;
1807 /* Find mapping at start of node array */
1808 if (num_funcs || num_vars)
1810 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1811 : (uintptr_t) host_var_table[0]);
1812 k.host_end = k.host_start + 1;
1813 node = splay_tree_lookup (&devicep->mem_map, &k);
1816 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1818 gomp_mutex_unlock (&devicep->lock);
1819 gomp_fatal ("image unload fail");
1822 /* Remove mappings from splay tree. */
1824 for (i = 0; i < num_funcs; i++)
1826 k.host_start = (uintptr_t) host_func_table[i];
1827 k.host_end = k.host_start + 1;
1828 splay_tree_remove (&devicep->mem_map, &k);
1831 /* Most significant bit of the size in host and target tables marks
1832 "omp declare target link" variables. */
1833 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1834 const uintptr_t size_mask = ~link_bit;
1835 bool is_tgt_unmapped = false;
1837 for (i = 0; i < num_vars; i++)
1839 k.host_start = (uintptr_t) host_var_table[i * 2];
/* NOTE(review): LHS (k.host_end) lost in extraction.  */
1841 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1843 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1844 splay_tree_remove (&devicep->mem_map, &k);
/* Link variables go through gomp_remove_var so the shadowed key is
   restored.  */
1847 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1848 is_tgt_unmapped = gomp_remove_var (devicep, n);
1852 if (node && !is_tgt_unmapped)
1859 /* This function should be called from every offload image while loading.
1860 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1861 the target, and TARGET_DATA needed by target plugin. */
/* Registration hook called by each offload image at load time (see comment
   at original lines 1859-1861): loads the image onto every already-
   initialized device of the matching type, then records it in
   offload_images for devices initialized later.  NOTE(review): numeric
   prefixes are original line numbers; "®ister_lock" is a mangled
   "&register_lock" (HTML entity) — repair when restoring; the LHS of the
   realloc assignment (offload_images) is on a lost line.  */
1864 GOMP_offload_register_ver (unsigned version, const void *host_table,
1865 int target_type, const void *target_data)
/* Reject images built against a newer libgomp ABI than ours.  */
1869 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1870 gomp_fatal ("Library too old for offload (version %u < %u)",
1871 GOMP_VERSION, GOMP_VERSION_LIB (version));
1873 gomp_mutex_lock (®ister_lock);
1875 /* Load image to all initialized devices. */
1876 for (i = 0; i < num_devices; i++)
1878 struct gomp_device_descr *devicep = &devices[i];
1879 gomp_mutex_lock (&devicep->lock);
1880 if (devicep->type == target_type
1881 && devicep->state == GOMP_DEVICE_INITIALIZED)
1882 gomp_load_image_to_device (devicep, version,
1883 host_table, target_data, true);
1884 gomp_mutex_unlock (&devicep->lock);
1887 /* Insert image to array of pending images. */
1889 = gomp_realloc_unlock (offload_images,
1890 (num_offload_images + 1)
1891 * sizeof (struct offload_image_descr));
1892 offload_images[num_offload_images].version = version;
1893 offload_images[num_offload_images].type = target_type;
1894 offload_images[num_offload_images].host_table = host_table;
1895 offload_images[num_offload_images].target_data = target_data;
1897 num_offload_images++;
1898 gomp_mutex_unlock (®ister_lock);
/* Legacy (version-0) registration entry point kept for ABI compatibility;
   forwards to GOMP_offload_register_ver.  */
1902 GOMP_offload_register (const void *host_table, int target_type,
1903 const void *target_data)
1905 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1908 /* This function should be called from every offload image while unloading.
1909 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1910 the target, and TARGET_DATA needed by target plugin. */
/* Unregistration hook called by each offload image at unload time (see
   comment at original lines 1908-1910): unloads the image from every
   initialized matching device and drops it from the pending-images array
   (swap-with-last removal, order not preserved).  NOTE(review): numeric
   prefixes are original line numbers; "®ister_lock" is a mangled
   "&register_lock"; braces and a break after the swap are missing.  */
1913 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1914 int target_type, const void *target_data)
1918 gomp_mutex_lock (®ister_lock);
1920 /* Unload image from all initialized devices. */
1921 for (i = 0; i < num_devices; i++)
1923 struct gomp_device_descr *devicep = &devices[i];
1924 gomp_mutex_lock (&devicep->lock);
1925 if (devicep->type == target_type
1926 && devicep->state == GOMP_DEVICE_INITIALIZED)
1927 gomp_unload_image_from_device (devicep, version,
1928 host_table, target_data);
1929 gomp_mutex_unlock (&devicep->lock);
1932 /* Remove image from array of pending images. */
1933 for (i = 0; i < num_offload_images; i++)
1934 if (offload_images[i].target_data == target_data)
1936 offload_images[i] = offload_images[--num_offload_images];
1940 gomp_mutex_unlock (®ister_lock);
/* Legacy (version-0) unregistration entry point kept for ABI compatibility;
   forwards to GOMP_offload_unregister_ver.  */
1944 GOMP_offload_unregister (const void *host_table, int target_type,
1945 const void *target_data)
1947 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1950 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1951 must be locked on entry, and remains locked on return. */
/* Initialize DEVICEP (lock held by caller, per the preceding comment at
   original lines 1950-1951): run the plugin init hook, load all images
   registered so far that match the device type, set up OpenACC async
   queues, and mark the device initialized.  NOTE(review): numeric prefixes
   are original line numbers; braces and the final argument of the
   load-image call are missing from the extraction.  */
1953 attribute_hidden void
1954 gomp_init_device (struct gomp_device_descr *devicep)
1957 if (!devicep->init_device_func (devicep->target_id))
1959 gomp_mutex_unlock (&devicep->lock);
1960 gomp_fatal ("device initialization failed");
1963 /* Load to device all images registered by the moment. */
1964 for (i = 0; i < num_offload_images; i++)
1966 struct offload_image_descr *image = &offload_images[i];
1967 if (image->type == devicep->type)
1968 gomp_load_image_to_device (devicep, image->version,
1969 image->host_table, image->target_data,
1973 /* Initialize OpenACC asynchronous queues. */
1974 goacc_init_asyncqueues (devicep);
1976 devicep->state = GOMP_DEVICE_INITIALIZED;
1979 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1980 must be locked on entry, and remains locked on return. */
/* Finalize DEVICEP (lock held by caller): tear down async queues, run the
   plugin fini hook, and mark the device finalized.  NOTE(review): the
   'return ret;' line is missing from the extraction.  */
1982 attribute_hidden bool
1983 gomp_fini_device (struct gomp_device_descr *devicep)
1985 bool ret = goacc_fini_asyncqueues (devicep);
/* Combine both shutdown results; either failing makes the result false.  */
1986 ret &= devicep->fini_device_func (devicep->target_id);
1987 devicep->state = GOMP_DEVICE_FINALIZED;
/* Unload every registered image of DEVICEP's type from an initialized
   device.  NOTE(review): numeric prefixes are original line numbers; the
   'image->host_table,' argument line (2004) and surrounding braces are
   missing from the extraction.  */
1991 attribute_hidden void
1992 gomp_unload_device (struct gomp_device_descr *devicep)
1994 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1998 /* Unload from device all images registered at the moment. */
1999 for (i = 0; i < num_offload_images; i++)
2001 struct offload_image_descr *image = &offload_images[i];
2002 if (image->type == devicep->type)
2003 gomp_unload_image_from_device (devicep, image->version,
2005 image->target_data);
2010 /* Host fallback for GOMP_target{,_ext} routines. */
/* Host fallback for GOMP_target{,_ext} (see comment at original line 2010):
   run FN on the host with a scratch gomp_thread state.  Fatal when
   OMP_TARGET_OFFLOAD=MANDATORY.  NOTE(review): numeric prefixes are
   original line numbers; the actual 'fn (hostaddrs);' invocation and the
   restore of the saved thread state are on lines lost in extraction —
   confirm against the original file.  */
2013 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2014 struct gomp_device_descr *devicep)
2016 struct gomp_thread old_thr, *thr = gomp_thread ();
2018 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2020 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2021 "be used for offloading");
/* Run the region with a clean thread state (old state saved in old_thr).  */
2024 memset (thr, '\0', sizeof (*thr));
2025 if (gomp_places_list)
2027 thr->place = old_thr.place;
2028 thr->ts.place_partition_len = gomp_places_list_len;
2031 gomp_free_thread (thr);
2035 /* Calculate alignment and size requirements of a private copy of data shared
2036 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
/* Accumulate the alignment and total size needed for private copies of all
   GOMP_MAP_FIRSTPRIVATE entries (see comment at original lines 2035-2036).
   Kind's high byte encodes log2 of the required alignment.  NOTE(review):
   the 'size_t *tgt_size' parameter line and the '*tgt_align = align;'
   assignment are on lines lost in extraction.  */
2039 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2040 unsigned short *kinds, size_t *tgt_align,
2044 for (i = 0; i < mapnum; i++)
2045 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2047 size_t align = (size_t) 1 << (kinds[i] >> 8);
2048 if (*tgt_align < align)
/* Round the running size up to this entry's alignment, then add it.  */
2050 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2051 *tgt_size += sizes[i];
2055 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
/* Copy each GOMP_MAP_FIRSTPRIVATE entry into the TGT scratch buffer at its
   required alignment and redirect hostaddrs[i] to the private copy (see
   comment at original line 2055).  NOTE(review): the trailing 'size_t
   tgt_size' parameter line and the 'if (al)' guard before the adjustment
   are on lines lost in extraction.  */
2058 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2059 size_t *sizes, unsigned short *kinds, size_t tgt_align,
/* Bump TGT to the first tgt_align-aligned address.  */
2062 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2064 tgt += tgt_align - al;
2067 for (i = 0; i < mapnum; i++)
2068 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2070 size_t align = (size_t) 1 << (kinds[i] >> 8);
2071 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2072 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
/* Subsequent mapping sees the private copy, not the original.  */
2073 hostaddrs[i] = tgt + tgt_size;
2074 tgt_size = tgt_size + sizes[i];
2078 /* Helper function of GOMP_target{,_ext} routines. */
/* Resolve HOST_FN to its device address on DEVICEP (see comment at original
   line 2078).  Native-exec devices run host code directly; otherwise look
   the function up in the device splay tree (functions are registered with
   a 1-byte host range, see gomp_load_image_to_device).  NOTE(review):
   return-type line, braces, the NULL return on finalized device, and the
   tgt_fn == NULL handling are on lines lost in extraction.  */
2081 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2082 void (*host_fn) (void *))
2084 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2085 return (void *) host_fn;
2088 gomp_mutex_lock (&devicep->lock);
2089 if (devicep->state == GOMP_DEVICE_FINALIZED)
2091 gomp_mutex_unlock (&devicep->lock);
2095 struct splay_tree_key_s k;
2096 k.host_start = (uintptr_t) host_fn;
2097 k.host_end = k.host_start + 1;
2098 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2099 gomp_mutex_unlock (&devicep->lock);
2103 return (void *) tgt_fn->tgt_offset;
2107 /* Called when encountering a target directive. If DEVICE
2108 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2109 GOMP_DEVICE_HOST_FALLBACK (or any value
2110 larger than last available hw device), use host fallback.
2111 FN is address of host code, UNUSED is part of the current ABI, but
2112 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2113 with MAPNUM entries, with addresses of the host objects,
2114 sizes of the host objects (resp. for pointer kind pointer bias
2115 and assumed sizeof (void *) size) and kinds. */
/* Legacy 'target' construct entry point; contract described in the comment
   block at original lines 2107-2115.  Falls back to host execution when no
   usable device/function address is available; otherwise maps variables,
   runs the region synchronously, and unmaps with copy-back.  NOTE(review):
   the 'if (devicep == NULL' head of the condition, 'void *fn_addr;'
   declaration, and the run_func trailing argument are on lines lost in
   extraction.  */
2118 GOMP_target (int device, void (*fn) (void *), const void *unused,
2119 size_t mapnum, void **hostaddrs, size_t *sizes,
2120 unsigned char *kinds)
2122 struct gomp_device_descr *devicep = resolve_device (device);
2126 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2127 /* All shared memory devices should use the GOMP_target_ext function. */
2128 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2129 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2130 return gomp_target_fallback (fn, hostaddrs, devicep);
2132 struct target_mem_desc *tgt_vars
2133 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2134 GOMP_MAP_VARS_TARGET);
2135 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2137 gomp_unmap_vars (tgt_vars, true);
/* Strip target-construct flags the device cannot honor.  NOTE(review): the
   'return flags;' line is missing from the extraction.  */
2140 static inline unsigned int
2141 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2143 /* If we cannot run asynchronously, simply ignore nowait. */
2144 if (devicep != NULL && devicep->async_run_func == NULL)
2145 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2150 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2151 and several arguments have been added:
2152 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2153 DEPEND is array of dependencies, see GOMP_task for details.
2155 ARGS is a pointer to an array consisting of a variable number of both
2156 device-independent and device-specific arguments, which can take one two
2157 elements where the first specifies for which device it is intended, the type
2158 and optionally also the value. If the value is not present in the first
2159 one, the whole second element the actual value. The last element of the
2160 array is a single NULL. Among the device independent can be for example
2161 NUM_TEAMS and THREAD_LIMIT.
2163 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2164 that value, or 1 if teams construct is not present, or 0, if
2165 teams construct does not have num_teams clause and so the choice is
2166 implementation defined, and -1 if it can't be determined on the host
2167 what value will GOMP_teams have on the device.
2168 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2169 body with that value, or 0, if teams construct does not have thread_limit
2170 clause or the teams construct is not present, or -1 if it can't be
2171 determined on the host what value will GOMP_teams have on the device. */
/* Modern 'target' construct entry point; contract described in the comment
   block at original lines 2150-2171 (nowait, depend, device-specific args).
   NOTE(review): numeric prefixes are original line numbers from a mangled
   extraction; many lines are missing (braces, #else/#endif pairs, the
   depend/nowait condition heads, 'void *fn_addr' declaration, fpc_done
   assignments, run_func trailing args) — tokens kept verbatim; do not
   treat this text as compilable.  */
2174 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2175 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2176 unsigned int flags, void **depend, void **args)
2178 struct gomp_device_descr *devicep = resolve_device (device);
2179 size_t tgt_align = 0, tgt_size = 0;
2180 bool fpc_done = false;
2182 flags = clear_unsupported_flags (devicep, flags);
2184 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2186 struct gomp_thread *thr = gomp_thread ();
2187 /* Create a team if we don't have any around, as nowait
2188 target tasks make sense to run asynchronously even when
2189 outside of any parallel. */
2190 if (__builtin_expect (thr->ts.team == NULL, 0))
2192 struct gomp_team *team = gomp_new_team (1);
2193 struct gomp_task *task = thr->task;
2194 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2195 team->prev_ts = thr->ts;
2196 thr->ts.team = team;
2197 thr->ts.team_id = 0;
2198 thr->ts.work_share = &team->work_shares[0];
2199 thr->ts.last_work_share = NULL;
2200 #ifdef HAVE_SYNC_BUILTINS
2201 thr->ts.single_count = 0;
2203 thr->ts.static_trip = 0;
2204 thr->task = &team->implicit_task[0];
2205 gomp_init_task (thr->task, NULL, icv);
2211 thr->task = &team->implicit_task[0];
2214 pthread_setspecific (gomp_thread_destructor, thr);
/* Asynchronous path: hand the whole region off as a target task.  */
2217 && !thr->task->final_task)
2219 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2220 sizes, kinds, flags, depend, args,
2221 GOMP_TARGET_TASK_BEFORE_MAP);
2226 /* If there are depend clauses, but nowait is not present
2227 (or we are in a final task), block the parent task until the
2228 dependencies are resolved and then just continue with the rest
2229 of the function as if it is a merged task. */
2232 struct gomp_thread *thr = gomp_thread ();
2233 if (thr->task && thr->task->depend_hash)
2235 /* If we might need to wait, copy firstprivate now. */
2236 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2237 &tgt_align, &tgt_size);
2240 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2241 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2242 tgt_align, tgt_size);
2245 gomp_task_maybe_wait_for_dependencies (depend);
/* Host fallback when no device / no offloaded copy of FN / can_run veto.  */
2251 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2252 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2253 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2257 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2258 &tgt_align, &tgt_size);
2261 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2262 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2263 tgt_align, tgt_size);
2266 gomp_target_fallback (fn, hostaddrs, devicep);
2270 struct target_mem_desc *tgt_vars;
/* Shared-memory devices only need firstprivate copies, no mapping.  */
2271 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2275 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2276 &tgt_align, &tgt_size);
2279 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2280 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2281 tgt_align, tgt_size);
2287 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2288 true, GOMP_MAP_VARS_TARGET);
2289 devicep->run_func (devicep->target_id, fn_addr,
2290 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2293 gomp_unmap_vars (tgt_vars, true);
2296 /* Host fallback for GOMP_target_data{,_ext} routines. */
2299 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
2301 struct gomp_task_icv
*icv
= gomp_icv (false);
2303 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2305 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2306 "be used for offloading");
2308 if (icv
->target_data
)
2310 /* Even when doing a host fallback, if there are any active
2311 #pragma omp target data constructs, need to remember the
2312 new #pragma omp target data, otherwise GOMP_target_end_data
2313 would get out of sync. */
2314 struct target_mem_desc
*tgt
2315 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
2316 GOMP_MAP_VARS_DATA
);
2317 tgt
->prev
= icv
->target_data
;
2318 icv
->target_data
= tgt
;
2323 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
2324 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2326 struct gomp_device_descr
*devicep
= resolve_device (device
);
2329 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2330 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
2331 return gomp_target_data_fallback (devicep
);
2333 struct target_mem_desc
*tgt
2334 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2335 GOMP_MAP_VARS_DATA
);
2336 struct gomp_task_icv
*icv
= gomp_icv (true);
2337 tgt
->prev
= icv
->target_data
;
2338 icv
->target_data
= tgt
;
2342 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2343 size_t *sizes
, unsigned short *kinds
)
2345 struct gomp_device_descr
*devicep
= resolve_device (device
);
2348 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2349 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2350 return gomp_target_data_fallback (devicep
);
2352 struct target_mem_desc
*tgt
2353 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2354 GOMP_MAP_VARS_DATA
);
2355 struct gomp_task_icv
*icv
= gomp_icv (true);
2356 tgt
->prev
= icv
->target_data
;
2357 icv
->target_data
= tgt
;
2361 GOMP_target_end_data (void)
2363 struct gomp_task_icv
*icv
= gomp_icv (false);
2364 if (icv
->target_data
)
2366 struct target_mem_desc
*tgt
= icv
->target_data
;
2367 icv
->target_data
= tgt
->prev
;
2368 gomp_unmap_vars (tgt
, true);
2373 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2374 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2376 struct gomp_device_descr
*devicep
= resolve_device (device
);
2379 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2380 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2383 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2387 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2388 size_t *sizes
, unsigned short *kinds
,
2389 unsigned int flags
, void **depend
)
2391 struct gomp_device_descr
*devicep
= resolve_device (device
);
2393 /* If there are depend clauses, but nowait is not present,
2394 block the parent task until the dependencies are resolved
2395 and then just continue with the rest of the function as if it
2396 is a merged task. Until we are able to schedule task during
2397 variable mapping or unmapping, ignore nowait if depend clauses
2401 struct gomp_thread
*thr
= gomp_thread ();
2402 if (thr
->task
&& thr
->task
->depend_hash
)
2404 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2406 && !thr
->task
->final_task
)
2408 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2409 mapnum
, hostaddrs
, sizes
, kinds
,
2410 flags
| GOMP_TARGET_FLAG_UPDATE
,
2411 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2416 struct gomp_team
*team
= thr
->ts
.team
;
2417 /* If parallel or taskgroup has been cancelled, don't start new
2419 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2421 if (gomp_team_barrier_cancelled (&team
->barrier
))
2423 if (thr
->task
->taskgroup
)
2425 if (thr
->task
->taskgroup
->cancelled
)
2427 if (thr
->task
->taskgroup
->workshare
2428 && thr
->task
->taskgroup
->prev
2429 && thr
->task
->taskgroup
->prev
->cancelled
)
2434 gomp_task_maybe_wait_for_dependencies (depend
);
2440 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2441 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2444 struct gomp_thread
*thr
= gomp_thread ();
2445 struct gomp_team
*team
= thr
->ts
.team
;
2446 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2447 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2449 if (gomp_team_barrier_cancelled (&team
->barrier
))
2451 if (thr
->task
->taskgroup
)
2453 if (thr
->task
->taskgroup
->cancelled
)
2455 if (thr
->task
->taskgroup
->workshare
2456 && thr
->task
->taskgroup
->prev
2457 && thr
->task
->taskgroup
->prev
->cancelled
)
2462 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2466 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2467 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
2469 const int typemask
= 0xff;
2471 gomp_mutex_lock (&devicep
->lock
);
2472 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2474 gomp_mutex_unlock (&devicep
->lock
);
2478 for (i
= 0; i
< mapnum
; i
++)
2480 struct splay_tree_key_s cur_node
;
2481 unsigned char kind
= kinds
[i
] & typemask
;
2485 case GOMP_MAP_ALWAYS_FROM
:
2486 case GOMP_MAP_DELETE
:
2487 case GOMP_MAP_RELEASE
:
2488 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2489 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2490 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2491 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2492 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2493 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2494 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2495 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2499 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
2501 if ((kind
== GOMP_MAP_DELETE
2502 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
2503 && k
->refcount
!= REFCOUNT_INFINITY
)
2506 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
2507 || kind
== GOMP_MAP_ALWAYS_FROM
)
2508 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
2509 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2510 + cur_node
.host_start
2512 cur_node
.host_end
- cur_node
.host_start
);
2513 if (k
->refcount
== 0)
2514 gomp_remove_var (devicep
, k
);
2518 gomp_mutex_unlock (&devicep
->lock
);
2519 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2524 gomp_mutex_unlock (&devicep
->lock
);
2528 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
2529 size_t *sizes
, unsigned short *kinds
,
2530 unsigned int flags
, void **depend
)
2532 struct gomp_device_descr
*devicep
= resolve_device (device
);
2534 /* If there are depend clauses, but nowait is not present,
2535 block the parent task until the dependencies are resolved
2536 and then just continue with the rest of the function as if it
2537 is a merged task. Until we are able to schedule task during
2538 variable mapping or unmapping, ignore nowait if depend clauses
2542 struct gomp_thread
*thr
= gomp_thread ();
2543 if (thr
->task
&& thr
->task
->depend_hash
)
2545 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2547 && !thr
->task
->final_task
)
2549 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2550 mapnum
, hostaddrs
, sizes
, kinds
,
2551 flags
, depend
, NULL
,
2552 GOMP_TARGET_TASK_DATA
))
2557 struct gomp_team
*team
= thr
->ts
.team
;
2558 /* If parallel or taskgroup has been cancelled, don't start new
2560 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2562 if (gomp_team_barrier_cancelled (&team
->barrier
))
2564 if (thr
->task
->taskgroup
)
2566 if (thr
->task
->taskgroup
->cancelled
)
2568 if (thr
->task
->taskgroup
->workshare
2569 && thr
->task
->taskgroup
->prev
2570 && thr
->task
->taskgroup
->prev
->cancelled
)
2575 gomp_task_maybe_wait_for_dependencies (depend
);
2581 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2582 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2585 struct gomp_thread
*thr
= gomp_thread ();
2586 struct gomp_team
*team
= thr
->ts
.team
;
2587 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2588 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2590 if (gomp_team_barrier_cancelled (&team
->barrier
))
2592 if (thr
->task
->taskgroup
)
2594 if (thr
->task
->taskgroup
->cancelled
)
2596 if (thr
->task
->taskgroup
->workshare
2597 && thr
->task
->taskgroup
->prev
2598 && thr
->task
->taskgroup
->prev
->cancelled
)
2603 /* The variables are mapped separately such that they can be released
2606 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2607 for (i
= 0; i
< mapnum
; i
++)
2608 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2610 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
2611 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2614 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
2616 for (j
= i
+ 1; j
< mapnum
; j
++)
2617 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
2618 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
2620 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
2621 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2625 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2626 true, GOMP_MAP_VARS_ENTER_DATA
);
2628 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
2632 gomp_target_task_fn (void *data
)
2634 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
2635 struct gomp_device_descr
*devicep
= ttask
->devicep
;
2637 if (ttask
->fn
!= NULL
)
2641 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2642 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
2643 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2645 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
2646 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
);
2650 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2653 gomp_unmap_vars (ttask
->tgt
, true);
2657 void *actual_arguments
;
2658 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2661 actual_arguments
= ttask
->hostaddrs
;
2665 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
2666 NULL
, ttask
->sizes
, ttask
->kinds
, true,
2667 GOMP_MAP_VARS_TARGET
);
2668 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
2670 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
2672 assert (devicep
->async_run_func
);
2673 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
2674 ttask
->args
, (void *) ttask
);
2677 else if (devicep
== NULL
2678 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2679 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2683 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
2684 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2685 ttask
->kinds
, true);
2686 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2687 for (i
= 0; i
< ttask
->mapnum
; i
++)
2688 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2690 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
2691 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
2692 GOMP_MAP_VARS_ENTER_DATA
);
2693 i
+= ttask
->sizes
[i
];
2696 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
2697 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2699 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2705 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
2709 struct gomp_task_icv
*icv
= gomp_icv (true);
2710 icv
->thread_limit_var
2711 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
2717 omp_target_alloc (size_t size
, int device_num
)
2719 if (device_num
== gomp_get_num_devices ())
2720 return malloc (size
);
2725 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2726 if (devicep
== NULL
)
2729 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2730 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2731 return malloc (size
);
2733 gomp_mutex_lock (&devicep
->lock
);
2734 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
2735 gomp_mutex_unlock (&devicep
->lock
);
2740 omp_target_free (void *device_ptr
, int device_num
)
2742 if (device_ptr
== NULL
)
2745 if (device_num
== gomp_get_num_devices ())
2754 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2755 if (devicep
== NULL
)
2758 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2759 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2765 gomp_mutex_lock (&devicep
->lock
);
2766 gomp_free_device_memory (devicep
, device_ptr
);
2767 gomp_mutex_unlock (&devicep
->lock
);
2771 omp_target_is_present (const void *ptr
, int device_num
)
2776 if (device_num
== gomp_get_num_devices ())
2782 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2783 if (devicep
== NULL
)
2786 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2787 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2790 gomp_mutex_lock (&devicep
->lock
);
2791 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2792 struct splay_tree_key_s cur_node
;
2794 cur_node
.host_start
= (uintptr_t) ptr
;
2795 cur_node
.host_end
= cur_node
.host_start
;
2796 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2797 int ret
= n
!= NULL
;
2798 gomp_mutex_unlock (&devicep
->lock
);
2803 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
2804 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
2807 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2810 if (dst_device_num
!= gomp_get_num_devices ())
2812 if (dst_device_num
< 0)
2815 dst_devicep
= resolve_device (dst_device_num
);
2816 if (dst_devicep
== NULL
)
2819 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2820 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2823 if (src_device_num
!= num_devices_openmp
)
2825 if (src_device_num
< 0)
2828 src_devicep
= resolve_device (src_device_num
);
2829 if (src_devicep
== NULL
)
2832 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2833 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2836 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2838 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2841 if (src_devicep
== NULL
)
2843 gomp_mutex_lock (&dst_devicep
->lock
);
2844 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2845 (char *) dst
+ dst_offset
,
2846 (char *) src
+ src_offset
, length
);
2847 gomp_mutex_unlock (&dst_devicep
->lock
);
2848 return (ret
? 0 : EINVAL
);
2850 if (dst_devicep
== NULL
)
2852 gomp_mutex_lock (&src_devicep
->lock
);
2853 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2854 (char *) dst
+ dst_offset
,
2855 (char *) src
+ src_offset
, length
);
2856 gomp_mutex_unlock (&src_devicep
->lock
);
2857 return (ret
? 0 : EINVAL
);
2859 if (src_devicep
== dst_devicep
)
2861 gomp_mutex_lock (&src_devicep
->lock
);
2862 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2863 (char *) dst
+ dst_offset
,
2864 (char *) src
+ src_offset
, length
);
2865 gomp_mutex_unlock (&src_devicep
->lock
);
2866 return (ret
? 0 : EINVAL
);
2872 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
2873 int num_dims
, const size_t *volume
,
2874 const size_t *dst_offsets
,
2875 const size_t *src_offsets
,
2876 const size_t *dst_dimensions
,
2877 const size_t *src_dimensions
,
2878 struct gomp_device_descr
*dst_devicep
,
2879 struct gomp_device_descr
*src_devicep
)
2881 size_t dst_slice
= element_size
;
2882 size_t src_slice
= element_size
;
2883 size_t j
, dst_off
, src_off
, length
;
2888 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2889 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2890 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2892 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2894 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
2898 else if (src_devicep
== NULL
)
2899 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2900 (char *) dst
+ dst_off
,
2901 (const char *) src
+ src_off
,
2903 else if (dst_devicep
== NULL
)
2904 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2905 (char *) dst
+ dst_off
,
2906 (const char *) src
+ src_off
,
2908 else if (src_devicep
== dst_devicep
)
2909 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2910 (char *) dst
+ dst_off
,
2911 (const char *) src
+ src_off
,
2915 return ret
? 0 : EINVAL
;
2918 /* FIXME: it would be nice to have some plugin function to handle
2919 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2920 be handled in the generic recursion below, and for host-host it
2921 should be used even for any num_dims >= 2. */
2923 for (i
= 1; i
< num_dims
; i
++)
2924 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2925 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2927 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2928 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2930 for (j
= 0; j
< volume
[0]; j
++)
2932 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2933 (const char *) src
+ src_off
,
2934 element_size
, num_dims
- 1,
2935 volume
+ 1, dst_offsets
+ 1,
2936 src_offsets
+ 1, dst_dimensions
+ 1,
2937 src_dimensions
+ 1, dst_devicep
,
2941 dst_off
+= dst_slice
;
2942 src_off
+= src_slice
;
2948 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
2949 int num_dims
, const size_t *volume
,
2950 const size_t *dst_offsets
,
2951 const size_t *src_offsets
,
2952 const size_t *dst_dimensions
,
2953 const size_t *src_dimensions
,
2954 int dst_device_num
, int src_device_num
)
2956 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2961 if (dst_device_num
!= gomp_get_num_devices ())
2963 if (dst_device_num
< 0)
2966 dst_devicep
= resolve_device (dst_device_num
);
2967 if (dst_devicep
== NULL
)
2970 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2971 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2974 if (src_device_num
!= num_devices_openmp
)
2976 if (src_device_num
< 0)
2979 src_devicep
= resolve_device (src_device_num
);
2980 if (src_devicep
== NULL
)
2983 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2984 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2988 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2992 gomp_mutex_lock (&src_devicep
->lock
);
2993 else if (dst_devicep
)
2994 gomp_mutex_lock (&dst_devicep
->lock
);
2995 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2996 volume
, dst_offsets
, src_offsets
,
2997 dst_dimensions
, src_dimensions
,
2998 dst_devicep
, src_devicep
);
3000 gomp_mutex_unlock (&src_devicep
->lock
);
3001 else if (dst_devicep
)
3002 gomp_mutex_unlock (&dst_devicep
->lock
);
3007 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
3008 size_t size
, size_t device_offset
, int device_num
)
3010 if (device_num
== gomp_get_num_devices ())
3016 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3017 if (devicep
== NULL
)
3020 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3021 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3024 gomp_mutex_lock (&devicep
->lock
);
3026 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3027 struct splay_tree_key_s cur_node
;
3030 cur_node
.host_start
= (uintptr_t) host_ptr
;
3031 cur_node
.host_end
= cur_node
.host_start
+ size
;
3032 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3035 if (n
->tgt
->tgt_start
+ n
->tgt_offset
3036 == (uintptr_t) device_ptr
+ device_offset
3037 && n
->host_start
<= cur_node
.host_start
3038 && n
->host_end
>= cur_node
.host_end
)
3043 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
3044 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
3048 tgt
->to_free
= NULL
;
3050 tgt
->list_count
= 0;
3051 tgt
->device_descr
= devicep
;
3052 splay_tree_node array
= tgt
->array
;
3053 splay_tree_key k
= &array
->key
;
3054 k
->host_start
= cur_node
.host_start
;
3055 k
->host_end
= cur_node
.host_end
;
3057 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
3058 k
->refcount
= REFCOUNT_INFINITY
;
3059 k
->dynamic_refcount
= 0;
3062 array
->right
= NULL
;
3063 splay_tree_insert (&devicep
->mem_map
, array
);
3066 gomp_mutex_unlock (&devicep
->lock
);
3071 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
3073 if (device_num
== gomp_get_num_devices ())
3079 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3080 if (devicep
== NULL
)
3083 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3086 gomp_mutex_lock (&devicep
->lock
);
3088 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3089 struct splay_tree_key_s cur_node
;
3092 cur_node
.host_start
= (uintptr_t) ptr
;
3093 cur_node
.host_end
= cur_node
.host_start
;
3094 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3096 && n
->host_start
== cur_node
.host_start
3097 && n
->refcount
== REFCOUNT_INFINITY
3098 && n
->tgt
->tgt_start
== 0
3099 && n
->tgt
->to_free
== NULL
3100 && n
->tgt
->refcount
== 1
3101 && n
->tgt
->list_count
== 0)
3103 splay_tree_remove (&devicep
->mem_map
, n
);
3104 gomp_unmap_tgt (n
->tgt
);
3108 gomp_mutex_unlock (&devicep
->lock
);
3113 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
3116 if (device_num
== gomp_get_num_devices ())
3117 return gomp_pause_host ();
3118 if (device_num
< 0 || device_num
>= num_devices_openmp
)
3120 /* Do nothing for target devices for now. */
3125 omp_pause_resource_all (omp_pause_resource_t kind
)
3128 if (gomp_pause_host ())
3130 /* Do nothing for target devices for now. */
3134 ialias (omp_pause_resource
)
3135 ialias (omp_pause_resource_all
)
3137 #ifdef PLUGIN_SUPPORT
3139 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
3141 The handles of the found functions are stored in the corresponding fields
3142 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3145 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
3146 const char *plugin_name
)
3148 const char *err
= NULL
, *last_missing
= NULL
;
3150 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
3154 /* Check if all required functions are available in the plugin and store
3155 their handlers. None of the symbols can legitimately be NULL,
3156 so we don't need to check dlerror all the time. */
3158 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3160 /* Similar, but missing functions are not an error. Return false if
3161 failed, true otherwise. */
3162 #define DLSYM_OPT(f, n) \
3163 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3164 || (last_missing = #n, 0))
3167 if (device
->version_func () != GOMP_VERSION
)
3169 err
= "plugin version mismatch";
3176 DLSYM (get_num_devices
);
3177 DLSYM (init_device
);
3178 DLSYM (fini_device
);
3180 DLSYM (unload_image
);
3185 device
->capabilities
= device
->get_caps_func ();
3186 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3189 DLSYM_OPT (async_run
, async_run
);
3190 DLSYM_OPT (can_run
, can_run
);
3193 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3195 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
3196 || !DLSYM_OPT (openacc
.create_thread_data
,
3197 openacc_create_thread_data
)
3198 || !DLSYM_OPT (openacc
.destroy_thread_data
,
3199 openacc_destroy_thread_data
)
3200 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
3201 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
3202 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
3203 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
3204 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
3205 || !DLSYM_OPT (openacc
.async
.queue_callback
,
3206 openacc_async_queue_callback
)
3207 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
3208 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
3209 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
3210 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
3212 /* Require all the OpenACC handlers if we have
3213 GOMP_OFFLOAD_CAP_OPENACC_200. */
3214 err
= "plugin missing OpenACC handler function";
3219 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
3220 openacc_cuda_get_current_device
);
3221 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
3222 openacc_cuda_get_current_context
);
3223 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
3224 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
3225 if (cuda
&& cuda
!= 4)
3227 /* Make sure all the CUDA functions are there if any of them are. */
3228 err
= "plugin missing OpenACC CUDA handler function";
3240 gomp_error ("while loading %s: %s", plugin_name
, err
);
3242 gomp_error ("missing function was %s", last_missing
);
3244 dlclose (plugin_handle
);
3249 /* This function finalizes all initialized devices. */
3252 gomp_target_fini (void)
3255 for (i
= 0; i
< num_devices
; i
++)
3258 struct gomp_device_descr
*devicep
= &devices
[i
];
3259 gomp_mutex_lock (&devicep
->lock
);
3260 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3261 ret
= gomp_fini_device (devicep
);
3262 gomp_mutex_unlock (&devicep
->lock
);
3264 gomp_fatal ("device finalization failed");
3268 /* This function initializes the runtime for offloading.
3269 It parses the list of offload plugins, and tries to load these.
3270 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3271 will be set, and the array DEVICES initialized, containing descriptors for
3272 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3276 gomp_target_init (void)
3278 const char *prefix
="libgomp-plugin-";
3279 const char *suffix
= SONAME_SUFFIX (1);
3280 const char *cur
, *next
;
3282 int i
, new_num_devs
;
3283 int num_devs
= 0, num_devs_openmp
;
3284 struct gomp_device_descr
*devs
= NULL
;
3286 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
3289 cur
= OFFLOAD_PLUGINS
;
3293 struct gomp_device_descr current_device
;
3294 size_t prefix_len
, suffix_len
, cur_len
;
3296 next
= strchr (cur
, ',');
3298 prefix_len
= strlen (prefix
);
3299 cur_len
= next
? next
- cur
: strlen (cur
);
3300 suffix_len
= strlen (suffix
);
3302 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
3309 memcpy (plugin_name
, prefix
, prefix_len
);
3310 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
3311 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
3313 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
3315 new_num_devs
= current_device
.get_num_devices_func ();
3316 if (new_num_devs
>= 1)
3318 /* Augment DEVICES and NUM_DEVICES. */
3320 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
3321 * sizeof (struct gomp_device_descr
));
3329 current_device
.name
= current_device
.get_name_func ();
3330 /* current_device.capabilities has already been set. */
3331 current_device
.type
= current_device
.get_type_func ();
3332 current_device
.mem_map
.root
= NULL
;
3333 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
3334 for (i
= 0; i
< new_num_devs
; i
++)
3336 current_device
.target_id
= i
;
3337 devs
[num_devs
] = current_device
;
3338 gomp_mutex_init (&devs
[num_devs
].lock
);
3349 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3350 NUM_DEVICES_OPENMP. */
3351 struct gomp_device_descr
*devs_s
3352 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
3359 num_devs_openmp
= 0;
3360 for (i
= 0; i
< num_devs
; i
++)
3361 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3362 devs_s
[num_devs_openmp
++] = devs
[i
];
3363 int num_devs_after_openmp
= num_devs_openmp
;
3364 for (i
= 0; i
< num_devs
; i
++)
3365 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3366 devs_s
[num_devs_after_openmp
++] = devs
[i
];
3370 for (i
= 0; i
< num_devs
; i
++)
3372 /* The 'devices' array can be moved (by the realloc call) until we have
3373 found all the plugins, so registering with the OpenACC runtime (which
3374 takes a copy of the pointer argument) must be delayed until now. */
3375 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3376 goacc_register (&devs
[i
]);
3379 num_devices
= num_devs
;
3380 num_devices_openmp
= num_devs_openmp
;
3382 if (atexit (gomp_target_fini
) != 0)
3383 gomp_fatal ("atexit failed");
3386 #else /* PLUGIN_SUPPORT */
3387 /* If dlfcn.h is unavailable we always fallback to host execution.
3388 GOMP_target* routines are just stubs for this case. */
3390 gomp_target_init (void)
3393 #endif /* PLUGIN_SUPPORT */