/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "gomp-constants.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include "plugin-suffix.h"

#define FIELD_TGT_EMPTY (~(size_t) 0)
#define OFFSET_INLINED (~(uintptr_t) 0)
#define OFFSET_POINTER (~(uintptr_t) 1)
#define OFFSET_STRUCT (~(uintptr_t) 2)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr
{
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

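/* Example (illustrative, not part of the library): a zero-length array
   section such as "#pragma omp target map(tofrom: p[:0])" produces a key
   with host_start == host_end.  The probing above lets such a key match a
   mapping that either contains the address or ends exactly at it; 'ptr'
   below is a hypothetical host pointer:

     struct splay_tree_key_s key;
     key.host_start = key.host_end = (uintptr_t) ptr;
     splay_tree_key n = gomp_map_lookup (mem_map, &key);

   finds an entry [start, end) with start <= ptr <= end rather than
   requiring start <= ptr < end, as a plain lookup would.  */
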
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

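/* Worked example (illustrative): with the limits above, two to-device
   regions at tgt_start-relative offsets [0, 1024) and [2048, 3072) are
   separated by a 1024-byte gap, below MAX_COALESCE_BUF_GAP, so
   gomp_coalesce_buf_add merges them into one chunk [0, 3072) and a single
   host-to-device transfer; the padding bytes in between are copied too,
   which is harmless because the whole range lies in freshly allocated
   device memory.  A 64 KiB region, by contrast, exceeds
   MAX_COALESCE_BUF_SIZE and is always transferred directly.  */
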
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

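/* Illustrative sketch of the chunk lookup above: for chunks with
   {start,end} offsets {0,32}, {64,128}, {192,256} and a copy at
   doff == 96, the binary search narrows first/last to the middle chunk,
   verifies doff + sz <= 128, and memcpy's into buf at offset 96 - 0.
   The data only reaches the device later, in the batched transfers that
   gomp_map_vars_internal issues once all mappings are staged.  */
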
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

attribute_hidden void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->do_detach = kind == GOMP_MAP_ATTACH;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

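/* Example (illustrative): with short_mapkind, each 16-bit kind packs the
   map kind in the low byte and log2 of the alignment in the high byte.
   A "map(to:)" entry for an 8-byte-aligned double might be encoded as

     unsigned short kind = (3 << 8) | GOMP_MAP_TO;  // align = 1 << 3

   so callers recover the alignment as (size_t) 1 << (kind >> 8) and the
   map kind as kind & 0xff, matching rshift/typemask below.  */
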
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

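/* Usage sketch (illustrative): mapping several fields of one structure,
   e.g.

     struct S { int a; double b[8]; int c; } s;
     #pragma omp target map(tofrom: s.a, s.c)

   is compiled to a GOMP_MAP_STRUCT entry followed by one entry per field.
   When the enclosing struct is already present on the device, every field
   must fall inside that same mapping at a consistent offset, which is what
   the lookups above verify before reusing the existing entry.  */
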
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}

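/* Usage sketch (illustrative): an OpenACC attach, e.g.

     struct T { float *data; } t;
     #pragma acc enter data copyin(t)
     #pragma acc enter data copyin(t.data[0:n])  // attaches t.data

   ends up here with N describing the device copy of 't', ATTACH_TO the
   host address of the 't.data' field, and the device-side pointer field
   rewritten to point at the device copy of the array.  The per-pointer
   attach_count makes repeated attaches idempotent until a matching number
   of detaches occurs.  */
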
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

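/* Example (illustrative): for a "#pragma omp target" region with a mapped
   array at list slot I, gomp_map_val yields its device address
   (tgt_start + tgt_offset + offset), which gomp_map_vars_internal then
   stores into the target argument vector at tgt->tgt_start
   + I * sizeof (void *).  Firstprivate scalars instead resolve through
   OFFSET_INLINED to the (possibly rewritten) host address.  */
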
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
		   || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;

  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask)
	  && (kind & typemask) != GOMP_MAP_ATTACH)
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily test
		   'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above. Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].do_detach
			= (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
		      n->refcount++;
		    }
		  else
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  gomp_attach_pointer (devicep, aq, mem_map, n,
				       (uintptr_t) hostaddrs[i], sizes[i],
				       cbufp);
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].do_detach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->virtual_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  tgt->list[j].do_detach = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
       || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
      && tgt->refcount == 0)
    {
      /* If we're about to discard a target_mem_desc with no "structural"
	 references (tgt->refcount == 0), any splay keys linked in the tgt's
	 list must have their virtual refcount incremented to represent that
	 "lost" reference in order to implement the semantics of the OpenACC
	 "present increment" operation properly.  */
      if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
	for (i = 0; i < tgt->list_count; i++)
	  if (tgt->list[i].key)
	    tgt->list[i].key->virtual_refcount++;

      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (&devicep->mem_map,
			   (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].do_detach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     k->refcount == 1, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->tgt == tgt
	  && k->virtual_refcount > 0
	  && k->refcount != REFCOUNT_INFINITY)
	{
	  k->virtual_refcount--;
	  k->refcount--;
	}
      else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

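/* Usage sketch (illustrative): "#pragma omp target update to(a[0:n])"
   reaches this function with a copy-to kind, so the device copy of the
   already-mapped section is refreshed in place; "update from(...)"
   refreshes the host copy instead.  Neither direction changes reference
   counts or creates new mappings.  */
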
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compilers
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->virtual_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->virtual_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

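/* Layout sketch (illustrative): HOST_TABLE is four pointers bounding two
   arrays, roughly

     void *host_table[] = { __offload_func_table, __offload_funcs_end,
			    __offload_var_table, __offload_vars_end };

   where the func table holds N function addresses and the var table holds
   N (address, size) pairs; the plugin's load_image_func must return exactly
   one addr_pair per host entry, in the same order.  (The symbol names above
   are hypothetical, for illustration only.)  */
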
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

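/* Example (illustrative): for "int v; #pragma omp declare target link(v)"
   the host table records v's size with the most significant bit set, so
   the entry above was registered with REFCOUNT_LINK; it is removed via
   gomp_remove_var, which restores the saved link key rather than simply
   deleting the node.  */
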
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

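/* Worked example (illustrative): two firstprivate vars, a char (align 1,
   size 1) and a double (align 8, size 8), yield tgt_align == 8 and
   tgt_size == 16: the char lands at offset 0 giving size 1, and
   (*tgt_size + align - 1) & ~(align - 1) then rounds 1 up to 8 before the
   double's 8 bytes are added.  */
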
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}

2026 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2028 /* If we cannot run asynchronously, simply ignore nowait. */
2029 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2030 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, each of which can take
   one or two elements: the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first element, the whole second element is the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */
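/* For instance (an illustrative reading of the GOMP_TARGET_ARG_* constants
   in gomp-constants.h, not normative): a num_teams argument whose value fits
   in the upper bits can be passed as a single element such as
     (void *) (GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS
	       | (4 << GOMP_TARGET_ARG_VALUE_SHIFT))
   while an identification element with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set
   is instead followed by a second element holding the whole value.  */
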
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);

  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
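/* Illustrative call pairing (an assumption about compiler output, shown only
   for exposition): nested "#pragma omp target data" regions produce
   LIFO-paired calls,
     GOMP_target_data_ext (...);  // pushes tgt A, icv->target_data == A
     GOMP_target_data_ext (...);  // pushes tgt B, B->prev == A
     GOMP_target_end_data ();     // pops and unmaps B
     GOMP_target_end_data ();     // pops and unmaps A
   so icv->target_data acts as a per-task stack linked through ->prev, which
   is why the host fallback above must push a dummy descriptor too.  */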
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds,
                        unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
                                           depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (__builtin_expect (gomp_cancel_var, 0) && team)
                {
                  if (gomp_team_barrier_cancelled (&team->barrier))
                    return;
                  if (thr->task->taskgroup)
                    {
                      if (thr->task->taskgroup->cancelled)
                        return;
                      if (thr->task->taskgroup->workshare
                          && thr->task->taskgroup->prev
                          && thr->task->taskgroup->prev->cancelled)
                        return;
                    }
                }

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
            k->refcount--;
          if ((kind == GOMP_MAP_DELETE
               || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
              && k->refcount != REFCOUNT_INFINITY)
            k->refcount = 0;

          if ((kind == GOMP_MAP_FROM && k->refcount == 0)
              || kind == GOMP_MAP_ALWAYS_FROM)
            gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
                                (void *) (k->tgt->tgt_start + k->tgt_offset
                                          + cur_node.host_start
                                          - k->host_start),
                                cur_node.host_end - cur_node.host_start);
          if (k->refcount == 0)
            gomp_remove_var (devicep, k);

          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  gomp_mutex_unlock (&devicep->lock);
}
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
                             size_t *sizes, unsigned short *kinds,
                             unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (__builtin_expect (gomp_cancel_var, 0) && team)
                {
                  if (gomp_team_barrier_cancelled (&team->barrier))
                    return;
                  if (thr->task->taskgroup)
                    {
                      if (thr->task->taskgroup->cancelled)
                        return;
                      if (thr->task->taskgroup->workshare
                          && thr->task->taskgroup->prev
                          && thr->task->taskgroup->prev->cancelled)
                        return;
                    }
                }

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
                         &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
          i += sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
                       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
          || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
        {
          ttask->state = GOMP_TARGET_TASK_FALLBACK;
          gomp_target_fallback (ttask->fn, ttask->hostaddrs);
          return false;
        }

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
        {
          if (ttask->tgt)
            gomp_unmap_vars (ttask->tgt, true);
          return false;
        }

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        {
          ttask->tgt = NULL;
          actual_arguments = ttask->hostaddrs;
        }
      else
        {
          ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
                                      NULL, ttask->sizes, ttask->kinds, true,
                                      GOMP_MAP_VARS_TARGET);
          actual_arguments = (void *) ttask->tgt->tgt_start;
        }
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
                               ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
           || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
           || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
                         NULL, &ttask->sizes[i], &ttask->kinds[i], true,
                         GOMP_MAP_VARS_ENTER_DATA);
          i += ttask->sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
                       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                    ttask->kinds);
  return false;
}
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length,
                   size_t dst_offset, size_t src_offset, int dst_device_num,
                   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (const char *) src + src_offset,
              length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (const char *) src + src_offset,
                                        length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (const char *) src + src_offset,
                                        length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                       (char *) dst + dst_offset,
                                       (const char *) src + src_offset,
                                       length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
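#if 0
/* Usage sketch (not compiled in; prototypes come from omp.h): round-trip a
   host buffer through device memory on the default device.  A return of 0
   means success, EINVAL failure.  */
void
memcpy_example (void)
{
  int dev = omp_get_default_device ();
  int host = omp_get_initial_device ();
  double h[8] = { 0 };
  void *d = omp_target_alloc (sizeof h, dev);
  if (d != NULL
      && omp_target_memcpy (d, h, sizeof h, 0, 0, dev, host) == 0
      && omp_target_memcpy (h, d, sizeof h, 0, 0, host, dev) == 0)
    ;	/* Round trip succeeded.  */
  omp_target_free (d, dev);
}
#endif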
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
                               int num_dims, const size_t *volume,
                               const size_t *dst_offsets,
                               const size_t *src_offsets,
                               const size_t *dst_dimensions,
                               const size_t *src_dimensions,
                               struct gomp_device_descr *dst_devicep,
                               struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
          || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
          || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
        return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
        {
          memcpy ((char *) dst + dst_off, (const char *) src + src_off,
                  length);
          ret = 1;
        }
      else if (src_devicep == NULL)
        ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (const char *) src + src_off,
                                          length);
      else if (dst_devicep == NULL)
        ret = src_devicep->dev2host_func (src_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (const char *) src + src_off,
                                          length);
      else if (src_devicep == dst_devicep)
        ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                         (char *) dst + dst_off,
                                         (const char *) src + src_off,
                                         length);
      else
        ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
        || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
                                           (const char *) src + src_off,
                                           element_size, num_dims - 1,
                                           volume + 1, dst_offsets + 1,
                                           src_offsets + 1, dst_dimensions + 1,
                                           src_dimensions + 1, dst_devicep,
                                           src_devicep);
      if (ret)
        return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
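/* Worked example (illustrative): for num_dims == 2 the loop above computes
   dst_slice = element_size * dst_dimensions[1], i.e. the byte length of one
   full row of the destination array, seeks to row dst_offsets[0], and then
   recurses with num_dims == 1 once per row in volume[0], advancing by one
   slice per iteration; the column offset and extent are handled by the
   num_dims == 1 base case.  */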
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
                        int num_dims, const size_t *volume,
                        const size_t *dst_offsets,
                        const size_t *src_offsets,
                        const size_t *dst_dimensions,
                        const size_t *src_dimensions,
                        int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
                                           volume, dst_offsets, src_offsets,
                                           dst_dimensions, src_dimensions,
                                           dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
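#if 0
/* Usage sketch (not compiled in; prototypes come from omp.h): copy a 2x3
   sub-block starting at src[1][2] of a 4x5 host matrix into the top-left
   corner of a 2x3 host destination; both device numbers name the host.  */
void
rect_example (void)
{
  double src[4][5] = { { 0 } }, dst[2][3];
  const size_t volume[2] = { 2, 3 };
  const size_t src_offsets[2] = { 1, 2 };
  const size_t dst_offsets[2] = { 0, 0 };
  const size_t src_dims[2] = { 4, 5 };
  const size_t dst_dims[2] = { 2, 3 };
  int host = omp_get_initial_device ();
  omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                          dst_offsets, src_offsets, dst_dims, src_dims,
                          host, host);
}
#endif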
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
                          size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
          == (uintptr_t) device_ptr + device_offset
          && n->host_start <= cur_node.host_start
          && n->host_end >= cur_node.host_end)
        ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->virtual_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
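#if 0
/* Usage sketch (not compiled in; prototypes come from omp.h): pre-allocate
   device storage for a host array, publish the association so mapping
   constructs reuse it instead of allocating, then tear it down again.
   Returns of 0 indicate success.  */
void
associate_example (void)
{
  int dev = omp_get_default_device ();
  double h[16];
  void *d = omp_target_alloc (sizeof h, dev);
  if (d != NULL
      && omp_target_associate_ptr (h, d, sizeof h, 0, dev) == 0)
    {
      /* ... use h inside target regions on DEV without remapping ... */
      omp_target_disassociate_ptr (h, dev);
    }
  omp_target_free (d, dev);
}
#endif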
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= gomp_get_num_devices ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))
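/* For example (illustrative expansion): DLSYM (init_device) looks up the
   symbol "GOMP_OFFLOAD_init_device" in the plugin's shared object, stores
   the handle in device->init_device_func, and jumps to dl_fail if it is
   absent, while DLSYM_OPT merely records the name of a missing optional
   entry point in last_missing and evaluates to false.  */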
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data)
          || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
          || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
          || !DLSYM_OPT (openacc.async.test, openacc_async_test)
          || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
          || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
          || !DLSYM_OPT (openacc.async.queue_callback,
                         openacc_async_queue_callback)
          || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
          || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
          || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
          || !DLSYM_OPT (openacc.get_property, openacc_get_property))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
        ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
        gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors
   for the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400
   ones, followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;
        size_t prefix_len, suffix_len, cur_len;

        next = strchr (cur, ',');

        prefix_len = strlen (prefix);
        cur_len = next ? next - cur : strlen (cur);
        suffix_len = strlen (suffix);

        plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
        if (!plugin_name)
          {
            num_devices = 0;
            break;
          }

        memcpy (plugin_name, prefix, prefix_len);
        memcpy (plugin_name + prefix_len, cur, cur_len);
        memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
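        /* Illustrative result (assuming an ELF target where SONAME_SUFFIX (1)
           yields ".so.1"): for the entry "nvptx" in OFFLOAD_PLUGINS the
           three copies above produce the NUL-terminated name
           "libgomp-plugin-nvptx.so.1", which is then handed to dlopen by
           gomp_load_plugin_for_device below.  */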
        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devices = current_device.get_num_devices_func ();
            if (new_num_devices >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devices = realloc (devices, (num_devices + new_num_devices)
                                   * sizeof (struct gomp_device_descr));
                if (!devices)
                  {
                    num_devices = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.state = GOMP_DEVICE_UNINITIALIZED;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */