1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
49 static void gomp_target_init (void);
51 /* The whole initialization code for offloading plugins is only run one. */
52 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock
;
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr
{
62 enum offload_target_type type
;
63 const void *host_table
;
64 const void *target_data
;
/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
85 gomp_realloc_unlock (void *old
, size_t size
)
87 void *ret
= realloc (old
, size
);
90 gomp_mutex_unlock (®ister_lock
);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
97 gomp_init_targets_once (void)
99 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
103 gomp_get_num_devices (void)
105 gomp_init_targets_once ();
106 return num_devices_openmp
;
109 static struct gomp_device_descr
*
110 resolve_device (int device_id
)
112 if (device_id
== GOMP_DEVICE_ICV
)
114 struct gomp_task_icv
*icv
= gomp_icv (false);
115 device_id
= icv
->default_device_var
;
118 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
121 gomp_mutex_lock (&devices
[device_id
].lock
);
122 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
123 gomp_init_device (&devices
[device_id
]);
124 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
126 gomp_mutex_unlock (&devices
[device_id
].lock
);
129 gomp_mutex_unlock (&devices
[device_id
].lock
);
131 return &devices
[device_id
];
135 static inline splay_tree_key
136 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
138 if (key
->host_start
!= key
->host_end
)
139 return splay_tree_lookup (mem_map
, key
);
142 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
147 n
= splay_tree_lookup (mem_map
, key
);
151 return splay_tree_lookup (mem_map
, key
);
154 static inline splay_tree_key
155 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
157 if (key
->host_start
!= key
->host_end
)
158 return splay_tree_lookup (mem_map
, key
);
161 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
167 gomp_device_copy (struct gomp_device_descr
*devicep
,
168 bool (*copy_func
) (int, void *, const void *, size_t),
169 const char *dst
, void *dstaddr
,
170 const char *src
, const void *srcaddr
,
173 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
175 gomp_mutex_unlock (&devicep
->lock
);
176 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
182 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
183 bool (*copy_func
) (int, void *, const void *, size_t,
184 struct goacc_asyncqueue
*),
185 const char *dst
, void *dstaddr
,
186 const char *src
, const void *srcaddr
,
187 size_t size
, struct goacc_asyncqueue
*aq
)
189 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
191 gomp_mutex_unlock (&devicep
->lock
);
192 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
225 /* Maximum size of memory region considered for coalescing. Larger copies
226 are performed directly. */
227 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
229 /* Maximum size of a gap in between regions to consider them being copied
230 within the same chunk. All the device offsets considered are within
231 newly allocated device memory, so it isn't fatal if we copy some padding
232 in between from host to device. The gaps come either from alignment
233 padding or from memory regions which are not supposed to be copied from
234 host to device (e.g. map(alloc:), map(from:) etc.). */
235 #define MAX_COALESCE_BUF_GAP (4 * 1024)
237 /* Add region with device tgt_start relative offset and length to CBUF. */
240 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
242 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
246 if (cbuf
->chunk_cnt
< 0)
248 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
250 cbuf
->chunk_cnt
= -1;
253 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
255 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
259 /* If the last chunk is only used by one mapping, discard it,
260 as it will be one host to device copy anyway and
261 memcpying it around will only waste cycles. */
262 if (cbuf
->use_cnt
== 1)
265 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
266 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
271 /* Return true for mapping kinds which need to copy data from the
272 host to device for regions that weren't previously mapped. */
275 gomp_to_device_kind_p (int kind
)
281 case GOMP_MAP_FORCE_ALLOC
:
282 case GOMP_MAP_FORCE_FROM
:
283 case GOMP_MAP_ALWAYS_FROM
:
290 attribute_hidden
void
291 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
292 struct goacc_asyncqueue
*aq
,
293 void *d
, const void *h
, size_t sz
,
294 struct gomp_coalesce_buf
*cbuf
)
298 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
299 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
302 long last
= cbuf
->chunk_cnt
- 1;
303 while (first
<= last
)
305 long middle
= (first
+ last
) >> 1;
306 if (cbuf
->chunks
[middle
].end
<= doff
)
308 else if (cbuf
->chunks
[middle
].start
<= doff
)
310 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
311 gomp_fatal ("internal libgomp cbuf error");
312 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
321 if (__builtin_expect (aq
!= NULL
, 0))
322 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
323 "dev", d
, "host", h
, sz
, aq
);
325 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
328 attribute_hidden
void
329 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
330 struct goacc_asyncqueue
*aq
,
331 void *h
, const void *d
, size_t sz
)
333 if (__builtin_expect (aq
!= NULL
, 0))
334 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
335 "host", h
, "dev", d
, sz
, aq
);
337 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
341 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
343 if (!devicep
->free_func (devicep
->target_id
, devptr
))
345 gomp_mutex_unlock (&devicep
->lock
);
346 gomp_fatal ("error in freeing device memory block at %p", devptr
);
350 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
351 gomp_map_0len_lookup found oldn for newn.
352 Helper function of gomp_map_vars. */
355 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
356 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
357 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
358 unsigned char kind
, struct gomp_coalesce_buf
*cbuf
)
361 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
362 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
363 tgt_var
->do_detach
= kind
== GOMP_MAP_ATTACH
;
364 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
365 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
367 if ((kind
& GOMP_MAP_FLAG_FORCE
)
368 || oldn
->host_start
> newn
->host_start
369 || oldn
->host_end
< newn
->host_end
)
371 gomp_mutex_unlock (&devicep
->lock
);
372 gomp_fatal ("Trying to map into device [%p..%p) object when "
373 "[%p..%p) is already mapped",
374 (void *) newn
->host_start
, (void *) newn
->host_end
,
375 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
378 if (GOMP_MAP_ALWAYS_TO_P (kind
))
379 gomp_copy_host2dev (devicep
, aq
,
380 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
381 + newn
->host_start
- oldn
->host_start
),
382 (void *) newn
->host_start
,
383 newn
->host_end
- newn
->host_start
, cbuf
);
385 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Return the IDX-th mapping kind from KINDS, which is an array of
   unsigned short when SHORT_MAPKIND, else of unsigned char.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
397 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
398 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
399 struct gomp_coalesce_buf
*cbuf
)
401 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
402 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
403 struct splay_tree_key_s cur_node
;
405 cur_node
.host_start
= host_ptr
;
406 if (cur_node
.host_start
== (uintptr_t) NULL
)
408 cur_node
.tgt_offset
= (uintptr_t) NULL
;
409 gomp_copy_host2dev (devicep
, aq
,
410 (void *) (tgt
->tgt_start
+ target_offset
),
411 (void *) &cur_node
.tgt_offset
,
412 sizeof (void *), cbuf
);
415 /* Add bias to the pointer value. */
416 cur_node
.host_start
+= bias
;
417 cur_node
.host_end
= cur_node
.host_start
;
418 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
421 gomp_mutex_unlock (&devicep
->lock
);
422 gomp_fatal ("Pointer target of array section wasn't mapped");
424 cur_node
.host_start
-= n
->host_start
;
426 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
427 /* At this point tgt_offset is target address of the
428 array section. Now subtract bias to get what we want
429 to initialize the pointer with. */
430 cur_node
.tgt_offset
-= bias
;
431 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
432 (void *) &cur_node
.tgt_offset
, sizeof (void *), cbuf
);
436 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
437 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
438 size_t first
, size_t i
, void **hostaddrs
,
439 size_t *sizes
, void *kinds
,
440 struct gomp_coalesce_buf
*cbuf
)
442 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
443 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
444 struct splay_tree_key_s cur_node
;
446 const bool short_mapkind
= true;
447 const int typemask
= short_mapkind
? 0xff : 0x7;
449 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
450 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
451 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
452 kind
= get_kind (short_mapkind
, kinds
, i
);
455 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
457 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
458 &tgt
->list
[i
], kind
& typemask
, cbuf
);
463 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
465 cur_node
.host_start
--;
466 n2
= splay_tree_lookup (mem_map
, &cur_node
);
467 cur_node
.host_start
++;
470 && n2
->host_start
- n
->host_start
471 == n2
->tgt_offset
- n
->tgt_offset
)
473 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
474 &tgt
->list
[i
], kind
& typemask
, cbuf
);
479 n2
= splay_tree_lookup (mem_map
, &cur_node
);
483 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
485 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
486 kind
& typemask
, cbuf
);
490 gomp_mutex_unlock (&devicep
->lock
);
491 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
492 "other mapped elements from the same structure weren't mapped "
493 "together with it", (void *) cur_node
.host_start
,
494 (void *) cur_node
.host_end
);
497 attribute_hidden
void
498 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
499 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
500 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
501 struct gomp_coalesce_buf
*cbufp
)
503 struct splay_tree_key_s s
;
508 gomp_mutex_unlock (&devicep
->lock
);
509 gomp_fatal ("enclosing struct not mapped for attach");
512 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
513 /* We might have a pointer in a packed struct: however we cannot have more
514 than one such pointer in each pointer-sized portion of the struct, so
516 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
519 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
521 if (!n
->aux
->attach_count
)
523 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
525 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
526 n
->aux
->attach_count
[idx
]++;
529 gomp_mutex_unlock (&devicep
->lock
);
530 gomp_fatal ("attach count overflow");
533 if (n
->aux
->attach_count
[idx
] == 1)
535 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
537 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
541 if ((void *) target
== NULL
)
543 gomp_mutex_unlock (&devicep
->lock
);
544 gomp_fatal ("attempt to attach null pointer");
547 s
.host_start
= target
+ bias
;
548 s
.host_end
= s
.host_start
+ 1;
549 tn
= splay_tree_lookup (mem_map
, &s
);
553 gomp_mutex_unlock (&devicep
->lock
);
554 gomp_fatal ("pointer target not mapped for attach");
557 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
560 "%s: attaching host %p, target %p (struct base %p) to %p\n",
561 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
562 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
564 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
565 sizeof (void *), cbufp
);
568 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
569 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
572 attribute_hidden
void
573 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
574 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
575 uintptr_t detach_from
, bool finalize
,
576 struct gomp_coalesce_buf
*cbufp
)
582 gomp_mutex_unlock (&devicep
->lock
);
583 gomp_fatal ("enclosing struct not mapped for detach");
586 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
588 if (!n
->aux
|| !n
->aux
->attach_count
)
590 gomp_mutex_unlock (&devicep
->lock
);
591 gomp_fatal ("no attachment counters for struct");
595 n
->aux
->attach_count
[idx
] = 1;
597 if (n
->aux
->attach_count
[idx
] == 0)
599 gomp_mutex_unlock (&devicep
->lock
);
600 gomp_fatal ("attach count underflow");
603 n
->aux
->attach_count
[idx
]--;
605 if (n
->aux
->attach_count
[idx
] == 0)
607 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
609 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
612 "%s: detaching host %p, target %p (struct base %p) to %p\n",
613 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
614 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
617 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
618 sizeof (void *), cbufp
);
621 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
622 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
625 attribute_hidden
uintptr_t
626 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
628 if (tgt
->list
[i
].key
!= NULL
)
629 return tgt
->list
[i
].key
->tgt
->tgt_start
630 + tgt
->list
[i
].key
->tgt_offset
631 + tgt
->list
[i
].offset
;
633 switch (tgt
->list
[i
].offset
)
636 return (uintptr_t) hostaddrs
[i
];
642 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
643 + tgt
->list
[i
+ 1].key
->tgt_offset
644 + tgt
->list
[i
+ 1].offset
645 + (uintptr_t) hostaddrs
[i
]
646 - (uintptr_t) hostaddrs
[i
+ 1];
649 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
653 static inline __attribute__((always_inline
)) struct target_mem_desc
*
654 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
655 struct goacc_asyncqueue
*aq
, size_t mapnum
,
656 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
657 void *kinds
, bool short_mapkind
,
658 enum gomp_map_vars_kind pragma_kind
)
660 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
661 bool has_firstprivate
= false;
662 const int rshift
= short_mapkind
? 8 : 3;
663 const int typemask
= short_mapkind
? 0xff : 0x7;
664 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
665 struct splay_tree_key_s cur_node
;
666 struct target_mem_desc
*tgt
667 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
668 tgt
->list_count
= mapnum
;
669 tgt
->refcount
= (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
670 || pragma_kind
== GOMP_MAP_VARS_OPENACC_ENTER_DATA
) ? 0 : 1;
671 tgt
->device_descr
= devicep
;
673 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
682 tgt_align
= sizeof (void *);
688 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
690 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
691 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
694 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
696 size_t align
= 4 * sizeof (void *);
698 tgt_size
= mapnum
* sizeof (void *);
700 cbuf
.use_cnt
= 1 + (mapnum
> 1);
701 cbuf
.chunks
[0].start
= 0;
702 cbuf
.chunks
[0].end
= tgt_size
;
705 gomp_mutex_lock (&devicep
->lock
);
706 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
708 gomp_mutex_unlock (&devicep
->lock
);
713 for (i
= 0; i
< mapnum
; i
++)
715 int kind
= get_kind (short_mapkind
, kinds
, i
);
716 if (hostaddrs
[i
] == NULL
717 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
719 tgt
->list
[i
].key
= NULL
;
720 tgt
->list
[i
].offset
= OFFSET_INLINED
;
723 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
725 tgt
->list
[i
].key
= NULL
;
728 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
729 on a separate construct prior to using use_device_{addr,ptr}.
730 In OpenMP 5.0, map directives need to be ordered by the
731 middle-end before the use_device_* clauses. If
732 !not_found_cnt, all mappings requested (if any) are already
733 mapped, so use_device_{addr,ptr} can be resolved right away.
734 Otherwise, if not_found_cnt, gomp_map_lookup might fail
735 now but would succeed after performing the mappings in the
736 following loop. We can't defer this always to the second
737 loop, because it is not even invoked when !not_found_cnt
738 after the first loop. */
739 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
740 cur_node
.host_end
= cur_node
.host_start
;
741 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
744 gomp_mutex_unlock (&devicep
->lock
);
745 gomp_fatal ("use_device_ptr pointer wasn't mapped");
747 cur_node
.host_start
-= n
->host_start
;
749 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
750 + cur_node
.host_start
);
751 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
754 tgt
->list
[i
].offset
= 0;
757 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
759 size_t first
= i
+ 1;
760 size_t last
= i
+ sizes
[i
];
761 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
762 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
764 tgt
->list
[i
].key
= NULL
;
765 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
766 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
769 size_t align
= (size_t) 1 << (kind
>> rshift
);
770 if (tgt_align
< align
)
772 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
773 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
774 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
775 not_found_cnt
+= last
- i
;
776 for (i
= first
; i
<= last
; i
++)
778 tgt
->list
[i
].key
= NULL
;
779 if (gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
781 gomp_coalesce_buf_add (&cbuf
,
782 tgt_size
- cur_node
.host_end
783 + (uintptr_t) hostaddrs
[i
],
789 for (i
= first
; i
<= last
; i
++)
790 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
795 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
797 tgt
->list
[i
].key
= NULL
;
798 tgt
->list
[i
].offset
= OFFSET_POINTER
;
799 has_firstprivate
= true;
802 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
)
804 tgt
->list
[i
].key
= NULL
;
805 has_firstprivate
= true;
808 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
809 if (!GOMP_MAP_POINTER_P (kind
& typemask
)
810 && (kind
& typemask
) != GOMP_MAP_ATTACH
)
811 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
813 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
814 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
816 tgt
->list
[i
].key
= NULL
;
818 size_t align
= (size_t) 1 << (kind
>> rshift
);
819 if (tgt_align
< align
)
821 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
822 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
823 cur_node
.host_end
- cur_node
.host_start
);
824 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
825 has_firstprivate
= true;
829 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
831 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
834 tgt
->list
[i
].key
= NULL
;
835 tgt
->list
[i
].offset
= OFFSET_POINTER
;
840 n
= splay_tree_lookup (mem_map
, &cur_node
);
841 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
842 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
843 kind
& typemask
, NULL
);
846 tgt
->list
[i
].key
= NULL
;
848 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
850 /* Not present, hence, skip entry - including its MAP_POINTER,
852 tgt
->list
[i
].offset
= OFFSET_POINTER
;
854 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
855 == GOMP_MAP_POINTER
))
858 tgt
->list
[i
].key
= NULL
;
859 tgt
->list
[i
].offset
= 0;
863 size_t align
= (size_t) 1 << (kind
>> rshift
);
865 if (tgt_align
< align
)
867 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
868 if (gomp_to_device_kind_p (kind
& typemask
))
869 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
870 cur_node
.host_end
- cur_node
.host_start
);
871 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
872 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
875 for (j
= i
+ 1; j
< mapnum
; j
++)
876 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
879 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
880 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
881 > cur_node
.host_end
))
885 tgt
->list
[j
].key
= NULL
;
896 gomp_mutex_unlock (&devicep
->lock
);
897 gomp_fatal ("unexpected aggregation");
899 tgt
->to_free
= devaddrs
[0];
900 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
901 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
903 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
905 /* Allocate tgt_align aligned tgt_size block of memory. */
906 /* FIXME: Perhaps change interface to allocate properly aligned
908 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
909 tgt_size
+ tgt_align
- 1);
912 gomp_mutex_unlock (&devicep
->lock
);
913 gomp_fatal ("device memory allocation fail");
916 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
917 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
918 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
920 if (cbuf
.use_cnt
== 1)
922 if (cbuf
.chunk_cnt
> 0)
925 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
941 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
942 tgt_size
= mapnum
* sizeof (void *);
945 if (not_found_cnt
|| has_firstprivate
)
948 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
949 splay_tree_node array
= tgt
->array
;
950 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
951 uintptr_t field_tgt_base
= 0;
953 for (i
= 0; i
< mapnum
; i
++)
954 if (tgt
->list
[i
].key
== NULL
)
956 int kind
= get_kind (short_mapkind
, kinds
, i
);
957 if (hostaddrs
[i
] == NULL
)
959 switch (kind
& typemask
)
961 size_t align
, len
, first
, last
;
963 case GOMP_MAP_FIRSTPRIVATE
:
964 align
= (size_t) 1 << (kind
>> rshift
);
965 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
966 tgt
->list
[i
].offset
= tgt_size
;
968 gomp_copy_host2dev (devicep
, aq
,
969 (void *) (tgt
->tgt_start
+ tgt_size
),
970 (void *) hostaddrs
[i
], len
, cbufp
);
973 case GOMP_MAP_FIRSTPRIVATE_INT
:
974 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
976 case GOMP_MAP_USE_DEVICE_PTR
:
977 if (tgt
->list
[i
].offset
== 0)
979 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
980 cur_node
.host_end
= cur_node
.host_start
;
981 n
= gomp_map_lookup (mem_map
, &cur_node
);
984 gomp_mutex_unlock (&devicep
->lock
);
985 gomp_fatal ("use_device_ptr pointer wasn't mapped");
987 cur_node
.host_start
-= n
->host_start
;
989 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
990 + cur_node
.host_start
);
991 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
994 case GOMP_MAP_STRUCT
:
997 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
998 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1000 if (tgt
->list
[first
].key
!= NULL
)
1002 n
= splay_tree_lookup (mem_map
, &cur_node
);
1005 size_t align
= (size_t) 1 << (kind
>> rshift
);
1006 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1007 - (uintptr_t) hostaddrs
[i
];
1008 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1009 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1010 - (uintptr_t) hostaddrs
[i
];
1011 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1012 field_tgt_offset
= tgt_size
;
1013 field_tgt_clear
= last
;
1014 tgt_size
+= cur_node
.host_end
1015 - (uintptr_t) hostaddrs
[first
];
1018 for (i
= first
; i
<= last
; i
++)
1019 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1020 sizes
, kinds
, cbufp
);
1023 case GOMP_MAP_ALWAYS_POINTER
:
1024 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1025 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1026 n
= splay_tree_lookup (mem_map
, &cur_node
);
1028 || n
->host_start
> cur_node
.host_start
1029 || n
->host_end
< cur_node
.host_end
)
1031 gomp_mutex_unlock (&devicep
->lock
);
1032 gomp_fatal ("always pointer not mapped");
1034 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1035 != GOMP_MAP_ALWAYS_POINTER
)
1036 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1037 if (cur_node
.tgt_offset
)
1038 cur_node
.tgt_offset
-= sizes
[i
];
1039 gomp_copy_host2dev (devicep
, aq
,
1040 (void *) (n
->tgt
->tgt_start
1042 + cur_node
.host_start
1044 (void *) &cur_node
.tgt_offset
,
1045 sizeof (void *), cbufp
);
1046 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1047 + cur_node
.host_start
- n
->host_start
;
1049 case GOMP_MAP_IF_PRESENT
:
1050 /* Not present - otherwise handled above. Skip over its
1051 MAP_POINTER as well. */
1053 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1054 == GOMP_MAP_POINTER
))
1057 case GOMP_MAP_ATTACH
:
1059 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1060 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1061 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1064 tgt
->list
[i
].key
= n
;
1065 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1066 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1067 tgt
->list
[i
].copy_from
= false;
1068 tgt
->list
[i
].always_copy_from
= false;
1069 tgt
->list
[i
].do_detach
1070 = (pragma_kind
!= GOMP_MAP_VARS_OPENACC_ENTER_DATA
);
1075 gomp_mutex_unlock (&devicep
->lock
);
1076 gomp_fatal ("outer struct not mapped for attach");
1078 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1079 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1086 splay_tree_key k
= &array
->key
;
1087 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1088 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1089 k
->host_end
= k
->host_start
+ sizes
[i
];
1091 k
->host_end
= k
->host_start
+ sizeof (void *);
1092 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1093 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1094 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1095 kind
& typemask
, cbufp
);
1099 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1101 /* Replace target address of the pointer with target address
1102 of mapped object in the splay tree. */
1103 splay_tree_remove (mem_map
, n
);
1105 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1106 k
->aux
->link_key
= n
;
1108 size_t align
= (size_t) 1 << (kind
>> rshift
);
1109 tgt
->list
[i
].key
= k
;
1111 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1113 k
->tgt_offset
= k
->host_start
- field_tgt_base
1115 if (i
== field_tgt_clear
)
1116 field_tgt_clear
= FIELD_TGT_EMPTY
;
1120 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1121 k
->tgt_offset
= tgt_size
;
1122 tgt_size
+= k
->host_end
- k
->host_start
;
1124 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1125 tgt
->list
[i
].always_copy_from
1126 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1127 tgt
->list
[i
].do_detach
= false;
1128 tgt
->list
[i
].offset
= 0;
1129 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1131 k
->virtual_refcount
= 0;
1134 array
->right
= NULL
;
1135 splay_tree_insert (mem_map
, array
);
1136 switch (kind
& typemask
)
1138 case GOMP_MAP_ALLOC
:
1140 case GOMP_MAP_FORCE_ALLOC
:
1141 case GOMP_MAP_FORCE_FROM
:
1142 case GOMP_MAP_ALWAYS_FROM
:
1145 case GOMP_MAP_TOFROM
:
1146 case GOMP_MAP_FORCE_TO
:
1147 case GOMP_MAP_FORCE_TOFROM
:
1148 case GOMP_MAP_ALWAYS_TO
:
1149 case GOMP_MAP_ALWAYS_TOFROM
:
1150 gomp_copy_host2dev (devicep
, aq
,
1151 (void *) (tgt
->tgt_start
1153 (void *) k
->host_start
,
1154 k
->host_end
- k
->host_start
, cbufp
);
1156 case GOMP_MAP_POINTER
:
1157 gomp_map_pointer (tgt
, aq
,
1158 (uintptr_t) *(void **) k
->host_start
,
1159 k
->tgt_offset
, sizes
[i
], cbufp
);
1161 case GOMP_MAP_TO_PSET
:
1162 gomp_copy_host2dev (devicep
, aq
,
1163 (void *) (tgt
->tgt_start
1165 (void *) k
->host_start
,
1166 k
->host_end
- k
->host_start
, cbufp
);
1168 for (j
= i
+ 1; j
< mapnum
; j
++)
1169 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
1173 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1174 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1179 tgt
->list
[j
].key
= k
;
1180 tgt
->list
[j
].copy_from
= false;
1181 tgt
->list
[j
].always_copy_from
= false;
1182 tgt
->list
[j
].do_detach
= false;
1183 if (k
->refcount
!= REFCOUNT_INFINITY
)
1185 gomp_map_pointer (tgt
, aq
,
1186 (uintptr_t) *(void **) hostaddrs
[j
],
1188 + ((uintptr_t) hostaddrs
[j
]
1194 case GOMP_MAP_FORCE_PRESENT
:
1196 /* We already looked up the memory region above and it
1198 size_t size
= k
->host_end
- k
->host_start
;
1199 gomp_mutex_unlock (&devicep
->lock
);
1200 #ifdef HAVE_INTTYPES_H
1201 gomp_fatal ("present clause: !acc_is_present (%p, "
1202 "%"PRIu64
" (0x%"PRIx64
"))",
1203 (void *) k
->host_start
,
1204 (uint64_t) size
, (uint64_t) size
);
1206 gomp_fatal ("present clause: !acc_is_present (%p, "
1207 "%lu (0x%lx))", (void *) k
->host_start
,
1208 (unsigned long) size
, (unsigned long) size
);
1212 case GOMP_MAP_FORCE_DEVICEPTR
:
1213 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1214 gomp_copy_host2dev (devicep
, aq
,
1215 (void *) (tgt
->tgt_start
1217 (void *) k
->host_start
,
1218 sizeof (void *), cbufp
);
1221 gomp_mutex_unlock (&devicep
->lock
);
1222 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1226 if (k
->aux
&& k
->aux
->link_key
)
1228 /* Set link pointer on target to the device address of the
1230 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1231 /* We intentionally do not use coalescing here, as it's not
1232 data allocated by the current call to this function. */
1233 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1234 &tgt_addr
, sizeof (void *), NULL
);
1241 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1243 for (i
= 0; i
< mapnum
; i
++)
1245 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1246 gomp_copy_host2dev (devicep
, aq
,
1247 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1248 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1256 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1257 gomp_copy_host2dev (devicep
, aq
,
1258 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1259 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1260 - cbuf
.chunks
[0].start
),
1261 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
, NULL
);
1267 /* If the variable from "omp target enter data" map-list was already mapped,
1268 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1270 if ((pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
1271 || pragma_kind
== GOMP_MAP_VARS_OPENACC_ENTER_DATA
)
1272 && tgt
->refcount
== 0)
1274 /* If we're about to discard a target_mem_desc with no "structural"
1275 references (tgt->refcount == 0), any splay keys linked in the tgt's
1276 list must have their virtual refcount incremented to represent that
1277 "lost" reference in order to implement the semantics of the OpenACC
1278 "present increment" operation properly. */
1279 if (pragma_kind
== GOMP_MAP_VARS_OPENACC_ENTER_DATA
)
1280 for (i
= 0; i
< tgt
->list_count
; i
++)
1281 if (tgt
->list
[i
].key
)
1282 tgt
->list
[i
].key
->virtual_refcount
++;
1288 gomp_mutex_unlock (&devicep
->lock
);
1292 attribute_hidden
struct target_mem_desc
*
1293 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1294 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1295 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
1297 return gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1298 sizes
, kinds
, short_mapkind
, pragma_kind
);
1301 attribute_hidden
struct target_mem_desc
*
1302 gomp_map_vars_async (struct gomp_device_descr
*devicep
,
1303 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1304 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1305 void *kinds
, bool short_mapkind
,
1306 enum gomp_map_vars_kind pragma_kind
)
1308 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1309 sizes
, kinds
, short_mapkind
, pragma_kind
);
1313 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1315 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1317 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1324 gomp_unref_tgt (void *ptr
)
1326 bool is_tgt_unmapped
= false;
1328 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1330 if (tgt
->refcount
> 1)
1334 gomp_unmap_tgt (tgt
);
1335 is_tgt_unmapped
= true;
1338 return is_tgt_unmapped
;
/* void-returning adapter over gomp_unref_tgt, suitable for use as an
   OpenACC async-queue callback (which requires a void (*)(void *)).  */
static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1347 static inline __attribute__((always_inline
)) bool
1348 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1349 struct goacc_asyncqueue
*aq
)
1351 bool is_tgt_unmapped
= false;
1352 splay_tree_remove (&devicep
->mem_map
, k
);
1355 if (k
->aux
->link_key
)
1356 splay_tree_insert (&devicep
->mem_map
,
1357 (splay_tree_node
) k
->aux
->link_key
);
1358 if (k
->aux
->attach_count
)
1359 free (k
->aux
->attach_count
);
1364 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1367 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1368 return is_tgt_unmapped
;
1371 attribute_hidden
bool
1372 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1374 return gomp_remove_var_internal (devicep
, k
, NULL
);
1377 /* Remove a variable asynchronously. This actually removes the variable
1378 mapping immediately, but retains the linked target_mem_desc until the
1379 asynchronous operation has completed (as it may still refer to target
1380 memory). The device lock must be held before entry, and remains locked on
1383 attribute_hidden
void
1384 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1385 struct goacc_asyncqueue
*aq
)
1387 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1390 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1391 variables back from device to host: if it is false, it is assumed that this
1392 has been done already. */
1394 static inline __attribute__((always_inline
)) void
1395 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1396 struct goacc_asyncqueue
*aq
)
1398 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1400 if (tgt
->list_count
== 0)
1406 gomp_mutex_lock (&devicep
->lock
);
1407 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1409 gomp_mutex_unlock (&devicep
->lock
);
1417 /* We must perform detachments before any copies back to the host. */
1418 for (i
= 0; i
< tgt
->list_count
; i
++)
1420 splay_tree_key k
= tgt
->list
[i
].key
;
1422 if (k
!= NULL
&& tgt
->list
[i
].do_detach
)
1423 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1424 + tgt
->list
[i
].offset
,
1425 k
->refcount
== 1, NULL
);
1428 for (i
= 0; i
< tgt
->list_count
; i
++)
1430 splay_tree_key k
= tgt
->list
[i
].key
;
1434 bool do_unmap
= false;
1436 && k
->virtual_refcount
> 0
1437 && k
->refcount
!= REFCOUNT_INFINITY
)
1439 k
->virtual_refcount
--;
1442 else if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
1444 else if (k
->refcount
== 1)
1450 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1451 || tgt
->list
[i
].always_copy_from
)
1452 gomp_copy_dev2host (devicep
, aq
,
1453 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1454 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1455 + tgt
->list
[i
].offset
),
1456 tgt
->list
[i
].length
);
1459 struct target_mem_desc
*k_tgt
= k
->tgt
;
1460 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1461 /* It would be bad if TGT got unmapped while we're still iterating
1462 over its LIST_COUNT, and also expect to use it in the following
1464 assert (!is_tgt_unmapped
1470 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1473 gomp_unref_tgt ((void *) tgt
);
1475 gomp_mutex_unlock (&devicep
->lock
);
1478 attribute_hidden
void
1479 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
1481 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
);
1484 attribute_hidden
void
1485 gomp_unmap_vars_async (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1486 struct goacc_asyncqueue
*aq
)
1488 gomp_unmap_vars_internal (tgt
, do_copyfrom
, aq
);
1492 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
1493 size_t *sizes
, void *kinds
, bool short_mapkind
)
1496 struct splay_tree_key_s cur_node
;
1497 const int typemask
= short_mapkind
? 0xff : 0x7;
1505 gomp_mutex_lock (&devicep
->lock
);
1506 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1508 gomp_mutex_unlock (&devicep
->lock
);
1512 for (i
= 0; i
< mapnum
; i
++)
1515 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1516 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1517 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1520 int kind
= get_kind (short_mapkind
, kinds
, i
);
1521 if (n
->host_start
> cur_node
.host_start
1522 || n
->host_end
< cur_node
.host_end
)
1524 gomp_mutex_unlock (&devicep
->lock
);
1525 gomp_fatal ("Trying to update [%p..%p) object when "
1526 "only [%p..%p) is mapped",
1527 (void *) cur_node
.host_start
,
1528 (void *) cur_node
.host_end
,
1529 (void *) n
->host_start
,
1530 (void *) n
->host_end
);
1534 void *hostaddr
= (void *) cur_node
.host_start
;
1535 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1536 + cur_node
.host_start
- n
->host_start
);
1537 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
1539 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1540 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
1542 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
1543 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
1546 gomp_mutex_unlock (&devicep
->lock
);
1549 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1550 And insert to splay tree the mapping between addresses from HOST_TABLE and
1551 from loaded target image. We rely in the host and device compiler
1552 emitting variable and functions in the same order. */
1555 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
1556 const void *host_table
, const void *target_data
,
1557 bool is_register_lock
)
1559 void **host_func_table
= ((void ***) host_table
)[0];
1560 void **host_funcs_end
= ((void ***) host_table
)[1];
1561 void **host_var_table
= ((void ***) host_table
)[2];
1562 void **host_vars_end
= ((void ***) host_table
)[3];
1564 /* The func table contains only addresses, the var table contains addresses
1565 and corresponding sizes. */
1566 int num_funcs
= host_funcs_end
- host_func_table
;
1567 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1569 /* Load image to device and get target addresses for the image. */
1570 struct addr_pair
*target_table
= NULL
;
1571 int i
, num_target_entries
;
1574 = devicep
->load_image_func (devicep
->target_id
, version
,
1575 target_data
, &target_table
);
1577 if (num_target_entries
!= num_funcs
+ num_vars
)
1579 gomp_mutex_unlock (&devicep
->lock
);
1580 if (is_register_lock
)
1581 gomp_mutex_unlock (®ister_lock
);
1582 gomp_fatal ("Cannot map target functions or variables"
1583 " (expected %u, have %u)", num_funcs
+ num_vars
,
1584 num_target_entries
);
1587 /* Insert host-target address mapping into splay tree. */
1588 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1589 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
1590 tgt
->refcount
= REFCOUNT_INFINITY
;
1593 tgt
->to_free
= NULL
;
1595 tgt
->list_count
= 0;
1596 tgt
->device_descr
= devicep
;
1597 splay_tree_node array
= tgt
->array
;
1599 for (i
= 0; i
< num_funcs
; i
++)
1601 splay_tree_key k
= &array
->key
;
1602 k
->host_start
= (uintptr_t) host_func_table
[i
];
1603 k
->host_end
= k
->host_start
+ 1;
1605 k
->tgt_offset
= target_table
[i
].start
;
1606 k
->refcount
= REFCOUNT_INFINITY
;
1607 k
->virtual_refcount
= 0;
1610 array
->right
= NULL
;
1611 splay_tree_insert (&devicep
->mem_map
, array
);
1615 /* Most significant bit of the size in host and target tables marks
1616 "omp declare target link" variables. */
1617 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1618 const uintptr_t size_mask
= ~link_bit
;
1620 for (i
= 0; i
< num_vars
; i
++)
1622 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1623 uintptr_t target_size
= target_var
->end
- target_var
->start
;
1625 if ((uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
1627 gomp_mutex_unlock (&devicep
->lock
);
1628 if (is_register_lock
)
1629 gomp_mutex_unlock (®ister_lock
);
1630 gomp_fatal ("Cannot map target variables (size mismatch)");
1633 splay_tree_key k
= &array
->key
;
1634 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1636 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1638 k
->tgt_offset
= target_var
->start
;
1639 k
->refcount
= target_size
& link_bit
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
1640 k
->virtual_refcount
= 0;
1643 array
->right
= NULL
;
1644 splay_tree_insert (&devicep
->mem_map
, array
);
1648 free (target_table
);
1651 /* Unload the mappings described by target_data from device DEVICE_P.
1652 The device must be locked. */
1655 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1657 const void *host_table
, const void *target_data
)
1659 void **host_func_table
= ((void ***) host_table
)[0];
1660 void **host_funcs_end
= ((void ***) host_table
)[1];
1661 void **host_var_table
= ((void ***) host_table
)[2];
1662 void **host_vars_end
= ((void ***) host_table
)[3];
1664 /* The func table contains only addresses, the var table contains addresses
1665 and corresponding sizes. */
1666 int num_funcs
= host_funcs_end
- host_func_table
;
1667 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1669 struct splay_tree_key_s k
;
1670 splay_tree_key node
= NULL
;
1672 /* Find mapping at start of node array */
1673 if (num_funcs
|| num_vars
)
1675 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1676 : (uintptr_t) host_var_table
[0]);
1677 k
.host_end
= k
.host_start
+ 1;
1678 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1681 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
1683 gomp_mutex_unlock (&devicep
->lock
);
1684 gomp_fatal ("image unload fail");
1687 /* Remove mappings from splay tree. */
1689 for (i
= 0; i
< num_funcs
; i
++)
1691 k
.host_start
= (uintptr_t) host_func_table
[i
];
1692 k
.host_end
= k
.host_start
+ 1;
1693 splay_tree_remove (&devicep
->mem_map
, &k
);
1696 /* Most significant bit of the size in host and target tables marks
1697 "omp declare target link" variables. */
1698 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1699 const uintptr_t size_mask
= ~link_bit
;
1700 bool is_tgt_unmapped
= false;
1702 for (i
= 0; i
< num_vars
; i
++)
1704 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
1706 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1708 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
1709 splay_tree_remove (&devicep
->mem_map
, &k
);
1712 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1713 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
1717 if (node
&& !is_tgt_unmapped
)
1724 /* This function should be called from every offload image while loading.
1725 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1726 the target, and TARGET_DATA needed by target plugin. */
1729 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1730 int target_type
, const void *target_data
)
1734 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1735 gomp_fatal ("Library too old for offload (version %u < %u)",
1736 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1738 gomp_mutex_lock (®ister_lock
);
1740 /* Load image to all initialized devices. */
1741 for (i
= 0; i
< num_devices
; i
++)
1743 struct gomp_device_descr
*devicep
= &devices
[i
];
1744 gomp_mutex_lock (&devicep
->lock
);
1745 if (devicep
->type
== target_type
1746 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1747 gomp_load_image_to_device (devicep
, version
,
1748 host_table
, target_data
, true);
1749 gomp_mutex_unlock (&devicep
->lock
);
1752 /* Insert image to array of pending images. */
1754 = gomp_realloc_unlock (offload_images
,
1755 (num_offload_images
+ 1)
1756 * sizeof (struct offload_image_descr
));
1757 offload_images
[num_offload_images
].version
= version
;
1758 offload_images
[num_offload_images
].type
= target_type
;
1759 offload_images
[num_offload_images
].host_table
= host_table
;
1760 offload_images
[num_offload_images
].target_data
= target_data
;
1762 num_offload_images
++;
1763 gomp_mutex_unlock (®ister_lock
);
/* Legacy (unversioned) entry point: register an offload image with
   version 0.  */
void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1773 /* This function should be called from every offload image while unloading.
1774 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1775 the target, and TARGET_DATA needed by target plugin. */
1778 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1779 int target_type
, const void *target_data
)
1783 gomp_mutex_lock (®ister_lock
);
1785 /* Unload image from all initialized devices. */
1786 for (i
= 0; i
< num_devices
; i
++)
1788 struct gomp_device_descr
*devicep
= &devices
[i
];
1789 gomp_mutex_lock (&devicep
->lock
);
1790 if (devicep
->type
== target_type
1791 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1792 gomp_unload_image_from_device (devicep
, version
,
1793 host_table
, target_data
);
1794 gomp_mutex_unlock (&devicep
->lock
);
1797 /* Remove image from array of pending images. */
1798 for (i
= 0; i
< num_offload_images
; i
++)
1799 if (offload_images
[i
].target_data
== target_data
)
1801 offload_images
[i
] = offload_images
[--num_offload_images
];
1805 gomp_mutex_unlock (®ister_lock
);
/* Legacy (unversioned) entry point: unregister an offload image registered
   with version 0.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1815 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1816 must be locked on entry, and remains locked on return. */
1818 attribute_hidden
void
1819 gomp_init_device (struct gomp_device_descr
*devicep
)
1822 if (!devicep
->init_device_func (devicep
->target_id
))
1824 gomp_mutex_unlock (&devicep
->lock
);
1825 gomp_fatal ("device initialization failed");
1828 /* Load to device all images registered by the moment. */
1829 for (i
= 0; i
< num_offload_images
; i
++)
1831 struct offload_image_descr
*image
= &offload_images
[i
];
1832 if (image
->type
== devicep
->type
)
1833 gomp_load_image_to_device (devicep
, image
->version
,
1834 image
->host_table
, image
->target_data
,
1838 /* Initialize OpenACC asynchronous queues. */
1839 goacc_init_asyncqueues (devicep
);
1841 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
1844 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1845 must be locked on entry, and remains locked on return. */
1847 attribute_hidden
bool
1848 gomp_fini_device (struct gomp_device_descr
*devicep
)
1850 bool ret
= goacc_fini_asyncqueues (devicep
);
1851 ret
&= devicep
->fini_device_func (devicep
->target_id
);
1852 devicep
->state
= GOMP_DEVICE_FINALIZED
;
1856 attribute_hidden
void
1857 gomp_unload_device (struct gomp_device_descr
*devicep
)
1859 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1863 /* Unload from device all images registered at the moment. */
1864 for (i
= 0; i
< num_offload_images
; i
++)
1866 struct offload_image_descr
*image
= &offload_images
[i
];
1867 if (image
->type
== devicep
->type
)
1868 gomp_unload_image_from_device (devicep
, image
->version
,
1870 image
->target_data
);
1875 /* Host fallback for GOMP_target{,_ext} routines. */
1878 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
1880 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1882 memset (thr
, '\0', sizeof (*thr
));
1883 if (gomp_places_list
)
1885 thr
->place
= old_thr
.place
;
1886 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1889 gomp_free_thread (thr
);
1893 /* Calculate alignment and size requirements of a private copy of data shared
1894 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1897 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
1898 unsigned short *kinds
, size_t *tgt_align
,
1902 for (i
= 0; i
< mapnum
; i
++)
1903 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1905 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1906 if (*tgt_align
< align
)
1908 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
1909 *tgt_size
+= sizes
[i
];
1913 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1916 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
1917 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
1920 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
1922 tgt
+= tgt_align
- al
;
1925 for (i
= 0; i
< mapnum
; i
++)
1926 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1928 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1929 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1930 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
1931 hostaddrs
[i
] = tgt
+ tgt_size
;
1932 tgt_size
= tgt_size
+ sizes
[i
];
1936 /* Helper function of GOMP_target{,_ext} routines. */
1939 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
1940 void (*host_fn
) (void *))
1942 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
1943 return (void *) host_fn
;
1946 gomp_mutex_lock (&devicep
->lock
);
1947 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1949 gomp_mutex_unlock (&devicep
->lock
);
1953 struct splay_tree_key_s k
;
1954 k
.host_start
= (uintptr_t) host_fn
;
1955 k
.host_end
= k
.host_start
+ 1;
1956 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1957 gomp_mutex_unlock (&devicep
->lock
);
1961 return (void *) tgt_fn
->tgt_offset
;
1965 /* Called when encountering a target directive. If DEVICE
1966 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1967 GOMP_DEVICE_HOST_FALLBACK (or any value
1968 larger than last available hw device), use host fallback.
1969 FN is address of host code, UNUSED is part of the current ABI, but
1970 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1971 with MAPNUM entries, with addresses of the host objects,
1972 sizes of the host objects (resp. for pointer kind pointer bias
1973 and assumed sizeof (void *) size) and kinds. */
1976 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
1977 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1978 unsigned char *kinds
)
1980 struct gomp_device_descr
*devicep
= resolve_device (device
);
1984 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1985 /* All shared memory devices should use the GOMP_target_ext function. */
1986 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
1987 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
1988 return gomp_target_fallback (fn
, hostaddrs
);
1990 struct target_mem_desc
*tgt_vars
1991 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1992 GOMP_MAP_VARS_TARGET
);
1993 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
1995 gomp_unmap_vars (tgt_vars
, true);
1998 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1999 and several arguments have been added:
2000 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2001 DEPEND is array of dependencies, see GOMP_task for details.
2003 ARGS is a pointer to an array consisting of a variable number of both
2004 device-independent and device-specific arguments, which can take one two
2005 elements where the first specifies for which device it is intended, the type
2006 and optionally also the value. If the value is not present in the first
2007 one, the whole second element the actual value. The last element of the
2008 array is a single NULL. Among the device independent can be for example
2009 NUM_TEAMS and THREAD_LIMIT.
2011 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2012 that value, or 1 if teams construct is not present, or 0, if
2013 teams construct does not have num_teams clause and so the choice is
2014 implementation defined, and -1 if it can't be determined on the host
2015 what value will GOMP_teams have on the device.
2016 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2017 body with that value, or 0, if teams construct does not have thread_limit
2018 clause or the teams construct is not present, or -1 if it can't be
2019 determined on the host what value will GOMP_teams have on the device. */
2022 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2023 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2024 unsigned int flags
, void **depend
, void **args
)
2026 struct gomp_device_descr
*devicep
= resolve_device (device
);
2027 size_t tgt_align
= 0, tgt_size
= 0;
2028 bool fpc_done
= false;
2030 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
2032 struct gomp_thread
*thr
= gomp_thread ();
2033 /* Create a team if we don't have any around, as nowait
2034 target tasks make sense to run asynchronously even when
2035 outside of any parallel. */
2036 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
2038 struct gomp_team
*team
= gomp_new_team (1);
2039 struct gomp_task
*task
= thr
->task
;
2040 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
2041 team
->prev_ts
= thr
->ts
;
2042 thr
->ts
.team
= team
;
2043 thr
->ts
.team_id
= 0;
2044 thr
->ts
.work_share
= &team
->work_shares
[0];
2045 thr
->ts
.last_work_share
= NULL
;
2046 #ifdef HAVE_SYNC_BUILTINS
2047 thr
->ts
.single_count
= 0;
2049 thr
->ts
.static_trip
= 0;
2050 thr
->task
= &team
->implicit_task
[0];
2051 gomp_init_task (thr
->task
, NULL
, icv
);
2057 thr
->task
= &team
->implicit_task
[0];
2060 pthread_setspecific (gomp_thread_destructor
, thr
);
2063 && !thr
->task
->final_task
)
2065 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
2066 sizes
, kinds
, flags
, depend
, args
,
2067 GOMP_TARGET_TASK_BEFORE_MAP
);
2072 /* If there are depend clauses, but nowait is not present
2073 (or we are in a final task), block the parent task until the
2074 dependencies are resolved and then just continue with the rest
2075 of the function as if it is a merged task. */
2078 struct gomp_thread
*thr
= gomp_thread ();
2079 if (thr
->task
&& thr
->task
->depend_hash
)
2081 /* If we might need to wait, copy firstprivate now. */
2082 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2083 &tgt_align
, &tgt_size
);
2086 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2087 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2088 tgt_align
, tgt_size
);
2091 gomp_task_maybe_wait_for_dependencies (depend
);
2097 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2098 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
2099 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2103 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2104 &tgt_align
, &tgt_size
);
2107 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2108 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2109 tgt_align
, tgt_size
);
2112 gomp_target_fallback (fn
, hostaddrs
);
2116 struct target_mem_desc
*tgt_vars
;
2117 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2121 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2122 &tgt_align
, &tgt_size
);
2125 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2126 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2127 tgt_align
, tgt_size
);
2133 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
2134 true, GOMP_MAP_VARS_TARGET
);
2135 devicep
->run_func (devicep
->target_id
, fn_addr
,
2136 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
2139 gomp_unmap_vars (tgt_vars
, true);
2142 /* Host fallback for GOMP_target_data{,_ext} routines. */
2145 gomp_target_data_fallback (void)
2147 struct gomp_task_icv
*icv
= gomp_icv (false);
2148 if (icv
->target_data
)
2150 /* Even when doing a host fallback, if there are any active
2151 #pragma omp target data constructs, need to remember the
2152 new #pragma omp target data, otherwise GOMP_target_end_data
2153 would get out of sync. */
2154 struct target_mem_desc
*tgt
2155 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
2156 GOMP_MAP_VARS_DATA
);
2157 tgt
->prev
= icv
->target_data
;
2158 icv
->target_data
= tgt
;
2163 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
2164 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2166 struct gomp_device_descr
*devicep
= resolve_device (device
);
2169 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2170 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
2171 return gomp_target_data_fallback ();
2173 struct target_mem_desc
*tgt
2174 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2175 GOMP_MAP_VARS_DATA
);
2176 struct gomp_task_icv
*icv
= gomp_icv (true);
2177 tgt
->prev
= icv
->target_data
;
2178 icv
->target_data
= tgt
;
2182 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2183 size_t *sizes
, unsigned short *kinds
)
2185 struct gomp_device_descr
*devicep
= resolve_device (device
);
2188 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2189 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2190 return gomp_target_data_fallback ();
2192 struct target_mem_desc
*tgt
2193 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2194 GOMP_MAP_VARS_DATA
);
2195 struct gomp_task_icv
*icv
= gomp_icv (true);
2196 tgt
->prev
= icv
->target_data
;
2197 icv
->target_data
= tgt
;
2201 GOMP_target_end_data (void)
2203 struct gomp_task_icv
*icv
= gomp_icv (false);
2204 if (icv
->target_data
)
2206 struct target_mem_desc
*tgt
= icv
->target_data
;
2207 icv
->target_data
= tgt
->prev
;
2208 gomp_unmap_vars (tgt
, true);
2213 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2214 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2216 struct gomp_device_descr
*devicep
= resolve_device (device
);
2219 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2220 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2223 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2227 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2228 size_t *sizes
, unsigned short *kinds
,
2229 unsigned int flags
, void **depend
)
2231 struct gomp_device_descr
*devicep
= resolve_device (device
);
2233 /* If there are depend clauses, but nowait is not present,
2234 block the parent task until the dependencies are resolved
2235 and then just continue with the rest of the function as if it
2236 is a merged task. Until we are able to schedule task during
2237 variable mapping or unmapping, ignore nowait if depend clauses
2241 struct gomp_thread
*thr
= gomp_thread ();
2242 if (thr
->task
&& thr
->task
->depend_hash
)
2244 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2246 && !thr
->task
->final_task
)
2248 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2249 mapnum
, hostaddrs
, sizes
, kinds
,
2250 flags
| GOMP_TARGET_FLAG_UPDATE
,
2251 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2256 struct gomp_team
*team
= thr
->ts
.team
;
2257 /* If parallel or taskgroup has been cancelled, don't start new
2259 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2261 if (gomp_team_barrier_cancelled (&team
->barrier
))
2263 if (thr
->task
->taskgroup
)
2265 if (thr
->task
->taskgroup
->cancelled
)
2267 if (thr
->task
->taskgroup
->workshare
2268 && thr
->task
->taskgroup
->prev
2269 && thr
->task
->taskgroup
->prev
->cancelled
)
2274 gomp_task_maybe_wait_for_dependencies (depend
);
2280 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2281 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2284 struct gomp_thread
*thr
= gomp_thread ();
2285 struct gomp_team
*team
= thr
->ts
.team
;
2286 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2287 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2289 if (gomp_team_barrier_cancelled (&team
->barrier
))
2291 if (thr
->task
->taskgroup
)
2293 if (thr
->task
->taskgroup
->cancelled
)
2295 if (thr
->task
->taskgroup
->workshare
2296 && thr
->task
->taskgroup
->prev
2297 && thr
->task
->taskgroup
->prev
->cancelled
)
2302 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2306 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2307 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
2309 const int typemask
= 0xff;
2311 gomp_mutex_lock (&devicep
->lock
);
2312 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2314 gomp_mutex_unlock (&devicep
->lock
);
2318 for (i
= 0; i
< mapnum
; i
++)
2320 struct splay_tree_key_s cur_node
;
2321 unsigned char kind
= kinds
[i
] & typemask
;
2325 case GOMP_MAP_ALWAYS_FROM
:
2326 case GOMP_MAP_DELETE
:
2327 case GOMP_MAP_RELEASE
:
2328 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2329 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2330 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2331 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2332 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2333 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2334 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2335 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2339 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
2341 if ((kind
== GOMP_MAP_DELETE
2342 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
2343 && k
->refcount
!= REFCOUNT_INFINITY
)
2346 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
2347 || kind
== GOMP_MAP_ALWAYS_FROM
)
2348 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
2349 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2350 + cur_node
.host_start
2352 cur_node
.host_end
- cur_node
.host_start
);
2353 if (k
->refcount
== 0)
2354 gomp_remove_var (devicep
, k
);
2358 gomp_mutex_unlock (&devicep
->lock
);
2359 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2364 gomp_mutex_unlock (&devicep
->lock
);
2368 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
2369 size_t *sizes
, unsigned short *kinds
,
2370 unsigned int flags
, void **depend
)
2372 struct gomp_device_descr
*devicep
= resolve_device (device
);
2374 /* If there are depend clauses, but nowait is not present,
2375 block the parent task until the dependencies are resolved
2376 and then just continue with the rest of the function as if it
2377 is a merged task. Until we are able to schedule task during
2378 variable mapping or unmapping, ignore nowait if depend clauses
2382 struct gomp_thread
*thr
= gomp_thread ();
2383 if (thr
->task
&& thr
->task
->depend_hash
)
2385 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2387 && !thr
->task
->final_task
)
2389 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2390 mapnum
, hostaddrs
, sizes
, kinds
,
2391 flags
, depend
, NULL
,
2392 GOMP_TARGET_TASK_DATA
))
2397 struct gomp_team
*team
= thr
->ts
.team
;
2398 /* If parallel or taskgroup has been cancelled, don't start new
2400 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2402 if (gomp_team_barrier_cancelled (&team
->barrier
))
2404 if (thr
->task
->taskgroup
)
2406 if (thr
->task
->taskgroup
->cancelled
)
2408 if (thr
->task
->taskgroup
->workshare
2409 && thr
->task
->taskgroup
->prev
2410 && thr
->task
->taskgroup
->prev
->cancelled
)
2415 gomp_task_maybe_wait_for_dependencies (depend
);
2421 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2422 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2425 struct gomp_thread
*thr
= gomp_thread ();
2426 struct gomp_team
*team
= thr
->ts
.team
;
2427 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2428 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2430 if (gomp_team_barrier_cancelled (&team
->barrier
))
2432 if (thr
->task
->taskgroup
)
2434 if (thr
->task
->taskgroup
->cancelled
)
2436 if (thr
->task
->taskgroup
->workshare
2437 && thr
->task
->taskgroup
->prev
2438 && thr
->task
->taskgroup
->prev
->cancelled
)
2444 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2445 for (i
= 0; i
< mapnum
; i
++)
2446 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2448 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
2449 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2453 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2454 true, GOMP_MAP_VARS_ENTER_DATA
);
2456 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
2460 gomp_target_task_fn (void *data
)
2462 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
2463 struct gomp_device_descr
*devicep
= ttask
->devicep
;
2465 if (ttask
->fn
!= NULL
)
2469 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2470 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
2471 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2473 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
2474 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
);
2478 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2481 gomp_unmap_vars (ttask
->tgt
, true);
2485 void *actual_arguments
;
2486 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2489 actual_arguments
= ttask
->hostaddrs
;
2493 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
2494 NULL
, ttask
->sizes
, ttask
->kinds
, true,
2495 GOMP_MAP_VARS_TARGET
);
2496 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
2498 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
2500 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
2501 ttask
->args
, (void *) ttask
);
2504 else if (devicep
== NULL
2505 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2506 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2510 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
2511 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2512 ttask
->kinds
, true);
2513 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2514 for (i
= 0; i
< ttask
->mapnum
; i
++)
2515 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2517 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
2518 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
2519 GOMP_MAP_VARS_ENTER_DATA
);
2520 i
+= ttask
->sizes
[i
];
2523 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
2524 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2526 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2532 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
2536 struct gomp_task_icv
*icv
= gomp_icv (true);
2537 icv
->thread_limit_var
2538 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
2544 omp_target_alloc (size_t size
, int device_num
)
2546 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2547 return malloc (size
);
2552 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2553 if (devicep
== NULL
)
2556 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2557 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2558 return malloc (size
);
2560 gomp_mutex_lock (&devicep
->lock
);
2561 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
2562 gomp_mutex_unlock (&devicep
->lock
);
2567 omp_target_free (void *device_ptr
, int device_num
)
2569 if (device_ptr
== NULL
)
2572 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2581 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2582 if (devicep
== NULL
)
2585 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2586 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2592 gomp_mutex_lock (&devicep
->lock
);
2593 gomp_free_device_memory (devicep
, device_ptr
);
2594 gomp_mutex_unlock (&devicep
->lock
);
2598 omp_target_is_present (const void *ptr
, int device_num
)
2603 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2609 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2610 if (devicep
== NULL
)
2613 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2614 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2617 gomp_mutex_lock (&devicep
->lock
);
2618 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2619 struct splay_tree_key_s cur_node
;
2621 cur_node
.host_start
= (uintptr_t) ptr
;
2622 cur_node
.host_end
= cur_node
.host_start
;
2623 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2624 int ret
= n
!= NULL
;
2625 gomp_mutex_unlock (&devicep
->lock
);
2630 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
2631 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
2634 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2637 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2639 if (dst_device_num
< 0)
2642 dst_devicep
= resolve_device (dst_device_num
);
2643 if (dst_devicep
== NULL
)
2646 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2647 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2650 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2652 if (src_device_num
< 0)
2655 src_devicep
= resolve_device (src_device_num
);
2656 if (src_devicep
== NULL
)
2659 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2660 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2663 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2665 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2668 if (src_devicep
== NULL
)
2670 gomp_mutex_lock (&dst_devicep
->lock
);
2671 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2672 (char *) dst
+ dst_offset
,
2673 (char *) src
+ src_offset
, length
);
2674 gomp_mutex_unlock (&dst_devicep
->lock
);
2675 return (ret
? 0 : EINVAL
);
2677 if (dst_devicep
== NULL
)
2679 gomp_mutex_lock (&src_devicep
->lock
);
2680 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2681 (char *) dst
+ dst_offset
,
2682 (char *) src
+ src_offset
, length
);
2683 gomp_mutex_unlock (&src_devicep
->lock
);
2684 return (ret
? 0 : EINVAL
);
2686 if (src_devicep
== dst_devicep
)
2688 gomp_mutex_lock (&src_devicep
->lock
);
2689 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2690 (char *) dst
+ dst_offset
,
2691 (char *) src
+ src_offset
, length
);
2692 gomp_mutex_unlock (&src_devicep
->lock
);
2693 return (ret
? 0 : EINVAL
);
2699 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
2700 int num_dims
, const size_t *volume
,
2701 const size_t *dst_offsets
,
2702 const size_t *src_offsets
,
2703 const size_t *dst_dimensions
,
2704 const size_t *src_dimensions
,
2705 struct gomp_device_descr
*dst_devicep
,
2706 struct gomp_device_descr
*src_devicep
)
2708 size_t dst_slice
= element_size
;
2709 size_t src_slice
= element_size
;
2710 size_t j
, dst_off
, src_off
, length
;
2715 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2716 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2717 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2719 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2721 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
2725 else if (src_devicep
== NULL
)
2726 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2727 (char *) dst
+ dst_off
,
2728 (const char *) src
+ src_off
,
2730 else if (dst_devicep
== NULL
)
2731 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2732 (char *) dst
+ dst_off
,
2733 (const char *) src
+ src_off
,
2735 else if (src_devicep
== dst_devicep
)
2736 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2737 (char *) dst
+ dst_off
,
2738 (const char *) src
+ src_off
,
2742 return ret
? 0 : EINVAL
;
2745 /* FIXME: it would be nice to have some plugin function to handle
2746 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2747 be handled in the generic recursion below, and for host-host it
2748 should be used even for any num_dims >= 2. */
2750 for (i
= 1; i
< num_dims
; i
++)
2751 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2752 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2754 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2755 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2757 for (j
= 0; j
< volume
[0]; j
++)
2759 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2760 (const char *) src
+ src_off
,
2761 element_size
, num_dims
- 1,
2762 volume
+ 1, dst_offsets
+ 1,
2763 src_offsets
+ 1, dst_dimensions
+ 1,
2764 src_dimensions
+ 1, dst_devicep
,
2768 dst_off
+= dst_slice
;
2769 src_off
+= src_slice
;
2775 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
2776 int num_dims
, const size_t *volume
,
2777 const size_t *dst_offsets
,
2778 const size_t *src_offsets
,
2779 const size_t *dst_dimensions
,
2780 const size_t *src_dimensions
,
2781 int dst_device_num
, int src_device_num
)
2783 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2788 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2790 if (dst_device_num
< 0)
2793 dst_devicep
= resolve_device (dst_device_num
);
2794 if (dst_devicep
== NULL
)
2797 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2798 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2801 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2803 if (src_device_num
< 0)
2806 src_devicep
= resolve_device (src_device_num
);
2807 if (src_devicep
== NULL
)
2810 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2811 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2815 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2819 gomp_mutex_lock (&src_devicep
->lock
);
2820 else if (dst_devicep
)
2821 gomp_mutex_lock (&dst_devicep
->lock
);
2822 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2823 volume
, dst_offsets
, src_offsets
,
2824 dst_dimensions
, src_dimensions
,
2825 dst_devicep
, src_devicep
);
2827 gomp_mutex_unlock (&src_devicep
->lock
);
2828 else if (dst_devicep
)
2829 gomp_mutex_unlock (&dst_devicep
->lock
);
2834 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
2835 size_t size
, size_t device_offset
, int device_num
)
2837 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2843 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2844 if (devicep
== NULL
)
2847 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2848 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2851 gomp_mutex_lock (&devicep
->lock
);
2853 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2854 struct splay_tree_key_s cur_node
;
2857 cur_node
.host_start
= (uintptr_t) host_ptr
;
2858 cur_node
.host_end
= cur_node
.host_start
+ size
;
2859 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2862 if (n
->tgt
->tgt_start
+ n
->tgt_offset
2863 == (uintptr_t) device_ptr
+ device_offset
2864 && n
->host_start
<= cur_node
.host_start
2865 && n
->host_end
>= cur_node
.host_end
)
2870 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2871 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2875 tgt
->to_free
= NULL
;
2877 tgt
->list_count
= 0;
2878 tgt
->device_descr
= devicep
;
2879 splay_tree_node array
= tgt
->array
;
2880 splay_tree_key k
= &array
->key
;
2881 k
->host_start
= cur_node
.host_start
;
2882 k
->host_end
= cur_node
.host_end
;
2884 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2885 k
->refcount
= REFCOUNT_INFINITY
;
2886 k
->virtual_refcount
= 0;
2889 array
->right
= NULL
;
2890 splay_tree_insert (&devicep
->mem_map
, array
);
2893 gomp_mutex_unlock (&devicep
->lock
);
2898 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
2900 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2906 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2907 if (devicep
== NULL
)
2910 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2913 gomp_mutex_lock (&devicep
->lock
);
2915 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2916 struct splay_tree_key_s cur_node
;
2919 cur_node
.host_start
= (uintptr_t) ptr
;
2920 cur_node
.host_end
= cur_node
.host_start
;
2921 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2923 && n
->host_start
== cur_node
.host_start
2924 && n
->refcount
== REFCOUNT_INFINITY
2925 && n
->tgt
->tgt_start
== 0
2926 && n
->tgt
->to_free
== NULL
2927 && n
->tgt
->refcount
== 1
2928 && n
->tgt
->list_count
== 0)
2930 splay_tree_remove (&devicep
->mem_map
, n
);
2931 gomp_unmap_tgt (n
->tgt
);
2935 gomp_mutex_unlock (&devicep
->lock
);
2940 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
2943 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2944 return gomp_pause_host ();
2945 if (device_num
< 0 || device_num
>= gomp_get_num_devices ())
2947 /* Do nothing for target devices for now. */
2952 omp_pause_resource_all (omp_pause_resource_t kind
)
2955 if (gomp_pause_host ())
2957 /* Do nothing for target devices for now. */
2961 ialias (omp_pause_resource
)
2962 ialias (omp_pause_resource_all
)
2964 #ifdef PLUGIN_SUPPORT
2966 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2968 The handles of the found functions are stored in the corresponding fields
2969 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2972 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2973 const char *plugin_name
)
2975 const char *err
= NULL
, *last_missing
= NULL
;
2977 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2981 /* Check if all required functions are available in the plugin and store
2982 their handlers. None of the symbols can legitimately be NULL,
2983 so we don't need to check dlerror all the time. */
2985 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2987 /* Similar, but missing functions are not an error. Return false if
2988 failed, true otherwise. */
2989 #define DLSYM_OPT(f, n) \
2990 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2991 || (last_missing = #n, 0))
2994 if (device
->version_func () != GOMP_VERSION
)
2996 err
= "plugin version mismatch";
3003 DLSYM (get_num_devices
);
3004 DLSYM (get_property
);
3005 DLSYM (init_device
);
3006 DLSYM (fini_device
);
3008 DLSYM (unload_image
);
3013 device
->capabilities
= device
->get_caps_func ();
3014 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3018 DLSYM_OPT (can_run
, can_run
);
3021 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3023 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
3024 || !DLSYM_OPT (openacc
.create_thread_data
,
3025 openacc_create_thread_data
)
3026 || !DLSYM_OPT (openacc
.destroy_thread_data
,
3027 openacc_destroy_thread_data
)
3028 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
3029 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
3030 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
3031 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
3032 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
3033 || !DLSYM_OPT (openacc
.async
.queue_callback
,
3034 openacc_async_queue_callback
)
3035 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
3036 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
3037 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
))
3039 /* Require all the OpenACC handlers if we have
3040 GOMP_OFFLOAD_CAP_OPENACC_200. */
3041 err
= "plugin missing OpenACC handler function";
3046 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
3047 openacc_cuda_get_current_device
);
3048 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
3049 openacc_cuda_get_current_context
);
3050 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
3051 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
3052 if (cuda
&& cuda
!= 4)
3054 /* Make sure all the CUDA functions are there if any of them are. */
3055 err
= "plugin missing OpenACC CUDA handler function";
3067 gomp_error ("while loading %s: %s", plugin_name
, err
);
3069 gomp_error ("missing function was %s", last_missing
);
3071 dlclose (plugin_handle
);
3076 /* This function finalizes all initialized devices. */
3079 gomp_target_fini (void)
3082 for (i
= 0; i
< num_devices
; i
++)
3085 struct gomp_device_descr
*devicep
= &devices
[i
];
3086 gomp_mutex_lock (&devicep
->lock
);
3087 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3088 ret
= gomp_fini_device (devicep
);
3089 gomp_mutex_unlock (&devicep
->lock
);
3091 gomp_fatal ("device finalization failed");
3095 /* This function initializes the runtime for offloading.
3096 It parses the list of offload plugins, and tries to load these.
3097 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3098 will be set, and the array DEVICES initialized, containing descriptors for
3099 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3103 gomp_target_init (void)
3105 const char *prefix
="libgomp-plugin-";
3106 const char *suffix
= SONAME_SUFFIX (1);
3107 const char *cur
, *next
;
3109 int i
, new_num_devices
;
3114 cur
= OFFLOAD_PLUGINS
;
3118 struct gomp_device_descr current_device
;
3119 size_t prefix_len
, suffix_len
, cur_len
;
3121 next
= strchr (cur
, ',');
3123 prefix_len
= strlen (prefix
);
3124 cur_len
= next
? next
- cur
: strlen (cur
);
3125 suffix_len
= strlen (suffix
);
3127 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
3134 memcpy (plugin_name
, prefix
, prefix_len
);
3135 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
3136 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
3138 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
3140 new_num_devices
= current_device
.get_num_devices_func ();
3141 if (new_num_devices
>= 1)
3143 /* Augment DEVICES and NUM_DEVICES. */
3145 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
3146 * sizeof (struct gomp_device_descr
));
3154 current_device
.name
= current_device
.get_name_func ();
3155 /* current_device.capabilities has already been set. */
3156 current_device
.type
= current_device
.get_type_func ();
3157 current_device
.mem_map
.root
= NULL
;
3158 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
3159 for (i
= 0; i
< new_num_devices
; i
++)
3161 current_device
.target_id
= i
;
3162 devices
[num_devices
] = current_device
;
3163 gomp_mutex_init (&devices
[num_devices
].lock
);
3174 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3175 NUM_DEVICES_OPENMP. */
3176 struct gomp_device_descr
*devices_s
3177 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
3184 num_devices_openmp
= 0;
3185 for (i
= 0; i
< num_devices
; i
++)
3186 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3187 devices_s
[num_devices_openmp
++] = devices
[i
];
3188 int num_devices_after_openmp
= num_devices_openmp
;
3189 for (i
= 0; i
< num_devices
; i
++)
3190 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3191 devices_s
[num_devices_after_openmp
++] = devices
[i
];
3193 devices
= devices_s
;
3195 for (i
= 0; i
< num_devices
; i
++)
3197 /* The 'devices' array can be moved (by the realloc call) until we have
3198 found all the plugins, so registering with the OpenACC runtime (which
3199 takes a copy of the pointer argument) must be delayed until now. */
3200 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3201 goacc_register (&devices
[i
]);
3204 if (atexit (gomp_target_fini
) != 0)
3205 gomp_fatal ("atexit failed");
3208 #else /* PLUGIN_SUPPORT */
3209 /* If dlfcn.h is unavailable we always fallback to host execution.
3210 GOMP_target* routines are just stubs for this case. */
3212 gomp_target_init (void)
3215 #endif /* PLUGIN_SUPPORT */