1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
49 static void gomp_target_init (void);
51 /* The whole initialization code for offloading plugins is only run one. */
52 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock
;
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr
{
62 enum offload_target_type type
;
63 const void *host_table
;
64 const void *target_data
;
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr
*offload_images
;
70 /* Total number of offload images. */
71 static int num_offload_images
;
73 /* Array of descriptors for all available devices. */
74 static struct gomp_device_descr
*devices
;
76 /* Total number of available devices. */
77 static int num_devices
;
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 static int num_devices_openmp
;
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
85 gomp_realloc_unlock (void *old
, size_t size
)
87 void *ret
= realloc (old
, size
);
90 gomp_mutex_unlock (®ister_lock
);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
97 gomp_init_targets_once (void)
99 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
103 gomp_get_num_devices (void)
105 gomp_init_targets_once ();
106 return num_devices_openmp
;
109 static struct gomp_device_descr
*
110 resolve_device (int device_id
)
112 if (device_id
== GOMP_DEVICE_ICV
)
114 struct gomp_task_icv
*icv
= gomp_icv (false);
115 device_id
= icv
->default_device_var
;
118 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
121 gomp_mutex_lock (&devices
[device_id
].lock
);
122 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
123 gomp_init_device (&devices
[device_id
]);
124 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
126 gomp_mutex_unlock (&devices
[device_id
].lock
);
129 gomp_mutex_unlock (&devices
[device_id
].lock
);
131 return &devices
[device_id
];
135 static inline splay_tree_key
136 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
138 if (key
->host_start
!= key
->host_end
)
139 return splay_tree_lookup (mem_map
, key
);
142 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
147 n
= splay_tree_lookup (mem_map
, key
);
151 return splay_tree_lookup (mem_map
, key
);
154 static inline splay_tree_key
155 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
157 if (key
->host_start
!= key
->host_end
)
158 return splay_tree_lookup (mem_map
, key
);
161 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
167 gomp_device_copy (struct gomp_device_descr
*devicep
,
168 bool (*copy_func
) (int, void *, const void *, size_t),
169 const char *dst
, void *dstaddr
,
170 const char *src
, const void *srcaddr
,
173 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
175 gomp_mutex_unlock (&devicep
->lock
);
176 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
182 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
183 bool (*copy_func
) (int, void *, const void *, size_t,
184 struct goacc_asyncqueue
*),
185 const char *dst
, void *dstaddr
,
186 const char *src
, const void *srcaddr
,
187 size_t size
, struct goacc_asyncqueue
*aq
)
189 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
191 gomp_mutex_unlock (&devicep
->lock
);
192 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
237 /* Add region with device tgt_start relative offset and length to CBUF. */
240 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
242 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
246 if (cbuf
->chunk_cnt
< 0)
248 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
250 cbuf
->chunk_cnt
= -1;
253 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
255 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
259 /* If the last chunk is only used by one mapping, discard it,
260 as it will be one host to device copy anyway and
261 memcpying it around will only waste cycles. */
262 if (cbuf
->use_cnt
== 1)
265 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
266 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
271 /* Return true for mapping kinds which need to copy data from the
272 host to device for regions that weren't previously mapped. */
275 gomp_to_device_kind_p (int kind
)
281 case GOMP_MAP_FORCE_ALLOC
:
282 case GOMP_MAP_FORCE_FROM
:
283 case GOMP_MAP_ALWAYS_FROM
:
290 attribute_hidden
void
291 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
292 struct goacc_asyncqueue
*aq
,
293 void *d
, const void *h
, size_t sz
,
294 struct gomp_coalesce_buf
*cbuf
)
298 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
299 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
302 long last
= cbuf
->chunk_cnt
- 1;
303 while (first
<= last
)
305 long middle
= (first
+ last
) >> 1;
306 if (cbuf
->chunks
[middle
].end
<= doff
)
308 else if (cbuf
->chunks
[middle
].start
<= doff
)
310 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
311 gomp_fatal ("internal libgomp cbuf error");
312 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
321 if (__builtin_expect (aq
!= NULL
, 0))
322 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
323 "dev", d
, "host", h
, sz
, aq
);
325 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
328 attribute_hidden
void
329 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
330 struct goacc_asyncqueue
*aq
,
331 void *h
, const void *d
, size_t sz
)
333 if (__builtin_expect (aq
!= NULL
, 0))
334 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
335 "host", h
, "dev", d
, sz
, aq
);
337 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
341 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
343 if (!devicep
->free_func (devicep
->target_id
, devptr
))
345 gomp_mutex_unlock (&devicep
->lock
);
346 gomp_fatal ("error in freeing device memory block at %p", devptr
);
350 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
351 gomp_map_0len_lookup found oldn for newn.
352 Helper function of gomp_map_vars. */
355 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
356 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
357 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
358 unsigned char kind
, bool always_to_flag
,
359 struct gomp_coalesce_buf
*cbuf
)
361 assert (kind
!= GOMP_MAP_ATTACH
);
364 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
365 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
366 tgt_var
->is_attach
= false;
367 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
368 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
370 if ((kind
& GOMP_MAP_FLAG_FORCE
)
371 || oldn
->host_start
> newn
->host_start
372 || oldn
->host_end
< newn
->host_end
)
374 gomp_mutex_unlock (&devicep
->lock
);
375 gomp_fatal ("Trying to map into device [%p..%p) object when "
376 "[%p..%p) is already mapped",
377 (void *) newn
->host_start
, (void *) newn
->host_end
,
378 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
381 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
382 gomp_copy_host2dev (devicep
, aq
,
383 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
384 + newn
->host_start
- oldn
->host_start
),
385 (void *) newn
->host_start
,
386 newn
->host_end
- newn
->host_start
, cbuf
);
388 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Fetch the IDX-th map kind from KINDS, which is an array of unsigned
   shorts when SHORT_MAPKIND is true (OpenMP 4.5+ encoding) and of unsigned
   chars otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
400 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
401 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
402 struct gomp_coalesce_buf
*cbuf
)
404 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
405 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
406 struct splay_tree_key_s cur_node
;
408 cur_node
.host_start
= host_ptr
;
409 if (cur_node
.host_start
== (uintptr_t) NULL
)
411 cur_node
.tgt_offset
= (uintptr_t) NULL
;
412 gomp_copy_host2dev (devicep
, aq
,
413 (void *) (tgt
->tgt_start
+ target_offset
),
414 (void *) &cur_node
.tgt_offset
,
415 sizeof (void *), cbuf
);
418 /* Add bias to the pointer value. */
419 cur_node
.host_start
+= bias
;
420 cur_node
.host_end
= cur_node
.host_start
;
421 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
424 gomp_mutex_unlock (&devicep
->lock
);
425 gomp_fatal ("Pointer target of array section wasn't mapped");
427 cur_node
.host_start
-= n
->host_start
;
429 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
430 /* At this point tgt_offset is target address of the
431 array section. Now subtract bias to get what we want
432 to initialize the pointer with. */
433 cur_node
.tgt_offset
-= bias
;
434 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
435 (void *) &cur_node
.tgt_offset
, sizeof (void *), cbuf
);
439 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
440 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
441 size_t first
, size_t i
, void **hostaddrs
,
442 size_t *sizes
, void *kinds
,
443 struct gomp_coalesce_buf
*cbuf
)
445 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
446 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
447 struct splay_tree_key_s cur_node
;
449 const bool short_mapkind
= true;
450 const int typemask
= short_mapkind
? 0xff : 0x7;
452 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
453 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
454 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
455 kind
= get_kind (short_mapkind
, kinds
, i
);
458 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
460 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
461 kind
& typemask
, false, cbuf
);
466 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
468 cur_node
.host_start
--;
469 n2
= splay_tree_lookup (mem_map
, &cur_node
);
470 cur_node
.host_start
++;
473 && n2
->host_start
- n
->host_start
474 == n2
->tgt_offset
- n
->tgt_offset
)
476 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
477 kind
& typemask
, false, cbuf
);
482 n2
= splay_tree_lookup (mem_map
, &cur_node
);
486 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
488 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
489 kind
& typemask
, false, cbuf
);
493 gomp_mutex_unlock (&devicep
->lock
);
494 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
495 "other mapped elements from the same structure weren't mapped "
496 "together with it", (void *) cur_node
.host_start
,
497 (void *) cur_node
.host_end
);
500 attribute_hidden
void
501 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
502 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
503 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
504 struct gomp_coalesce_buf
*cbufp
)
506 struct splay_tree_key_s s
;
511 gomp_mutex_unlock (&devicep
->lock
);
512 gomp_fatal ("enclosing struct not mapped for attach");
515 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
516 /* We might have a pointer in a packed struct: however we cannot have more
517 than one such pointer in each pointer-sized portion of the struct, so
519 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
522 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
524 if (!n
->aux
->attach_count
)
526 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
528 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
529 n
->aux
->attach_count
[idx
]++;
532 gomp_mutex_unlock (&devicep
->lock
);
533 gomp_fatal ("attach count overflow");
536 if (n
->aux
->attach_count
[idx
] == 1)
538 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
540 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
544 if ((void *) target
== NULL
)
546 gomp_mutex_unlock (&devicep
->lock
);
547 gomp_fatal ("attempt to attach null pointer");
550 s
.host_start
= target
+ bias
;
551 s
.host_end
= s
.host_start
+ 1;
552 tn
= splay_tree_lookup (mem_map
, &s
);
556 gomp_mutex_unlock (&devicep
->lock
);
557 gomp_fatal ("pointer target not mapped for attach");
560 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
563 "%s: attaching host %p, target %p (struct base %p) to %p\n",
564 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
565 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
567 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
568 sizeof (void *), cbufp
);
571 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
572 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
575 attribute_hidden
void
576 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
577 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
578 uintptr_t detach_from
, bool finalize
,
579 struct gomp_coalesce_buf
*cbufp
)
585 gomp_mutex_unlock (&devicep
->lock
);
586 gomp_fatal ("enclosing struct not mapped for detach");
589 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
591 if (!n
->aux
|| !n
->aux
->attach_count
)
593 gomp_mutex_unlock (&devicep
->lock
);
594 gomp_fatal ("no attachment counters for struct");
598 n
->aux
->attach_count
[idx
] = 1;
600 if (n
->aux
->attach_count
[idx
] == 0)
602 gomp_mutex_unlock (&devicep
->lock
);
603 gomp_fatal ("attach count underflow");
606 n
->aux
->attach_count
[idx
]--;
608 if (n
->aux
->attach_count
[idx
] == 0)
610 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
612 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
615 "%s: detaching host %p, target %p (struct base %p) to %p\n",
616 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
617 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
620 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
621 sizeof (void *), cbufp
);
624 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
625 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
628 attribute_hidden
uintptr_t
629 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
631 if (tgt
->list
[i
].key
!= NULL
)
632 return tgt
->list
[i
].key
->tgt
->tgt_start
633 + tgt
->list
[i
].key
->tgt_offset
634 + tgt
->list
[i
].offset
;
636 switch (tgt
->list
[i
].offset
)
639 return (uintptr_t) hostaddrs
[i
];
645 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
646 + tgt
->list
[i
+ 1].key
->tgt_offset
647 + tgt
->list
[i
+ 1].offset
648 + (uintptr_t) hostaddrs
[i
]
649 - (uintptr_t) hostaddrs
[i
+ 1];
652 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
656 static inline __attribute__((always_inline
)) struct target_mem_desc
*
657 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
658 struct goacc_asyncqueue
*aq
, size_t mapnum
,
659 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
660 void *kinds
, bool short_mapkind
,
661 enum gomp_map_vars_kind pragma_kind
)
663 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
664 bool has_firstprivate
= false;
665 bool has_always_ptrset
= false;
666 const int rshift
= short_mapkind
? 8 : 3;
667 const int typemask
= short_mapkind
? 0xff : 0x7;
668 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
669 struct splay_tree_key_s cur_node
;
670 struct target_mem_desc
*tgt
671 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
672 tgt
->list_count
= mapnum
;
673 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
674 tgt
->device_descr
= devicep
;
676 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
685 tgt_align
= sizeof (void *);
691 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
693 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
694 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
697 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
699 size_t align
= 4 * sizeof (void *);
701 tgt_size
= mapnum
* sizeof (void *);
703 cbuf
.use_cnt
= 1 + (mapnum
> 1);
704 cbuf
.chunks
[0].start
= 0;
705 cbuf
.chunks
[0].end
= tgt_size
;
708 gomp_mutex_lock (&devicep
->lock
);
709 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
711 gomp_mutex_unlock (&devicep
->lock
);
716 for (i
= 0; i
< mapnum
; i
++)
718 int kind
= get_kind (short_mapkind
, kinds
, i
);
719 if (hostaddrs
[i
] == NULL
720 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
722 tgt
->list
[i
].key
= NULL
;
723 tgt
->list
[i
].offset
= OFFSET_INLINED
;
726 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
727 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
729 tgt
->list
[i
].key
= NULL
;
732 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
733 on a separate construct prior to using use_device_{addr,ptr}.
734 In OpenMP 5.0, map directives need to be ordered by the
735 middle-end before the use_device_* clauses. If
736 !not_found_cnt, all mappings requested (if any) are already
737 mapped, so use_device_{addr,ptr} can be resolved right away.
738 Otherwise, if not_found_cnt, gomp_map_lookup might fail
739 now but would succeed after performing the mappings in the
740 following loop. We can't defer this always to the second
741 loop, because it is not even invoked when !not_found_cnt
742 after the first loop. */
743 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
744 cur_node
.host_end
= cur_node
.host_start
;
745 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
748 cur_node
.host_start
-= n
->host_start
;
750 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
751 + cur_node
.host_start
);
753 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
755 gomp_mutex_unlock (&devicep
->lock
);
756 gomp_fatal ("use_device_ptr pointer wasn't mapped");
758 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
759 /* If not present, continue using the host address. */
762 __builtin_unreachable ();
763 tgt
->list
[i
].offset
= OFFSET_INLINED
;
766 tgt
->list
[i
].offset
= 0;
769 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
771 size_t first
= i
+ 1;
772 size_t last
= i
+ sizes
[i
];
773 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
774 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
776 tgt
->list
[i
].key
= NULL
;
777 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
778 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
781 size_t align
= (size_t) 1 << (kind
>> rshift
);
782 if (tgt_align
< align
)
784 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
785 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
786 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
787 not_found_cnt
+= last
- i
;
788 for (i
= first
; i
<= last
; i
++)
790 tgt
->list
[i
].key
= NULL
;
791 if (gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
793 gomp_coalesce_buf_add (&cbuf
,
794 tgt_size
- cur_node
.host_end
795 + (uintptr_t) hostaddrs
[i
],
801 for (i
= first
; i
<= last
; i
++)
802 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
807 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
809 tgt
->list
[i
].key
= NULL
;
810 tgt
->list
[i
].offset
= OFFSET_POINTER
;
811 has_firstprivate
= true;
814 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
)
816 tgt
->list
[i
].key
= NULL
;
817 has_firstprivate
= true;
820 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
821 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
822 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
824 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
825 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
827 tgt
->list
[i
].key
= NULL
;
829 size_t align
= (size_t) 1 << (kind
>> rshift
);
830 if (tgt_align
< align
)
832 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
833 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
834 cur_node
.host_end
- cur_node
.host_start
);
835 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
836 has_firstprivate
= true;
840 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
842 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
845 tgt
->list
[i
].key
= NULL
;
846 tgt
->list
[i
].offset
= OFFSET_POINTER
;
851 n
= splay_tree_lookup (mem_map
, &cur_node
);
852 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
854 int always_to_cnt
= 0;
855 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
857 bool has_nullptr
= false;
859 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
860 if (n
->tgt
->list
[j
].key
== n
)
862 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
865 if (n
->tgt
->list_count
== 0)
867 /* 'declare target'; assume has_nullptr; it could also be
868 statically assigned pointer, but that it should be to
869 the equivalent variable on the host. */
870 assert (n
->refcount
== REFCOUNT_INFINITY
);
874 assert (j
< n
->tgt
->list_count
);
875 /* Re-map the data if there is an 'always' modifier or if it a
876 null pointer was there and non a nonnull has been found; that
877 permits transparent re-mapping for Fortran array descriptors
878 which were previously mapped unallocated. */
879 for (j
= i
+ 1; j
< mapnum
; j
++)
881 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
882 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
884 || !GOMP_MAP_POINTER_P (ptr_kind
)
885 || *(void **) hostaddrs
[j
] == NULL
))
887 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
888 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
889 > cur_node
.host_end
))
893 has_always_ptrset
= true;
898 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
899 kind
& typemask
, always_to_cnt
> 0, NULL
);
904 tgt
->list
[i
].key
= NULL
;
906 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
908 /* Not present, hence, skip entry - including its MAP_POINTER,
910 tgt
->list
[i
].offset
= OFFSET_POINTER
;
912 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
913 == GOMP_MAP_POINTER
))
916 tgt
->list
[i
].key
= NULL
;
917 tgt
->list
[i
].offset
= 0;
921 size_t align
= (size_t) 1 << (kind
>> rshift
);
923 if (tgt_align
< align
)
925 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
926 if (gomp_to_device_kind_p (kind
& typemask
))
927 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
928 cur_node
.host_end
- cur_node
.host_start
);
929 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
930 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
934 for (j
= i
+ 1; j
< mapnum
; j
++)
935 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
936 kinds
, j
)) & typemask
))
937 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
939 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
940 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
941 > cur_node
.host_end
))
945 tgt
->list
[j
].key
= NULL
;
956 gomp_mutex_unlock (&devicep
->lock
);
957 gomp_fatal ("unexpected aggregation");
959 tgt
->to_free
= devaddrs
[0];
960 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
961 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
963 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
965 /* Allocate tgt_align aligned tgt_size block of memory. */
966 /* FIXME: Perhaps change interface to allocate properly aligned
968 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
969 tgt_size
+ tgt_align
- 1);
972 gomp_mutex_unlock (&devicep
->lock
);
973 gomp_fatal ("device memory allocation fail");
976 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
977 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
978 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
980 if (cbuf
.use_cnt
== 1)
982 if (cbuf
.chunk_cnt
> 0)
985 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1001 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1002 tgt_size
= mapnum
* sizeof (void *);
1005 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1008 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1009 splay_tree_node array
= tgt
->array
;
1010 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
1011 uintptr_t field_tgt_base
= 0;
1013 for (i
= 0; i
< mapnum
; i
++)
1014 if (has_always_ptrset
1016 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1017 == GOMP_MAP_TO_PSET
)
1019 splay_tree_key k
= tgt
->list
[i
].key
;
1020 bool has_nullptr
= false;
1022 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1023 if (k
->tgt
->list
[j
].key
== k
)
1025 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1028 if (k
->tgt
->list_count
== 0)
1031 assert (j
< k
->tgt
->list_count
);
1033 tgt
->list
[i
].has_null_ptr_assoc
= false;
1034 for (j
= i
+ 1; j
< mapnum
; j
++)
1036 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1037 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1039 || !GOMP_MAP_POINTER_P (ptr_kind
)
1040 || *(void **) hostaddrs
[j
] == NULL
))
1042 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1043 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1048 if (*(void **) hostaddrs
[j
] == NULL
)
1049 tgt
->list
[i
].has_null_ptr_assoc
= true;
1050 tgt
->list
[j
].key
= k
;
1051 tgt
->list
[j
].copy_from
= false;
1052 tgt
->list
[j
].always_copy_from
= false;
1053 tgt
->list
[j
].is_attach
= false;
1054 if (k
->refcount
!= REFCOUNT_INFINITY
)
1056 gomp_map_pointer (k
->tgt
, aq
,
1057 (uintptr_t) *(void **) hostaddrs
[j
],
1058 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1065 else if (tgt
->list
[i
].key
== NULL
)
1067 int kind
= get_kind (short_mapkind
, kinds
, i
);
1068 if (hostaddrs
[i
] == NULL
)
1070 switch (kind
& typemask
)
1072 size_t align
, len
, first
, last
;
1074 case GOMP_MAP_FIRSTPRIVATE
:
1075 align
= (size_t) 1 << (kind
>> rshift
);
1076 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1077 tgt
->list
[i
].offset
= tgt_size
;
1079 gomp_copy_host2dev (devicep
, aq
,
1080 (void *) (tgt
->tgt_start
+ tgt_size
),
1081 (void *) hostaddrs
[i
], len
, cbufp
);
1084 case GOMP_MAP_FIRSTPRIVATE_INT
:
1085 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1087 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1088 /* The OpenACC 'host_data' construct only allows 'use_device'
1089 "mapping" clauses, so in the first loop, 'not_found_cnt'
1090 must always have been zero, so all OpenACC 'use_device'
1091 clauses have already been handled. (We can only easily test
1092 'use_device' with 'if_present' clause here.) */
1093 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1094 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1095 code conceptually simple, similar to the first loop. */
1096 case GOMP_MAP_USE_DEVICE_PTR
:
1097 if (tgt
->list
[i
].offset
== 0)
1099 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1100 cur_node
.host_end
= cur_node
.host_start
;
1101 n
= gomp_map_lookup (mem_map
, &cur_node
);
1104 cur_node
.host_start
-= n
->host_start
;
1106 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1107 + cur_node
.host_start
);
1109 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1111 gomp_mutex_unlock (&devicep
->lock
);
1112 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1114 else if ((kind
& typemask
)
1115 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1116 /* If not present, continue using the host address. */
1119 __builtin_unreachable ();
1120 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1123 case GOMP_MAP_STRUCT
:
1125 last
= i
+ sizes
[i
];
1126 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1127 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1129 if (tgt
->list
[first
].key
!= NULL
)
1131 n
= splay_tree_lookup (mem_map
, &cur_node
);
1134 size_t align
= (size_t) 1 << (kind
>> rshift
);
1135 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1136 - (uintptr_t) hostaddrs
[i
];
1137 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1138 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1139 - (uintptr_t) hostaddrs
[i
];
1140 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1141 field_tgt_offset
= tgt_size
;
1142 field_tgt_clear
= last
;
1143 tgt_size
+= cur_node
.host_end
1144 - (uintptr_t) hostaddrs
[first
];
1147 for (i
= first
; i
<= last
; i
++)
1148 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1149 sizes
, kinds
, cbufp
);
1152 case GOMP_MAP_ALWAYS_POINTER
:
1153 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1154 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1155 n
= splay_tree_lookup (mem_map
, &cur_node
);
1157 || n
->host_start
> cur_node
.host_start
1158 || n
->host_end
< cur_node
.host_end
)
1160 gomp_mutex_unlock (&devicep
->lock
);
1161 gomp_fatal ("always pointer not mapped");
1163 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1164 != GOMP_MAP_ALWAYS_POINTER
)
1165 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1166 if (cur_node
.tgt_offset
)
1167 cur_node
.tgt_offset
-= sizes
[i
];
1168 gomp_copy_host2dev (devicep
, aq
,
1169 (void *) (n
->tgt
->tgt_start
1171 + cur_node
.host_start
1173 (void *) &cur_node
.tgt_offset
,
1174 sizeof (void *), cbufp
);
1175 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1176 + cur_node
.host_start
- n
->host_start
;
1178 case GOMP_MAP_IF_PRESENT
:
1179 /* Not present - otherwise handled above. Skip over its
1180 MAP_POINTER as well. */
1182 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1183 == GOMP_MAP_POINTER
))
1186 case GOMP_MAP_ATTACH
:
1188 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1189 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1190 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1193 tgt
->list
[i
].key
= n
;
1194 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1195 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1196 tgt
->list
[i
].copy_from
= false;
1197 tgt
->list
[i
].always_copy_from
= false;
1198 tgt
->list
[i
].is_attach
= true;
1199 /* OpenACC 'attach'/'detach' doesn't affect
1200 structured/dynamic reference counts ('n->refcount',
1201 'n->dynamic_refcount'). */
1205 gomp_mutex_unlock (&devicep
->lock
);
1206 gomp_fatal ("outer struct not mapped for attach");
1208 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1209 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1216 splay_tree_key k
= &array
->key
;
1217 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1218 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1219 k
->host_end
= k
->host_start
+ sizes
[i
];
1221 k
->host_end
= k
->host_start
+ sizeof (void *);
1222 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1223 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1224 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1225 kind
& typemask
, false, cbufp
);
1229 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1231 /* Replace target address of the pointer with target address
1232 of mapped object in the splay tree. */
1233 splay_tree_remove (mem_map
, n
);
1235 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1236 k
->aux
->link_key
= n
;
1238 size_t align
= (size_t) 1 << (kind
>> rshift
);
1239 tgt
->list
[i
].key
= k
;
1241 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1243 k
->tgt_offset
= k
->host_start
- field_tgt_base
1245 if (i
== field_tgt_clear
)
1246 field_tgt_clear
= FIELD_TGT_EMPTY
;
1250 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1251 k
->tgt_offset
= tgt_size
;
1252 tgt_size
+= k
->host_end
- k
->host_start
;
1254 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1255 tgt
->list
[i
].always_copy_from
1256 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1257 tgt
->list
[i
].is_attach
= false;
1258 tgt
->list
[i
].offset
= 0;
1259 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1261 k
->dynamic_refcount
= 0;
1264 array
->right
= NULL
;
1265 splay_tree_insert (mem_map
, array
);
1266 switch (kind
& typemask
)
1268 case GOMP_MAP_ALLOC
:
1270 case GOMP_MAP_FORCE_ALLOC
:
1271 case GOMP_MAP_FORCE_FROM
:
1272 case GOMP_MAP_ALWAYS_FROM
:
1275 case GOMP_MAP_TOFROM
:
1276 case GOMP_MAP_FORCE_TO
:
1277 case GOMP_MAP_FORCE_TOFROM
:
1278 case GOMP_MAP_ALWAYS_TO
:
1279 case GOMP_MAP_ALWAYS_TOFROM
:
1280 gomp_copy_host2dev (devicep
, aq
,
1281 (void *) (tgt
->tgt_start
1283 (void *) k
->host_start
,
1284 k
->host_end
- k
->host_start
, cbufp
);
1286 case GOMP_MAP_POINTER
:
1287 gomp_map_pointer (tgt
, aq
,
1288 (uintptr_t) *(void **) k
->host_start
,
1289 k
->tgt_offset
, sizes
[i
], cbufp
);
1291 case GOMP_MAP_TO_PSET
:
1292 gomp_copy_host2dev (devicep
, aq
,
1293 (void *) (tgt
->tgt_start
1295 (void *) k
->host_start
,
1296 k
->host_end
- k
->host_start
, cbufp
);
1297 tgt
->list
[i
].has_null_ptr_assoc
= false;
1299 for (j
= i
+ 1; j
< mapnum
; j
++)
1301 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1303 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1304 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1306 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1307 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1312 tgt
->list
[j
].key
= k
;
1313 tgt
->list
[j
].copy_from
= false;
1314 tgt
->list
[j
].always_copy_from
= false;
1315 tgt
->list
[j
].is_attach
= false;
1316 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1317 if (k
->refcount
!= REFCOUNT_INFINITY
)
1319 gomp_map_pointer (tgt
, aq
,
1320 (uintptr_t) *(void **) hostaddrs
[j
],
1322 + ((uintptr_t) hostaddrs
[j
]
1329 case GOMP_MAP_FORCE_PRESENT
:
1331 /* We already looked up the memory region above and it
1333 size_t size
= k
->host_end
- k
->host_start
;
1334 gomp_mutex_unlock (&devicep
->lock
);
1335 #ifdef HAVE_INTTYPES_H
1336 gomp_fatal ("present clause: !acc_is_present (%p, "
1337 "%"PRIu64
" (0x%"PRIx64
"))",
1338 (void *) k
->host_start
,
1339 (uint64_t) size
, (uint64_t) size
);
1341 gomp_fatal ("present clause: !acc_is_present (%p, "
1342 "%lu (0x%lx))", (void *) k
->host_start
,
1343 (unsigned long) size
, (unsigned long) size
);
1347 case GOMP_MAP_FORCE_DEVICEPTR
:
1348 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1349 gomp_copy_host2dev (devicep
, aq
,
1350 (void *) (tgt
->tgt_start
1352 (void *) k
->host_start
,
1353 sizeof (void *), cbufp
);
1356 gomp_mutex_unlock (&devicep
->lock
);
1357 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1361 if (k
->aux
&& k
->aux
->link_key
)
1363 /* Set link pointer on target to the device address of the
1365 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1366 /* We intentionally do not use coalescing here, as it's not
1367 data allocated by the current call to this function. */
1368 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1369 &tgt_addr
, sizeof (void *), NULL
);
1376 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1378 for (i
= 0; i
< mapnum
; i
++)
1380 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1381 gomp_copy_host2dev (devicep
, aq
,
1382 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1383 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1391 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1392 gomp_copy_host2dev (devicep
, aq
,
1393 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1394 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1395 - cbuf
.chunks
[0].start
),
1396 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
, NULL
);
1402 /* If the variable from "omp target enter data" map-list was already mapped,
1403 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1405 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
1411 gomp_mutex_unlock (&devicep
->lock
);
1415 attribute_hidden
struct target_mem_desc
*
1416 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1417 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1418 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
1420 return gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1421 sizes
, kinds
, short_mapkind
, pragma_kind
);
1424 attribute_hidden
struct target_mem_desc
*
1425 gomp_map_vars_async (struct gomp_device_descr
*devicep
,
1426 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1427 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1428 void *kinds
, bool short_mapkind
,
1429 enum gomp_map_vars_kind pragma_kind
)
1431 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1432 sizes
, kinds
, short_mapkind
, pragma_kind
);
1436 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1438 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1440 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1447 gomp_unref_tgt (void *ptr
)
1449 bool is_tgt_unmapped
= false;
1451 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1453 if (tgt
->refcount
> 1)
1457 gomp_unmap_tgt (tgt
);
1458 is_tgt_unmapped
= true;
1461 return is_tgt_unmapped
;
/* void-returning adapter over gomp_unref_tgt, suitable for use as an
   asynchronous queue callback where the unmap result is irrelevant.  */
static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1470 static inline __attribute__((always_inline
)) bool
1471 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1472 struct goacc_asyncqueue
*aq
)
1474 bool is_tgt_unmapped
= false;
1475 splay_tree_remove (&devicep
->mem_map
, k
);
1478 if (k
->aux
->link_key
)
1479 splay_tree_insert (&devicep
->mem_map
,
1480 (splay_tree_node
) k
->aux
->link_key
);
1481 if (k
->aux
->attach_count
)
1482 free (k
->aux
->attach_count
);
1487 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1490 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1491 return is_tgt_unmapped
;
1494 attribute_hidden
bool
1495 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1497 return gomp_remove_var_internal (devicep
, k
, NULL
);
1500 /* Remove a variable asynchronously. This actually removes the variable
1501 mapping immediately, but retains the linked target_mem_desc until the
1502 asynchronous operation has completed (as it may still refer to target
1503 memory). The device lock must be held before entry, and remains locked on
1506 attribute_hidden
void
1507 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1508 struct goacc_asyncqueue
*aq
)
1510 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1513 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1514 variables back from device to host: if it is false, it is assumed that this
1515 has been done already. */
1517 static inline __attribute__((always_inline
)) void
1518 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1519 struct goacc_asyncqueue
*aq
)
1521 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1523 if (tgt
->list_count
== 0)
1529 gomp_mutex_lock (&devicep
->lock
);
1530 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1532 gomp_mutex_unlock (&devicep
->lock
);
1540 /* We must perform detachments before any copies back to the host. */
1541 for (i
= 0; i
< tgt
->list_count
; i
++)
1543 splay_tree_key k
= tgt
->list
[i
].key
;
1545 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1546 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1547 + tgt
->list
[i
].offset
,
1551 for (i
= 0; i
< tgt
->list_count
; i
++)
1553 splay_tree_key k
= tgt
->list
[i
].key
;
1557 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1558 counts ('n->refcount', 'n->dynamic_refcount'). */
1559 if (tgt
->list
[i
].is_attach
)
1562 bool do_unmap
= false;
1563 if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
1565 else if (k
->refcount
== 1)
1571 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1572 || tgt
->list
[i
].always_copy_from
)
1573 gomp_copy_dev2host (devicep
, aq
,
1574 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1575 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1576 + tgt
->list
[i
].offset
),
1577 tgt
->list
[i
].length
);
1580 struct target_mem_desc
*k_tgt
= k
->tgt
;
1581 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1582 /* It would be bad if TGT got unmapped while we're still iterating
1583 over its LIST_COUNT, and also expect to use it in the following
1585 assert (!is_tgt_unmapped
1591 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1594 gomp_unref_tgt ((void *) tgt
);
1596 gomp_mutex_unlock (&devicep
->lock
);
1599 attribute_hidden
void
1600 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
1602 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
);
1605 attribute_hidden
void
1606 gomp_unmap_vars_async (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1607 struct goacc_asyncqueue
*aq
)
1609 gomp_unmap_vars_internal (tgt
, do_copyfrom
, aq
);
1613 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
1614 size_t *sizes
, void *kinds
, bool short_mapkind
)
1617 struct splay_tree_key_s cur_node
;
1618 const int typemask
= short_mapkind
? 0xff : 0x7;
1626 gomp_mutex_lock (&devicep
->lock
);
1627 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1629 gomp_mutex_unlock (&devicep
->lock
);
1633 for (i
= 0; i
< mapnum
; i
++)
1636 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1637 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1638 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1641 int kind
= get_kind (short_mapkind
, kinds
, i
);
1642 if (n
->host_start
> cur_node
.host_start
1643 || n
->host_end
< cur_node
.host_end
)
1645 gomp_mutex_unlock (&devicep
->lock
);
1646 gomp_fatal ("Trying to update [%p..%p) object when "
1647 "only [%p..%p) is mapped",
1648 (void *) cur_node
.host_start
,
1649 (void *) cur_node
.host_end
,
1650 (void *) n
->host_start
,
1651 (void *) n
->host_end
);
1655 void *hostaddr
= (void *) cur_node
.host_start
;
1656 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1657 + cur_node
.host_start
- n
->host_start
);
1658 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
1660 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1661 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
1663 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
1664 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
1667 gomp_mutex_unlock (&devicep
->lock
);
1670 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1671 And insert to splay tree the mapping between addresses from HOST_TABLE and
1672 from loaded target image. We rely in the host and device compiler
1673 emitting variable and functions in the same order. */
1676 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
1677 const void *host_table
, const void *target_data
,
1678 bool is_register_lock
)
1680 void **host_func_table
= ((void ***) host_table
)[0];
1681 void **host_funcs_end
= ((void ***) host_table
)[1];
1682 void **host_var_table
= ((void ***) host_table
)[2];
1683 void **host_vars_end
= ((void ***) host_table
)[3];
1685 /* The func table contains only addresses, the var table contains addresses
1686 and corresponding sizes. */
1687 int num_funcs
= host_funcs_end
- host_func_table
;
1688 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1690 /* Load image to device and get target addresses for the image. */
1691 struct addr_pair
*target_table
= NULL
;
1692 int i
, num_target_entries
;
1695 = devicep
->load_image_func (devicep
->target_id
, version
,
1696 target_data
, &target_table
);
1698 if (num_target_entries
!= num_funcs
+ num_vars
)
1700 gomp_mutex_unlock (&devicep
->lock
);
1701 if (is_register_lock
)
1702 gomp_mutex_unlock (®ister_lock
);
1703 gomp_fatal ("Cannot map target functions or variables"
1704 " (expected %u, have %u)", num_funcs
+ num_vars
,
1705 num_target_entries
);
1708 /* Insert host-target address mapping into splay tree. */
1709 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1710 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
1711 tgt
->refcount
= REFCOUNT_INFINITY
;
1714 tgt
->to_free
= NULL
;
1716 tgt
->list_count
= 0;
1717 tgt
->device_descr
= devicep
;
1718 splay_tree_node array
= tgt
->array
;
1720 for (i
= 0; i
< num_funcs
; i
++)
1722 splay_tree_key k
= &array
->key
;
1723 k
->host_start
= (uintptr_t) host_func_table
[i
];
1724 k
->host_end
= k
->host_start
+ 1;
1726 k
->tgt_offset
= target_table
[i
].start
;
1727 k
->refcount
= REFCOUNT_INFINITY
;
1728 k
->dynamic_refcount
= 0;
1731 array
->right
= NULL
;
1732 splay_tree_insert (&devicep
->mem_map
, array
);
1736 /* Most significant bit of the size in host and target tables marks
1737 "omp declare target link" variables. */
1738 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1739 const uintptr_t size_mask
= ~link_bit
;
1741 for (i
= 0; i
< num_vars
; i
++)
1743 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1744 uintptr_t target_size
= target_var
->end
- target_var
->start
;
1745 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
1747 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
1749 gomp_mutex_unlock (&devicep
->lock
);
1750 if (is_register_lock
)
1751 gomp_mutex_unlock (®ister_lock
);
1752 gomp_fatal ("Cannot map target variables (size mismatch)");
1755 splay_tree_key k
= &array
->key
;
1756 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1758 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1760 k
->tgt_offset
= target_var
->start
;
1761 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
1762 k
->dynamic_refcount
= 0;
1765 array
->right
= NULL
;
1766 splay_tree_insert (&devicep
->mem_map
, array
);
1770 free (target_table
);
1773 /* Unload the mappings described by target_data from device DEVICE_P.
1774 The device must be locked. */
1777 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1779 const void *host_table
, const void *target_data
)
1781 void **host_func_table
= ((void ***) host_table
)[0];
1782 void **host_funcs_end
= ((void ***) host_table
)[1];
1783 void **host_var_table
= ((void ***) host_table
)[2];
1784 void **host_vars_end
= ((void ***) host_table
)[3];
1786 /* The func table contains only addresses, the var table contains addresses
1787 and corresponding sizes. */
1788 int num_funcs
= host_funcs_end
- host_func_table
;
1789 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1791 struct splay_tree_key_s k
;
1792 splay_tree_key node
= NULL
;
1794 /* Find mapping at start of node array */
1795 if (num_funcs
|| num_vars
)
1797 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1798 : (uintptr_t) host_var_table
[0]);
1799 k
.host_end
= k
.host_start
+ 1;
1800 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1803 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
1805 gomp_mutex_unlock (&devicep
->lock
);
1806 gomp_fatal ("image unload fail");
1809 /* Remove mappings from splay tree. */
1811 for (i
= 0; i
< num_funcs
; i
++)
1813 k
.host_start
= (uintptr_t) host_func_table
[i
];
1814 k
.host_end
= k
.host_start
+ 1;
1815 splay_tree_remove (&devicep
->mem_map
, &k
);
1818 /* Most significant bit of the size in host and target tables marks
1819 "omp declare target link" variables. */
1820 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1821 const uintptr_t size_mask
= ~link_bit
;
1822 bool is_tgt_unmapped
= false;
1824 for (i
= 0; i
< num_vars
; i
++)
1826 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
1828 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1830 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
1831 splay_tree_remove (&devicep
->mem_map
, &k
);
1834 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1835 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
1839 if (node
&& !is_tgt_unmapped
)
1846 /* This function should be called from every offload image while loading.
1847 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1848 the target, and TARGET_DATA needed by target plugin. */
1851 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1852 int target_type
, const void *target_data
)
1856 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1857 gomp_fatal ("Library too old for offload (version %u < %u)",
1858 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1860 gomp_mutex_lock (®ister_lock
);
1862 /* Load image to all initialized devices. */
1863 for (i
= 0; i
< num_devices
; i
++)
1865 struct gomp_device_descr
*devicep
= &devices
[i
];
1866 gomp_mutex_lock (&devicep
->lock
);
1867 if (devicep
->type
== target_type
1868 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1869 gomp_load_image_to_device (devicep
, version
,
1870 host_table
, target_data
, true);
1871 gomp_mutex_unlock (&devicep
->lock
);
1874 /* Insert image to array of pending images. */
1876 = gomp_realloc_unlock (offload_images
,
1877 (num_offload_images
+ 1)
1878 * sizeof (struct offload_image_descr
));
1879 offload_images
[num_offload_images
].version
= version
;
1880 offload_images
[num_offload_images
].type
= target_type
;
1881 offload_images
[num_offload_images
].host_table
= host_table
;
1882 offload_images
[num_offload_images
].target_data
= target_data
;
1884 num_offload_images
++;
1885 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: register an offload image with version 0.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1895 /* This function should be called from every offload image while unloading.
1896 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1897 the target, and TARGET_DATA needed by target plugin. */
1900 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1901 int target_type
, const void *target_data
)
1905 gomp_mutex_lock (®ister_lock
);
1907 /* Unload image from all initialized devices. */
1908 for (i
= 0; i
< num_devices
; i
++)
1910 struct gomp_device_descr
*devicep
= &devices
[i
];
1911 gomp_mutex_lock (&devicep
->lock
);
1912 if (devicep
->type
== target_type
1913 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1914 gomp_unload_image_from_device (devicep
, version
,
1915 host_table
, target_data
);
1916 gomp_mutex_unlock (&devicep
->lock
);
1919 /* Remove image from array of pending images. */
1920 for (i
= 0; i
< num_offload_images
; i
++)
1921 if (offload_images
[i
].target_data
== target_data
)
1923 offload_images
[i
] = offload_images
[--num_offload_images
];
1927 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: unregister an offload image with version 0.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1937 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1938 must be locked on entry, and remains locked on return. */
1940 attribute_hidden
void
1941 gomp_init_device (struct gomp_device_descr
*devicep
)
1944 if (!devicep
->init_device_func (devicep
->target_id
))
1946 gomp_mutex_unlock (&devicep
->lock
);
1947 gomp_fatal ("device initialization failed");
1950 /* Load to device all images registered by the moment. */
1951 for (i
= 0; i
< num_offload_images
; i
++)
1953 struct offload_image_descr
*image
= &offload_images
[i
];
1954 if (image
->type
== devicep
->type
)
1955 gomp_load_image_to_device (devicep
, image
->version
,
1956 image
->host_table
, image
->target_data
,
1960 /* Initialize OpenACC asynchronous queues. */
1961 goacc_init_asyncqueues (devicep
);
1963 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
1966 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1967 must be locked on entry, and remains locked on return. */
1969 attribute_hidden
bool
1970 gomp_fini_device (struct gomp_device_descr
*devicep
)
1972 bool ret
= goacc_fini_asyncqueues (devicep
);
1973 ret
&= devicep
->fini_device_func (devicep
->target_id
);
1974 devicep
->state
= GOMP_DEVICE_FINALIZED
;
1978 attribute_hidden
void
1979 gomp_unload_device (struct gomp_device_descr
*devicep
)
1981 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1985 /* Unload from device all images registered at the moment. */
1986 for (i
= 0; i
< num_offload_images
; i
++)
1988 struct offload_image_descr
*image
= &offload_images
[i
];
1989 if (image
->type
== devicep
->type
)
1990 gomp_unload_image_from_device (devicep
, image
->version
,
1992 image
->target_data
);
1997 /* Host fallback for GOMP_target{,_ext} routines. */
2000 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
2002 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2004 memset (thr
, '\0', sizeof (*thr
));
2005 if (gomp_places_list
)
2007 thr
->place
= old_thr
.place
;
2008 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2011 gomp_free_thread (thr
);
2015 /* Calculate alignment and size requirements of a private copy of data shared
2016 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2019 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2020 unsigned short *kinds
, size_t *tgt_align
,
2024 for (i
= 0; i
< mapnum
; i
++)
2025 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2027 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2028 if (*tgt_align
< align
)
2030 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2031 *tgt_size
+= sizes
[i
];
2035 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2038 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2039 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2042 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2044 tgt
+= tgt_align
- al
;
2047 for (i
= 0; i
< mapnum
; i
++)
2048 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2050 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2051 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2052 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2053 hostaddrs
[i
] = tgt
+ tgt_size
;
2054 tgt_size
= tgt_size
+ sizes
[i
];
2058 /* Helper function of GOMP_target{,_ext} routines. */
2061 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2062 void (*host_fn
) (void *))
2064 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2065 return (void *) host_fn
;
2068 gomp_mutex_lock (&devicep
->lock
);
2069 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2071 gomp_mutex_unlock (&devicep
->lock
);
2075 struct splay_tree_key_s k
;
2076 k
.host_start
= (uintptr_t) host_fn
;
2077 k
.host_end
= k
.host_start
+ 1;
2078 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2079 gomp_mutex_unlock (&devicep
->lock
);
2083 return (void *) tgt_fn
->tgt_offset
;
2087 /* Called when encountering a target directive. If DEVICE
2088 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2089 GOMP_DEVICE_HOST_FALLBACK (or any value
2090 larger than last available hw device), use host fallback.
2091 FN is address of host code, UNUSED is part of the current ABI, but
2092 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2093 with MAPNUM entries, with addresses of the host objects,
2094 sizes of the host objects (resp. for pointer kind pointer bias
2095 and assumed sizeof (void *) size) and kinds. */
2098 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2099 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2100 unsigned char *kinds
)
2102 struct gomp_device_descr
*devicep
= resolve_device (device
);
2106 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2107 /* All shared memory devices should use the GOMP_target_ext function. */
2108 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2109 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2110 return gomp_target_fallback (fn
, hostaddrs
);
2112 struct target_mem_desc
*tgt_vars
2113 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2114 GOMP_MAP_VARS_TARGET
);
2115 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2117 gomp_unmap_vars (tgt_vars
, true);
2120 static inline unsigned int
2121 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2123 /* If we cannot run asynchronously, simply ignore nowait. */
2124 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2125 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2130 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2131 and several arguments have been added:
2132 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2133 DEPEND is array of dependencies, see GOMP_task for details.
2135 ARGS is a pointer to an array consisting of a variable number of both
2136 device-independent and device-specific arguments, which can take one two
2137 elements where the first specifies for which device it is intended, the type
2138 and optionally also the value. If the value is not present in the first
2139 one, the whole second element the actual value. The last element of the
2140 array is a single NULL. Among the device independent can be for example
2141 NUM_TEAMS and THREAD_LIMIT.
2143 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2144 that value, or 1 if teams construct is not present, or 0, if
2145 teams construct does not have num_teams clause and so the choice is
2146 implementation defined, and -1 if it can't be determined on the host
2147 what value will GOMP_teams have on the device.
2148 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2149 body with that value, or 0, if teams construct does not have thread_limit
2150 clause or the teams construct is not present, or -1 if it can't be
2151 determined on the host what value will GOMP_teams have on the device. */
2154 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2155 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2156 unsigned int flags
, void **depend
, void **args
)
2158 struct gomp_device_descr
*devicep
= resolve_device (device
);
2159 size_t tgt_align
= 0, tgt_size
= 0;
2160 bool fpc_done
= false;
2162 flags
= clear_unsupported_flags (devicep
, flags
);
2164 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
2166 struct gomp_thread
*thr
= gomp_thread ();
2167 /* Create a team if we don't have any around, as nowait
2168 target tasks make sense to run asynchronously even when
2169 outside of any parallel. */
2170 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
2172 struct gomp_team
*team
= gomp_new_team (1);
2173 struct gomp_task
*task
= thr
->task
;
2174 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
2175 team
->prev_ts
= thr
->ts
;
2176 thr
->ts
.team
= team
;
2177 thr
->ts
.team_id
= 0;
2178 thr
->ts
.work_share
= &team
->work_shares
[0];
2179 thr
->ts
.last_work_share
= NULL
;
2180 #ifdef HAVE_SYNC_BUILTINS
2181 thr
->ts
.single_count
= 0;
2183 thr
->ts
.static_trip
= 0;
2184 thr
->task
= &team
->implicit_task
[0];
2185 gomp_init_task (thr
->task
, NULL
, icv
);
2191 thr
->task
= &team
->implicit_task
[0];
2194 pthread_setspecific (gomp_thread_destructor
, thr
);
2197 && !thr
->task
->final_task
)
2199 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
2200 sizes
, kinds
, flags
, depend
, args
,
2201 GOMP_TARGET_TASK_BEFORE_MAP
);
2206 /* If there are depend clauses, but nowait is not present
2207 (or we are in a final task), block the parent task until the
2208 dependencies are resolved and then just continue with the rest
2209 of the function as if it is a merged task. */
2212 struct gomp_thread
*thr
= gomp_thread ();
2213 if (thr
->task
&& thr
->task
->depend_hash
)
2215 /* If we might need to wait, copy firstprivate now. */
2216 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2217 &tgt_align
, &tgt_size
);
2220 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2221 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2222 tgt_align
, tgt_size
);
2225 gomp_task_maybe_wait_for_dependencies (depend
);
2231 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2232 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
2233 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2237 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2238 &tgt_align
, &tgt_size
);
2241 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2242 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2243 tgt_align
, tgt_size
);
2246 gomp_target_fallback (fn
, hostaddrs
);
2250 struct target_mem_desc
*tgt_vars
;
2251 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2255 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2256 &tgt_align
, &tgt_size
);
2259 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2260 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2261 tgt_align
, tgt_size
);
2267 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
2268 true, GOMP_MAP_VARS_TARGET
);
2269 devicep
->run_func (devicep
->target_id
, fn_addr
,
2270 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
2273 gomp_unmap_vars (tgt_vars
, true);
2276 /* Host fallback for GOMP_target_data{,_ext} routines. */
2279 gomp_target_data_fallback (void)
2281 struct gomp_task_icv
*icv
= gomp_icv (false);
2282 if (icv
->target_data
)
2284 /* Even when doing a host fallback, if there are any active
2285 #pragma omp target data constructs, need to remember the
2286 new #pragma omp target data, otherwise GOMP_target_end_data
2287 would get out of sync. */
2288 struct target_mem_desc
*tgt
2289 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
2290 GOMP_MAP_VARS_DATA
);
2291 tgt
->prev
= icv
->target_data
;
2292 icv
->target_data
= tgt
;
2297 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
2298 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2300 struct gomp_device_descr
*devicep
= resolve_device (device
);
2303 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2304 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
2305 return gomp_target_data_fallback ();
2307 struct target_mem_desc
*tgt
2308 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2309 GOMP_MAP_VARS_DATA
);
2310 struct gomp_task_icv
*icv
= gomp_icv (true);
2311 tgt
->prev
= icv
->target_data
;
2312 icv
->target_data
= tgt
;
2316 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2317 size_t *sizes
, unsigned short *kinds
)
2319 struct gomp_device_descr
*devicep
= resolve_device (device
);
2322 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2323 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2324 return gomp_target_data_fallback ();
2326 struct target_mem_desc
*tgt
2327 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2328 GOMP_MAP_VARS_DATA
);
2329 struct gomp_task_icv
*icv
= gomp_icv (true);
2330 tgt
->prev
= icv
->target_data
;
2331 icv
->target_data
= tgt
;
2335 GOMP_target_end_data (void)
2337 struct gomp_task_icv
*icv
= gomp_icv (false);
2338 if (icv
->target_data
)
2340 struct target_mem_desc
*tgt
= icv
->target_data
;
2341 icv
->target_data
= tgt
->prev
;
2342 gomp_unmap_vars (tgt
, true);
2347 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2348 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2350 struct gomp_device_descr
*devicep
= resolve_device (device
);
2353 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2354 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2357 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2361 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2362 size_t *sizes
, unsigned short *kinds
,
2363 unsigned int flags
, void **depend
)
2365 struct gomp_device_descr
*devicep
= resolve_device (device
);
2367 /* If there are depend clauses, but nowait is not present,
2368 block the parent task until the dependencies are resolved
2369 and then just continue with the rest of the function as if it
2370 is a merged task. Until we are able to schedule task during
2371 variable mapping or unmapping, ignore nowait if depend clauses
2375 struct gomp_thread
*thr
= gomp_thread ();
2376 if (thr
->task
&& thr
->task
->depend_hash
)
2378 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2380 && !thr
->task
->final_task
)
2382 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2383 mapnum
, hostaddrs
, sizes
, kinds
,
2384 flags
| GOMP_TARGET_FLAG_UPDATE
,
2385 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2390 struct gomp_team
*team
= thr
->ts
.team
;
2391 /* If parallel or taskgroup has been cancelled, don't start new
2393 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2395 if (gomp_team_barrier_cancelled (&team
->barrier
))
2397 if (thr
->task
->taskgroup
)
2399 if (thr
->task
->taskgroup
->cancelled
)
2401 if (thr
->task
->taskgroup
->workshare
2402 && thr
->task
->taskgroup
->prev
2403 && thr
->task
->taskgroup
->prev
->cancelled
)
2408 gomp_task_maybe_wait_for_dependencies (depend
);
2414 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2415 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2418 struct gomp_thread
*thr
= gomp_thread ();
2419 struct gomp_team
*team
= thr
->ts
.team
;
2420 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2421 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2423 if (gomp_team_barrier_cancelled (&team
->barrier
))
2425 if (thr
->task
->taskgroup
)
2427 if (thr
->task
->taskgroup
->cancelled
)
2429 if (thr
->task
->taskgroup
->workshare
2430 && thr
->task
->taskgroup
->prev
2431 && thr
->task
->taskgroup
->prev
->cancelled
)
2436 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2440 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2441 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
2443 const int typemask
= 0xff;
2445 gomp_mutex_lock (&devicep
->lock
);
2446 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2448 gomp_mutex_unlock (&devicep
->lock
);
2452 for (i
= 0; i
< mapnum
; i
++)
2454 struct splay_tree_key_s cur_node
;
2455 unsigned char kind
= kinds
[i
] & typemask
;
2459 case GOMP_MAP_ALWAYS_FROM
:
2460 case GOMP_MAP_DELETE
:
2461 case GOMP_MAP_RELEASE
:
2462 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2463 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2464 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2465 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2466 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2467 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2468 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2469 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2473 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
2475 if ((kind
== GOMP_MAP_DELETE
2476 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
2477 && k
->refcount
!= REFCOUNT_INFINITY
)
2480 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
2481 || kind
== GOMP_MAP_ALWAYS_FROM
)
2482 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
2483 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2484 + cur_node
.host_start
2486 cur_node
.host_end
- cur_node
.host_start
);
2487 if (k
->refcount
== 0)
2488 gomp_remove_var (devicep
, k
);
2492 gomp_mutex_unlock (&devicep
->lock
);
2493 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2498 gomp_mutex_unlock (&devicep
->lock
);
2502 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
2503 size_t *sizes
, unsigned short *kinds
,
2504 unsigned int flags
, void **depend
)
2506 struct gomp_device_descr
*devicep
= resolve_device (device
);
2508 /* If there are depend clauses, but nowait is not present,
2509 block the parent task until the dependencies are resolved
2510 and then just continue with the rest of the function as if it
2511 is a merged task. Until we are able to schedule task during
2512 variable mapping or unmapping, ignore nowait if depend clauses
2516 struct gomp_thread
*thr
= gomp_thread ();
2517 if (thr
->task
&& thr
->task
->depend_hash
)
2519 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2521 && !thr
->task
->final_task
)
2523 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2524 mapnum
, hostaddrs
, sizes
, kinds
,
2525 flags
, depend
, NULL
,
2526 GOMP_TARGET_TASK_DATA
))
2531 struct gomp_team
*team
= thr
->ts
.team
;
2532 /* If parallel or taskgroup has been cancelled, don't start new
2534 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2536 if (gomp_team_barrier_cancelled (&team
->barrier
))
2538 if (thr
->task
->taskgroup
)
2540 if (thr
->task
->taskgroup
->cancelled
)
2542 if (thr
->task
->taskgroup
->workshare
2543 && thr
->task
->taskgroup
->prev
2544 && thr
->task
->taskgroup
->prev
->cancelled
)
2549 gomp_task_maybe_wait_for_dependencies (depend
);
2555 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2556 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2559 struct gomp_thread
*thr
= gomp_thread ();
2560 struct gomp_team
*team
= thr
->ts
.team
;
2561 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2562 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2564 if (gomp_team_barrier_cancelled (&team
->barrier
))
2566 if (thr
->task
->taskgroup
)
2568 if (thr
->task
->taskgroup
->cancelled
)
2570 if (thr
->task
->taskgroup
->workshare
2571 && thr
->task
->taskgroup
->prev
2572 && thr
->task
->taskgroup
->prev
->cancelled
)
2577 /* The variables are mapped separately such that they can be released
2580 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2581 for (i
= 0; i
< mapnum
; i
++)
2582 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2584 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
2585 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2588 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
2590 for (j
= i
+ 1; j
< mapnum
; j
++)
2591 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
2592 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
2594 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
2595 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2599 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2600 true, GOMP_MAP_VARS_ENTER_DATA
);
2602 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
2606 gomp_target_task_fn (void *data
)
2608 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
2609 struct gomp_device_descr
*devicep
= ttask
->devicep
;
2611 if (ttask
->fn
!= NULL
)
2615 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2616 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
2617 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2619 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
2620 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
);
2624 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2627 gomp_unmap_vars (ttask
->tgt
, true);
2631 void *actual_arguments
;
2632 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2635 actual_arguments
= ttask
->hostaddrs
;
2639 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
2640 NULL
, ttask
->sizes
, ttask
->kinds
, true,
2641 GOMP_MAP_VARS_TARGET
);
2642 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
2644 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
2646 assert (devicep
->async_run_func
);
2647 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
2648 ttask
->args
, (void *) ttask
);
2651 else if (devicep
== NULL
2652 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2653 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2657 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
2658 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2659 ttask
->kinds
, true);
2660 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2661 for (i
= 0; i
< ttask
->mapnum
; i
++)
2662 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2664 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
2665 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
2666 GOMP_MAP_VARS_ENTER_DATA
);
2667 i
+= ttask
->sizes
[i
];
2670 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
2671 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2673 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2679 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
2683 struct gomp_task_icv
*icv
= gomp_icv (true);
2684 icv
->thread_limit_var
2685 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
2691 omp_target_alloc (size_t size
, int device_num
)
2693 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2694 return malloc (size
);
2699 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2700 if (devicep
== NULL
)
2703 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2704 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2705 return malloc (size
);
2707 gomp_mutex_lock (&devicep
->lock
);
2708 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
2709 gomp_mutex_unlock (&devicep
->lock
);
2714 omp_target_free (void *device_ptr
, int device_num
)
2716 if (device_ptr
== NULL
)
2719 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2728 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2729 if (devicep
== NULL
)
2732 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2733 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2739 gomp_mutex_lock (&devicep
->lock
);
2740 gomp_free_device_memory (devicep
, device_ptr
);
2741 gomp_mutex_unlock (&devicep
->lock
);
2745 omp_target_is_present (const void *ptr
, int device_num
)
2750 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2756 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2757 if (devicep
== NULL
)
2760 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2761 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2764 gomp_mutex_lock (&devicep
->lock
);
2765 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2766 struct splay_tree_key_s cur_node
;
2768 cur_node
.host_start
= (uintptr_t) ptr
;
2769 cur_node
.host_end
= cur_node
.host_start
;
2770 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2771 int ret
= n
!= NULL
;
2772 gomp_mutex_unlock (&devicep
->lock
);
2777 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
2778 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
2781 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2784 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2786 if (dst_device_num
< 0)
2789 dst_devicep
= resolve_device (dst_device_num
);
2790 if (dst_devicep
== NULL
)
2793 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2794 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2797 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2799 if (src_device_num
< 0)
2802 src_devicep
= resolve_device (src_device_num
);
2803 if (src_devicep
== NULL
)
2806 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2807 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2810 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2812 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2815 if (src_devicep
== NULL
)
2817 gomp_mutex_lock (&dst_devicep
->lock
);
2818 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2819 (char *) dst
+ dst_offset
,
2820 (char *) src
+ src_offset
, length
);
2821 gomp_mutex_unlock (&dst_devicep
->lock
);
2822 return (ret
? 0 : EINVAL
);
2824 if (dst_devicep
== NULL
)
2826 gomp_mutex_lock (&src_devicep
->lock
);
2827 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2828 (char *) dst
+ dst_offset
,
2829 (char *) src
+ src_offset
, length
);
2830 gomp_mutex_unlock (&src_devicep
->lock
);
2831 return (ret
? 0 : EINVAL
);
2833 if (src_devicep
== dst_devicep
)
2835 gomp_mutex_lock (&src_devicep
->lock
);
2836 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2837 (char *) dst
+ dst_offset
,
2838 (char *) src
+ src_offset
, length
);
2839 gomp_mutex_unlock (&src_devicep
->lock
);
2840 return (ret
? 0 : EINVAL
);
2846 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
2847 int num_dims
, const size_t *volume
,
2848 const size_t *dst_offsets
,
2849 const size_t *src_offsets
,
2850 const size_t *dst_dimensions
,
2851 const size_t *src_dimensions
,
2852 struct gomp_device_descr
*dst_devicep
,
2853 struct gomp_device_descr
*src_devicep
)
2855 size_t dst_slice
= element_size
;
2856 size_t src_slice
= element_size
;
2857 size_t j
, dst_off
, src_off
, length
;
2862 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2863 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2864 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2866 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2868 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
2872 else if (src_devicep
== NULL
)
2873 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2874 (char *) dst
+ dst_off
,
2875 (const char *) src
+ src_off
,
2877 else if (dst_devicep
== NULL
)
2878 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2879 (char *) dst
+ dst_off
,
2880 (const char *) src
+ src_off
,
2882 else if (src_devicep
== dst_devicep
)
2883 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2884 (char *) dst
+ dst_off
,
2885 (const char *) src
+ src_off
,
2889 return ret
? 0 : EINVAL
;
2892 /* FIXME: it would be nice to have some plugin function to handle
2893 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2894 be handled in the generic recursion below, and for host-host it
2895 should be used even for any num_dims >= 2. */
2897 for (i
= 1; i
< num_dims
; i
++)
2898 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2899 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2901 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2902 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2904 for (j
= 0; j
< volume
[0]; j
++)
2906 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2907 (const char *) src
+ src_off
,
2908 element_size
, num_dims
- 1,
2909 volume
+ 1, dst_offsets
+ 1,
2910 src_offsets
+ 1, dst_dimensions
+ 1,
2911 src_dimensions
+ 1, dst_devicep
,
2915 dst_off
+= dst_slice
;
2916 src_off
+= src_slice
;
2922 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
2923 int num_dims
, const size_t *volume
,
2924 const size_t *dst_offsets
,
2925 const size_t *src_offsets
,
2926 const size_t *dst_dimensions
,
2927 const size_t *src_dimensions
,
2928 int dst_device_num
, int src_device_num
)
2930 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2935 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2937 if (dst_device_num
< 0)
2940 dst_devicep
= resolve_device (dst_device_num
);
2941 if (dst_devicep
== NULL
)
2944 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2945 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2948 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2950 if (src_device_num
< 0)
2953 src_devicep
= resolve_device (src_device_num
);
2954 if (src_devicep
== NULL
)
2957 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2958 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2962 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2966 gomp_mutex_lock (&src_devicep
->lock
);
2967 else if (dst_devicep
)
2968 gomp_mutex_lock (&dst_devicep
->lock
);
2969 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2970 volume
, dst_offsets
, src_offsets
,
2971 dst_dimensions
, src_dimensions
,
2972 dst_devicep
, src_devicep
);
2974 gomp_mutex_unlock (&src_devicep
->lock
);
2975 else if (dst_devicep
)
2976 gomp_mutex_unlock (&dst_devicep
->lock
);
2981 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
2982 size_t size
, size_t device_offset
, int device_num
)
2984 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2990 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2991 if (devicep
== NULL
)
2994 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2995 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2998 gomp_mutex_lock (&devicep
->lock
);
3000 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3001 struct splay_tree_key_s cur_node
;
3004 cur_node
.host_start
= (uintptr_t) host_ptr
;
3005 cur_node
.host_end
= cur_node
.host_start
+ size
;
3006 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3009 if (n
->tgt
->tgt_start
+ n
->tgt_offset
3010 == (uintptr_t) device_ptr
+ device_offset
3011 && n
->host_start
<= cur_node
.host_start
3012 && n
->host_end
>= cur_node
.host_end
)
3017 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
3018 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
3022 tgt
->to_free
= NULL
;
3024 tgt
->list_count
= 0;
3025 tgt
->device_descr
= devicep
;
3026 splay_tree_node array
= tgt
->array
;
3027 splay_tree_key k
= &array
->key
;
3028 k
->host_start
= cur_node
.host_start
;
3029 k
->host_end
= cur_node
.host_end
;
3031 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
3032 k
->refcount
= REFCOUNT_INFINITY
;
3033 k
->dynamic_refcount
= 0;
3036 array
->right
= NULL
;
3037 splay_tree_insert (&devicep
->mem_map
, array
);
3040 gomp_mutex_unlock (&devicep
->lock
);
3045 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
3047 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
3053 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3054 if (devicep
== NULL
)
3057 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3060 gomp_mutex_lock (&devicep
->lock
);
3062 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3063 struct splay_tree_key_s cur_node
;
3066 cur_node
.host_start
= (uintptr_t) ptr
;
3067 cur_node
.host_end
= cur_node
.host_start
;
3068 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3070 && n
->host_start
== cur_node
.host_start
3071 && n
->refcount
== REFCOUNT_INFINITY
3072 && n
->tgt
->tgt_start
== 0
3073 && n
->tgt
->to_free
== NULL
3074 && n
->tgt
->refcount
== 1
3075 && n
->tgt
->list_count
== 0)
3077 splay_tree_remove (&devicep
->mem_map
, n
);
3078 gomp_unmap_tgt (n
->tgt
);
3082 gomp_mutex_unlock (&devicep
->lock
);
3087 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
3090 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
3091 return gomp_pause_host ();
3092 if (device_num
< 0 || device_num
>= gomp_get_num_devices ())
3094 /* Do nothing for target devices for now. */
3099 omp_pause_resource_all (omp_pause_resource_t kind
)
3102 if (gomp_pause_host ())
3104 /* Do nothing for target devices for now. */
3108 ialias (omp_pause_resource
)
3109 ialias (omp_pause_resource_all
)
3111 #ifdef PLUGIN_SUPPORT
3113 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
3115 The handles of the found functions are stored in the corresponding fields
3116 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3119 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
3120 const char *plugin_name
)
3122 const char *err
= NULL
, *last_missing
= NULL
;
3124 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
3128 /* Check if all required functions are available in the plugin and store
3129 their handlers. None of the symbols can legitimately be NULL,
3130 so we don't need to check dlerror all the time. */
3132 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3134 /* Similar, but missing functions are not an error. Return false if
3135 failed, true otherwise. */
3136 #define DLSYM_OPT(f, n) \
3137 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3138 || (last_missing = #n, 0))
3141 if (device
->version_func () != GOMP_VERSION
)
3143 err
= "plugin version mismatch";
3150 DLSYM (get_num_devices
);
3151 DLSYM (init_device
);
3152 DLSYM (fini_device
);
3154 DLSYM (unload_image
);
3159 device
->capabilities
= device
->get_caps_func ();
3160 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3163 DLSYM_OPT (async_run
, async_run
);
3164 DLSYM_OPT (can_run
, can_run
);
3167 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3169 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
3170 || !DLSYM_OPT (openacc
.create_thread_data
,
3171 openacc_create_thread_data
)
3172 || !DLSYM_OPT (openacc
.destroy_thread_data
,
3173 openacc_destroy_thread_data
)
3174 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
3175 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
3176 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
3177 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
3178 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
3179 || !DLSYM_OPT (openacc
.async
.queue_callback
,
3180 openacc_async_queue_callback
)
3181 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
3182 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
3183 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
3184 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
3186 /* Require all the OpenACC handlers if we have
3187 GOMP_OFFLOAD_CAP_OPENACC_200. */
3188 err
= "plugin missing OpenACC handler function";
3193 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
3194 openacc_cuda_get_current_device
);
3195 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
3196 openacc_cuda_get_current_context
);
3197 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
3198 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
3199 if (cuda
&& cuda
!= 4)
3201 /* Make sure all the CUDA functions are there if any of them are. */
3202 err
= "plugin missing OpenACC CUDA handler function";
3214 gomp_error ("while loading %s: %s", plugin_name
, err
);
3216 gomp_error ("missing function was %s", last_missing
);
3218 dlclose (plugin_handle
);
3223 /* This function finalizes all initialized devices. */
3226 gomp_target_fini (void)
3229 for (i
= 0; i
< num_devices
; i
++)
3232 struct gomp_device_descr
*devicep
= &devices
[i
];
3233 gomp_mutex_lock (&devicep
->lock
);
3234 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3235 ret
= gomp_fini_device (devicep
);
3236 gomp_mutex_unlock (&devicep
->lock
);
3238 gomp_fatal ("device finalization failed");
3242 /* This function initializes the runtime for offloading.
3243 It parses the list of offload plugins, and tries to load these.
3244 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3245 will be set, and the array DEVICES initialized, containing descriptors for
3246 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3250 gomp_target_init (void)
3252 const char *prefix
="libgomp-plugin-";
3253 const char *suffix
= SONAME_SUFFIX (1);
3254 const char *cur
, *next
;
3256 int i
, new_num_devices
;
3261 cur
= OFFLOAD_PLUGINS
;
3265 struct gomp_device_descr current_device
;
3266 size_t prefix_len
, suffix_len
, cur_len
;
3268 next
= strchr (cur
, ',');
3270 prefix_len
= strlen (prefix
);
3271 cur_len
= next
? next
- cur
: strlen (cur
);
3272 suffix_len
= strlen (suffix
);
3274 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
3281 memcpy (plugin_name
, prefix
, prefix_len
);
3282 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
3283 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
3285 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
3287 new_num_devices
= current_device
.get_num_devices_func ();
3288 if (new_num_devices
>= 1)
3290 /* Augment DEVICES and NUM_DEVICES. */
3292 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
3293 * sizeof (struct gomp_device_descr
));
3301 current_device
.name
= current_device
.get_name_func ();
3302 /* current_device.capabilities has already been set. */
3303 current_device
.type
= current_device
.get_type_func ();
3304 current_device
.mem_map
.root
= NULL
;
3305 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
3306 for (i
= 0; i
< new_num_devices
; i
++)
3308 current_device
.target_id
= i
;
3309 devices
[num_devices
] = current_device
;
3310 gomp_mutex_init (&devices
[num_devices
].lock
);
3321 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3322 NUM_DEVICES_OPENMP. */
3323 struct gomp_device_descr
*devices_s
3324 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
3331 num_devices_openmp
= 0;
3332 for (i
= 0; i
< num_devices
; i
++)
3333 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3334 devices_s
[num_devices_openmp
++] = devices
[i
];
3335 int num_devices_after_openmp
= num_devices_openmp
;
3336 for (i
= 0; i
< num_devices
; i
++)
3337 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3338 devices_s
[num_devices_after_openmp
++] = devices
[i
];
3340 devices
= devices_s
;
3342 for (i
= 0; i
< num_devices
; i
++)
3344 /* The 'devices' array can be moved (by the realloc call) until we have
3345 found all the plugins, so registering with the OpenACC runtime (which
3346 takes a copy of the pointer argument) must be delayed until now. */
3347 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3348 goacc_register (&devices
[i
]);
3351 if (atexit (gomp_target_fini
) != 0)
3352 gomp_fatal ("atexit failed");
3355 #else /* PLUGIN_SUPPORT */
3356 /* If dlfcn.h is unavailable we always fallback to host execution.
3357 GOMP_target* routines are just stubs for this case. */
3359 gomp_target_init (void)
3362 #endif /* PLUGIN_SUPPORT */