1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
49 static void gomp_target_init (void);
51 /* The whole initialization code for offloading plugins is only run one. */
52 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock
;
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr
{
62 enum offload_target_type type
;
63 const void *host_table
;
64 const void *target_data
;
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr
*offload_images
;
70 /* Total number of offload images. */
71 static int num_offload_images
;
/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
85 gomp_realloc_unlock (void *old
, size_t size
)
87 void *ret
= realloc (old
, size
);
90 gomp_mutex_unlock (®ister_lock
);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
97 gomp_init_targets_once (void)
99 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
103 gomp_get_num_devices (void)
105 gomp_init_targets_once ();
106 return num_devices_openmp
;
109 static struct gomp_device_descr
*
110 resolve_device (int device_id
)
112 if (device_id
== GOMP_DEVICE_ICV
)
114 struct gomp_task_icv
*icv
= gomp_icv (false);
115 device_id
= icv
->default_device_var
;
118 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
121 gomp_mutex_lock (&devices
[device_id
].lock
);
122 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
123 gomp_init_device (&devices
[device_id
]);
124 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
126 gomp_mutex_unlock (&devices
[device_id
].lock
);
129 gomp_mutex_unlock (&devices
[device_id
].lock
);
131 return &devices
[device_id
];
135 static inline splay_tree_key
136 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
138 if (key
->host_start
!= key
->host_end
)
139 return splay_tree_lookup (mem_map
, key
);
142 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
147 n
= splay_tree_lookup (mem_map
, key
);
151 return splay_tree_lookup (mem_map
, key
);
154 static inline splay_tree_key
155 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
157 if (key
->host_start
!= key
->host_end
)
158 return splay_tree_lookup (mem_map
, key
);
161 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
167 gomp_device_copy (struct gomp_device_descr
*devicep
,
168 bool (*copy_func
) (int, void *, const void *, size_t),
169 const char *dst
, void *dstaddr
,
170 const char *src
, const void *srcaddr
,
173 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
175 gomp_mutex_unlock (&devicep
->lock
);
176 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
182 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
183 bool (*copy_func
) (int, void *, const void *, size_t,
184 struct goacc_asyncqueue
*),
185 const char *dst
, void *dstaddr
,
186 const char *src
, const void *srcaddr
,
187 size_t size
, struct goacc_asyncqueue
*aq
)
189 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
191 gomp_mutex_unlock (&devicep
->lock
);
192 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start;
  size_t end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
227 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
229 /* Maximum size of a gap in between regions to consider them being copied
230 within the same chunk. All the device offsets considered are within
231 newly allocated device memory, so it isn't fatal if we copy some padding
232 in between from host to device. The gaps come either from alignment
233 padding or from memory regions which are not supposed to be copied from
234 host to device (e.g. map(alloc:), map(from:) etc.). */
235 #define MAX_COALESCE_BUF_GAP (4 * 1024)
237 /* Add region with device tgt_start relative offset and length to CBUF. */
240 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
242 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
246 if (cbuf
->chunk_cnt
< 0)
248 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
250 cbuf
->chunk_cnt
= -1;
253 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
255 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
259 /* If the last chunk is only used by one mapping, discard it,
260 as it will be one host to device copy anyway and
261 memcpying it around will only waste cycles. */
262 if (cbuf
->use_cnt
== 1)
265 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
266 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
271 /* Return true for mapping kinds which need to copy data from the
272 host to device for regions that weren't previously mapped. */
275 gomp_to_device_kind_p (int kind
)
281 case GOMP_MAP_FORCE_ALLOC
:
282 case GOMP_MAP_ALWAYS_FROM
:
289 attribute_hidden
void
290 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
291 struct goacc_asyncqueue
*aq
,
292 void *d
, const void *h
, size_t sz
,
293 struct gomp_coalesce_buf
*cbuf
)
297 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
298 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
301 long last
= cbuf
->chunk_cnt
- 1;
302 while (first
<= last
)
304 long middle
= (first
+ last
) >> 1;
305 if (cbuf
->chunks
[middle
].end
<= doff
)
307 else if (cbuf
->chunks
[middle
].start
<= doff
)
309 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
310 gomp_fatal ("internal libgomp cbuf error");
311 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
320 if (__builtin_expect (aq
!= NULL
, 0))
321 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
322 "dev", d
, "host", h
, sz
, aq
);
324 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
327 attribute_hidden
void
328 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
329 struct goacc_asyncqueue
*aq
,
330 void *h
, const void *d
, size_t sz
)
332 if (__builtin_expect (aq
!= NULL
, 0))
333 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
334 "host", h
, "dev", d
, sz
, aq
);
336 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
340 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
342 if (!devicep
->free_func (devicep
->target_id
, devptr
))
344 gomp_mutex_unlock (&devicep
->lock
);
345 gomp_fatal ("error in freeing device memory block at %p", devptr
);
349 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
350 gomp_map_0len_lookup found oldn for newn.
351 Helper function of gomp_map_vars. */
354 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
355 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
356 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
357 unsigned char kind
, struct gomp_coalesce_buf
*cbuf
)
360 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
361 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
362 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
363 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
365 if ((kind
& GOMP_MAP_FLAG_FORCE
)
366 || oldn
->host_start
> newn
->host_start
367 || oldn
->host_end
< newn
->host_end
)
369 gomp_mutex_unlock (&devicep
->lock
);
370 gomp_fatal ("Trying to map into device [%p..%p) object when "
371 "[%p..%p) is already mapped",
372 (void *) newn
->host_start
, (void *) newn
->host_end
,
373 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
376 if (GOMP_MAP_ALWAYS_TO_P (kind
))
377 gomp_copy_host2dev (devicep
, aq
,
378 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
379 + newn
->host_start
- oldn
->host_start
),
380 (void *) newn
->host_start
,
381 newn
->host_end
- newn
->host_start
, cbuf
);
383 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Fetch the mapping kind at index IDX from KINDS, which is an array of
   unsigned shorts when SHORT_MAPKIND is true and of unsigned chars
   otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
395 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
396 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
397 struct gomp_coalesce_buf
*cbuf
)
399 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
400 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
401 struct splay_tree_key_s cur_node
;
403 cur_node
.host_start
= host_ptr
;
404 if (cur_node
.host_start
== (uintptr_t) NULL
)
406 cur_node
.tgt_offset
= (uintptr_t) NULL
;
407 gomp_copy_host2dev (devicep
, aq
,
408 (void *) (tgt
->tgt_start
+ target_offset
),
409 (void *) &cur_node
.tgt_offset
,
410 sizeof (void *), cbuf
);
413 /* Add bias to the pointer value. */
414 cur_node
.host_start
+= bias
;
415 cur_node
.host_end
= cur_node
.host_start
;
416 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
419 gomp_mutex_unlock (&devicep
->lock
);
420 gomp_fatal ("Pointer target of array section wasn't mapped");
422 cur_node
.host_start
-= n
->host_start
;
424 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
425 /* At this point tgt_offset is target address of the
426 array section. Now subtract bias to get what we want
427 to initialize the pointer with. */
428 cur_node
.tgt_offset
-= bias
;
429 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
430 (void *) &cur_node
.tgt_offset
, sizeof (void *), cbuf
);
434 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
435 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
436 size_t first
, size_t i
, void **hostaddrs
,
437 size_t *sizes
, void *kinds
,
438 struct gomp_coalesce_buf
*cbuf
)
440 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
441 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
442 struct splay_tree_key_s cur_node
;
444 const bool short_mapkind
= true;
445 const int typemask
= short_mapkind
? 0xff : 0x7;
447 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
448 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
449 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
450 kind
= get_kind (short_mapkind
, kinds
, i
);
453 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
455 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
456 &tgt
->list
[i
], kind
& typemask
, cbuf
);
461 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
463 cur_node
.host_start
--;
464 n2
= splay_tree_lookup (mem_map
, &cur_node
);
465 cur_node
.host_start
++;
468 && n2
->host_start
- n
->host_start
469 == n2
->tgt_offset
- n
->tgt_offset
)
471 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
472 &tgt
->list
[i
], kind
& typemask
, cbuf
);
477 n2
= splay_tree_lookup (mem_map
, &cur_node
);
481 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
483 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
484 kind
& typemask
, cbuf
);
488 gomp_mutex_unlock (&devicep
->lock
);
489 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
490 "other mapped elements from the same structure weren't mapped "
491 "together with it", (void *) cur_node
.host_start
,
492 (void *) cur_node
.host_end
);
495 static inline uintptr_t
496 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
498 if (tgt
->list
[i
].key
!= NULL
)
499 return tgt
->list
[i
].key
->tgt
->tgt_start
500 + tgt
->list
[i
].key
->tgt_offset
501 + tgt
->list
[i
].offset
;
503 switch (tgt
->list
[i
].offset
)
506 return (uintptr_t) hostaddrs
[i
];
512 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
513 + tgt
->list
[i
+ 1].key
->tgt_offset
514 + tgt
->list
[i
+ 1].offset
515 + (uintptr_t) hostaddrs
[i
]
516 - (uintptr_t) hostaddrs
[i
+ 1];
519 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
523 static inline __attribute__((always_inline
)) struct target_mem_desc
*
524 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
525 struct goacc_asyncqueue
*aq
, size_t mapnum
,
526 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
527 void *kinds
, bool short_mapkind
,
528 enum gomp_map_vars_kind pragma_kind
)
530 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
531 bool has_firstprivate
= false;
532 const int rshift
= short_mapkind
? 8 : 3;
533 const int typemask
= short_mapkind
? 0xff : 0x7;
534 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
535 struct splay_tree_key_s cur_node
;
536 struct target_mem_desc
*tgt
537 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
538 tgt
->list_count
= mapnum
;
539 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
540 tgt
->device_descr
= devicep
;
541 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
550 tgt_align
= sizeof (void *);
556 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
558 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
559 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
562 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
564 size_t align
= 4 * sizeof (void *);
566 tgt_size
= mapnum
* sizeof (void *);
568 cbuf
.use_cnt
= 1 + (mapnum
> 1);
569 cbuf
.chunks
[0].start
= 0;
570 cbuf
.chunks
[0].end
= tgt_size
;
573 gomp_mutex_lock (&devicep
->lock
);
574 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
576 gomp_mutex_unlock (&devicep
->lock
);
581 for (i
= 0; i
< mapnum
; i
++)
583 int kind
= get_kind (short_mapkind
, kinds
, i
);
584 if (hostaddrs
[i
] == NULL
585 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
587 tgt
->list
[i
].key
= NULL
;
588 tgt
->list
[i
].offset
= OFFSET_INLINED
;
591 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
593 tgt
->list
[i
].key
= NULL
;
596 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
597 on a separate construct prior to using use_device_{addr,ptr}.
598 In OpenMP 5.0, map directives need to be ordered by the
599 middle-end before the use_device_* clauses. If
600 !not_found_cnt, all mappings requested (if any) are already
601 mapped, so use_device_{addr,ptr} can be resolved right away.
602 Otherwise, if not_found_cnt, gomp_map_lookup might fail
603 now but would succeed after performing the mappings in the
604 following loop. We can't defer this always to the second
605 loop, because it is not even invoked when !not_found_cnt
606 after the first loop. */
607 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
608 cur_node
.host_end
= cur_node
.host_start
;
609 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
612 gomp_mutex_unlock (&devicep
->lock
);
613 gomp_fatal ("use_device_ptr pointer wasn't mapped");
615 cur_node
.host_start
-= n
->host_start
;
617 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
618 + cur_node
.host_start
);
619 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
622 tgt
->list
[i
].offset
= 0;
625 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
627 size_t first
= i
+ 1;
628 size_t last
= i
+ sizes
[i
];
629 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
630 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
632 tgt
->list
[i
].key
= NULL
;
633 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
634 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
637 size_t align
= (size_t) 1 << (kind
>> rshift
);
638 if (tgt_align
< align
)
640 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
641 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
642 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
643 not_found_cnt
+= last
- i
;
644 for (i
= first
; i
<= last
; i
++)
646 tgt
->list
[i
].key
= NULL
;
647 if (gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
649 gomp_coalesce_buf_add (&cbuf
,
650 tgt_size
- cur_node
.host_end
651 + (uintptr_t) hostaddrs
[i
],
657 for (i
= first
; i
<= last
; i
++)
658 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
663 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
665 tgt
->list
[i
].key
= NULL
;
666 tgt
->list
[i
].offset
= OFFSET_POINTER
;
667 has_firstprivate
= true;
670 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
671 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
672 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
674 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
675 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
677 tgt
->list
[i
].key
= NULL
;
679 size_t align
= (size_t) 1 << (kind
>> rshift
);
680 if (tgt_align
< align
)
682 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
683 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
684 cur_node
.host_end
- cur_node
.host_start
);
685 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
686 has_firstprivate
= true;
690 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
692 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
695 tgt
->list
[i
].key
= NULL
;
696 tgt
->list
[i
].offset
= OFFSET_POINTER
;
701 n
= splay_tree_lookup (mem_map
, &cur_node
);
702 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
703 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
704 kind
& typemask
, NULL
);
707 tgt
->list
[i
].key
= NULL
;
709 size_t align
= (size_t) 1 << (kind
>> rshift
);
711 if (tgt_align
< align
)
713 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
714 if (gomp_to_device_kind_p (kind
& typemask
))
715 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
716 cur_node
.host_end
- cur_node
.host_start
);
717 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
718 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
721 for (j
= i
+ 1; j
< mapnum
; j
++)
722 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
725 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
726 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
727 > cur_node
.host_end
))
731 tgt
->list
[j
].key
= NULL
;
742 gomp_mutex_unlock (&devicep
->lock
);
743 gomp_fatal ("unexpected aggregation");
745 tgt
->to_free
= devaddrs
[0];
746 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
747 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
749 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
751 /* Allocate tgt_align aligned tgt_size block of memory. */
752 /* FIXME: Perhaps change interface to allocate properly aligned
754 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
755 tgt_size
+ tgt_align
- 1);
758 gomp_mutex_unlock (&devicep
->lock
);
759 gomp_fatal ("device memory allocation fail");
762 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
763 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
764 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
766 if (cbuf
.use_cnt
== 1)
768 if (cbuf
.chunk_cnt
> 0)
771 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
787 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
788 tgt_size
= mapnum
* sizeof (void *);
791 if (not_found_cnt
|| has_firstprivate
)
794 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
795 splay_tree_node array
= tgt
->array
;
796 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
797 uintptr_t field_tgt_base
= 0;
799 for (i
= 0; i
< mapnum
; i
++)
800 if (tgt
->list
[i
].key
== NULL
)
802 int kind
= get_kind (short_mapkind
, kinds
, i
);
803 if (hostaddrs
[i
] == NULL
)
805 switch (kind
& typemask
)
807 size_t align
, len
, first
, last
;
809 case GOMP_MAP_FIRSTPRIVATE
:
810 align
= (size_t) 1 << (kind
>> rshift
);
811 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
812 tgt
->list
[i
].offset
= tgt_size
;
814 gomp_copy_host2dev (devicep
, aq
,
815 (void *) (tgt
->tgt_start
+ tgt_size
),
816 (void *) hostaddrs
[i
], len
, cbufp
);
819 case GOMP_MAP_FIRSTPRIVATE_INT
:
820 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
822 case GOMP_MAP_USE_DEVICE_PTR
:
823 if (tgt
->list
[i
].offset
== 0)
825 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
826 cur_node
.host_end
= cur_node
.host_start
;
827 n
= gomp_map_lookup (mem_map
, &cur_node
);
830 gomp_mutex_unlock (&devicep
->lock
);
831 gomp_fatal ("use_device_ptr pointer wasn't mapped");
833 cur_node
.host_start
-= n
->host_start
;
835 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
836 + cur_node
.host_start
);
837 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
840 case GOMP_MAP_STRUCT
:
843 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
844 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
846 if (tgt
->list
[first
].key
!= NULL
)
848 n
= splay_tree_lookup (mem_map
, &cur_node
);
851 size_t align
= (size_t) 1 << (kind
>> rshift
);
852 tgt_size
-= (uintptr_t) hostaddrs
[first
]
853 - (uintptr_t) hostaddrs
[i
];
854 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
855 tgt_size
+= (uintptr_t) hostaddrs
[first
]
856 - (uintptr_t) hostaddrs
[i
];
857 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
858 field_tgt_offset
= tgt_size
;
859 field_tgt_clear
= last
;
860 tgt_size
+= cur_node
.host_end
861 - (uintptr_t) hostaddrs
[first
];
864 for (i
= first
; i
<= last
; i
++)
865 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
866 sizes
, kinds
, cbufp
);
869 case GOMP_MAP_ALWAYS_POINTER
:
870 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
871 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
872 n
= splay_tree_lookup (mem_map
, &cur_node
);
874 || n
->host_start
> cur_node
.host_start
875 || n
->host_end
< cur_node
.host_end
)
877 gomp_mutex_unlock (&devicep
->lock
);
878 gomp_fatal ("always pointer not mapped");
880 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
881 != GOMP_MAP_ALWAYS_POINTER
)
882 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
883 if (cur_node
.tgt_offset
)
884 cur_node
.tgt_offset
-= sizes
[i
];
885 gomp_copy_host2dev (devicep
, aq
,
886 (void *) (n
->tgt
->tgt_start
888 + cur_node
.host_start
890 (void *) &cur_node
.tgt_offset
,
891 sizeof (void *), cbufp
);
892 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
893 + cur_node
.host_start
- n
->host_start
;
898 splay_tree_key k
= &array
->key
;
899 k
->host_start
= (uintptr_t) hostaddrs
[i
];
900 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
901 k
->host_end
= k
->host_start
+ sizes
[i
];
903 k
->host_end
= k
->host_start
+ sizeof (void *);
904 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
905 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
906 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
907 kind
& typemask
, cbufp
);
911 if (n
&& n
->refcount
== REFCOUNT_LINK
)
913 /* Replace target address of the pointer with target address
914 of mapped object in the splay tree. */
915 splay_tree_remove (mem_map
, n
);
918 size_t align
= (size_t) 1 << (kind
>> rshift
);
919 tgt
->list
[i
].key
= k
;
921 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
923 k
->tgt_offset
= k
->host_start
- field_tgt_base
925 if (i
== field_tgt_clear
)
926 field_tgt_clear
= FIELD_TGT_EMPTY
;
930 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
931 k
->tgt_offset
= tgt_size
;
932 tgt_size
+= k
->host_end
- k
->host_start
;
934 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
935 tgt
->list
[i
].always_copy_from
936 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
937 tgt
->list
[i
].offset
= 0;
938 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
940 k
->dynamic_refcount
= 0;
944 splay_tree_insert (mem_map
, array
);
945 switch (kind
& typemask
)
949 case GOMP_MAP_FORCE_ALLOC
:
950 case GOMP_MAP_FORCE_FROM
:
951 case GOMP_MAP_ALWAYS_FROM
:
954 case GOMP_MAP_TOFROM
:
955 case GOMP_MAP_FORCE_TO
:
956 case GOMP_MAP_FORCE_TOFROM
:
957 case GOMP_MAP_ALWAYS_TO
:
958 case GOMP_MAP_ALWAYS_TOFROM
:
959 gomp_copy_host2dev (devicep
, aq
,
960 (void *) (tgt
->tgt_start
962 (void *) k
->host_start
,
963 k
->host_end
- k
->host_start
, cbufp
);
965 case GOMP_MAP_POINTER
:
966 gomp_map_pointer (tgt
, aq
,
967 (uintptr_t) *(void **) k
->host_start
,
968 k
->tgt_offset
, sizes
[i
], cbufp
);
970 case GOMP_MAP_TO_PSET
:
971 gomp_copy_host2dev (devicep
, aq
,
972 (void *) (tgt
->tgt_start
974 (void *) k
->host_start
,
975 k
->host_end
- k
->host_start
, cbufp
);
977 for (j
= i
+ 1; j
< mapnum
; j
++)
978 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
982 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
983 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
988 tgt
->list
[j
].key
= k
;
989 tgt
->list
[j
].copy_from
= false;
990 tgt
->list
[j
].always_copy_from
= false;
991 if (k
->refcount
!= REFCOUNT_INFINITY
)
993 gomp_map_pointer (tgt
, aq
,
994 (uintptr_t) *(void **) hostaddrs
[j
],
996 + ((uintptr_t) hostaddrs
[j
]
1002 case GOMP_MAP_FORCE_PRESENT
:
1004 /* We already looked up the memory region above and it
1006 size_t size
= k
->host_end
- k
->host_start
;
1007 gomp_mutex_unlock (&devicep
->lock
);
1008 #ifdef HAVE_INTTYPES_H
1009 gomp_fatal ("present clause: !acc_is_present (%p, "
1010 "%"PRIu64
" (0x%"PRIx64
"))",
1011 (void *) k
->host_start
,
1012 (uint64_t) size
, (uint64_t) size
);
1014 gomp_fatal ("present clause: !acc_is_present (%p, "
1015 "%lu (0x%lx))", (void *) k
->host_start
,
1016 (unsigned long) size
, (unsigned long) size
);
1020 case GOMP_MAP_FORCE_DEVICEPTR
:
1021 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1022 gomp_copy_host2dev (devicep
, aq
,
1023 (void *) (tgt
->tgt_start
1025 (void *) k
->host_start
,
1026 sizeof (void *), cbufp
);
1029 gomp_mutex_unlock (&devicep
->lock
);
1030 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1036 /* Set link pointer on target to the device address of the
1038 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1039 /* We intentionally do not use coalescing here, as it's not
1040 data allocated by the current call to this function. */
1041 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1042 &tgt_addr
, sizeof (void *), NULL
);
1049 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1051 for (i
= 0; i
< mapnum
; i
++)
1053 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1054 gomp_copy_host2dev (devicep
, aq
,
1055 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1056 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1064 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1065 gomp_copy_host2dev (devicep
, aq
,
1066 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1067 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1068 - cbuf
.chunks
[0].start
),
1069 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
, NULL
);
1075 /* If the variable from "omp target enter data" map-list was already mapped,
1076 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1078 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
1084 gomp_mutex_unlock (&devicep
->lock
);
1088 attribute_hidden
struct target_mem_desc
*
1089 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1090 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1091 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
1093 return gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1094 sizes
, kinds
, short_mapkind
, pragma_kind
);
1097 attribute_hidden
struct target_mem_desc
*
1098 gomp_map_vars_async (struct gomp_device_descr
*devicep
,
1099 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1100 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1101 void *kinds
, bool short_mapkind
,
1102 enum gomp_map_vars_kind pragma_kind
)
1104 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1105 sizes
, kinds
, short_mapkind
, pragma_kind
);
1108 attribute_hidden
void
1109 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1111 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1113 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1119 attribute_hidden
bool
1120 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1122 bool is_tgt_unmapped
= false;
1123 splay_tree_remove (&devicep
->mem_map
, k
);
1125 splay_tree_insert (&devicep
->mem_map
, (splay_tree_node
) k
->link_key
);
1126 if (k
->tgt
->refcount
> 1)
1130 is_tgt_unmapped
= true;
1131 gomp_unmap_tgt (k
->tgt
);
1133 return is_tgt_unmapped
;
1137 gomp_unref_tgt (void *ptr
)
1139 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1141 if (tgt
->refcount
> 1)
1144 gomp_unmap_tgt (tgt
);
1147 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1148 variables back from device to host: if it is false, it is assumed that this
1149 has been done already. */
1151 static inline __attribute__((always_inline
)) void
1152 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1153 struct goacc_asyncqueue
*aq
)
1155 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1157 if (tgt
->list_count
== 0)
1163 gomp_mutex_lock (&devicep
->lock
);
1164 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1166 gomp_mutex_unlock (&devicep
->lock
);
1173 for (i
= 0; i
< tgt
->list_count
; i
++)
1175 splay_tree_key k
= tgt
->list
[i
].key
;
1179 bool do_unmap
= false;
1180 if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
1182 else if (k
->refcount
== 1)
1188 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1189 || tgt
->list
[i
].always_copy_from
)
1190 gomp_copy_dev2host (devicep
, aq
,
1191 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1192 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1193 + tgt
->list
[i
].offset
),
1194 tgt
->list
[i
].length
);
1196 gomp_remove_var (devicep
, k
);
1200 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt
,
1203 gomp_unref_tgt ((void *) tgt
);
1205 gomp_mutex_unlock (&devicep
->lock
);
1208 attribute_hidden
void
1209 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
1211 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
);
1214 attribute_hidden
void
1215 gomp_unmap_vars_async (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1216 struct goacc_asyncqueue
*aq
)
1218 gomp_unmap_vars_internal (tgt
, do_copyfrom
, aq
);
1222 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
1223 size_t *sizes
, void *kinds
, bool short_mapkind
)
1226 struct splay_tree_key_s cur_node
;
1227 const int typemask
= short_mapkind
? 0xff : 0x7;
1235 gomp_mutex_lock (&devicep
->lock
);
1236 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1238 gomp_mutex_unlock (&devicep
->lock
);
1242 for (i
= 0; i
< mapnum
; i
++)
1245 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1246 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1247 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1250 int kind
= get_kind (short_mapkind
, kinds
, i
);
1251 if (n
->host_start
> cur_node
.host_start
1252 || n
->host_end
< cur_node
.host_end
)
1254 gomp_mutex_unlock (&devicep
->lock
);
1255 gomp_fatal ("Trying to update [%p..%p) object when "
1256 "only [%p..%p) is mapped",
1257 (void *) cur_node
.host_start
,
1258 (void *) cur_node
.host_end
,
1259 (void *) n
->host_start
,
1260 (void *) n
->host_end
);
1264 void *hostaddr
= (void *) cur_node
.host_start
;
1265 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1266 + cur_node
.host_start
- n
->host_start
);
1267 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
1269 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1270 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
1272 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
1273 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
1276 gomp_mutex_unlock (&devicep
->lock
);
1279 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1280 And insert to splay tree the mapping between addresses from HOST_TABLE and
1281 from loaded target image. We rely in the host and device compiler
1282 emitting variable and functions in the same order. */
1285 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
1286 const void *host_table
, const void *target_data
,
1287 bool is_register_lock
)
1289 void **host_func_table
= ((void ***) host_table
)[0];
1290 void **host_funcs_end
= ((void ***) host_table
)[1];
1291 void **host_var_table
= ((void ***) host_table
)[2];
1292 void **host_vars_end
= ((void ***) host_table
)[3];
1294 /* The func table contains only addresses, the var table contains addresses
1295 and corresponding sizes. */
1296 int num_funcs
= host_funcs_end
- host_func_table
;
1297 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1299 /* Load image to device and get target addresses for the image. */
1300 struct addr_pair
*target_table
= NULL
;
1301 int i
, num_target_entries
;
1304 = devicep
->load_image_func (devicep
->target_id
, version
,
1305 target_data
, &target_table
);
1307 if (num_target_entries
!= num_funcs
+ num_vars
)
1309 gomp_mutex_unlock (&devicep
->lock
);
1310 if (is_register_lock
)
1311 gomp_mutex_unlock (®ister_lock
);
1312 gomp_fatal ("Cannot map target functions or variables"
1313 " (expected %u, have %u)", num_funcs
+ num_vars
,
1314 num_target_entries
);
1317 /* Insert host-target address mapping into splay tree. */
1318 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1319 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
1320 tgt
->refcount
= REFCOUNT_INFINITY
;
1323 tgt
->to_free
= NULL
;
1325 tgt
->list_count
= 0;
1326 tgt
->device_descr
= devicep
;
1327 splay_tree_node array
= tgt
->array
;
1329 for (i
= 0; i
< num_funcs
; i
++)
1331 splay_tree_key k
= &array
->key
;
1332 k
->host_start
= (uintptr_t) host_func_table
[i
];
1333 k
->host_end
= k
->host_start
+ 1;
1335 k
->tgt_offset
= target_table
[i
].start
;
1336 k
->refcount
= REFCOUNT_INFINITY
;
1339 array
->right
= NULL
;
1340 splay_tree_insert (&devicep
->mem_map
, array
);
1344 /* Most significant bit of the size in host and target tables marks
1345 "omp declare target link" variables. */
1346 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1347 const uintptr_t size_mask
= ~link_bit
;
1349 for (i
= 0; i
< num_vars
; i
++)
1351 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1352 uintptr_t target_size
= target_var
->end
- target_var
->start
;
1354 if ((uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
1356 gomp_mutex_unlock (&devicep
->lock
);
1357 if (is_register_lock
)
1358 gomp_mutex_unlock (®ister_lock
);
1359 gomp_fatal ("Cannot map target variables (size mismatch)");
1362 splay_tree_key k
= &array
->key
;
1363 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1365 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1367 k
->tgt_offset
= target_var
->start
;
1368 k
->refcount
= target_size
& link_bit
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
1371 array
->right
= NULL
;
1372 splay_tree_insert (&devicep
->mem_map
, array
);
1376 free (target_table
);
1379 /* Unload the mappings described by target_data from device DEVICE_P.
1380 The device must be locked. */
1383 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1385 const void *host_table
, const void *target_data
)
1387 void **host_func_table
= ((void ***) host_table
)[0];
1388 void **host_funcs_end
= ((void ***) host_table
)[1];
1389 void **host_var_table
= ((void ***) host_table
)[2];
1390 void **host_vars_end
= ((void ***) host_table
)[3];
1392 /* The func table contains only addresses, the var table contains addresses
1393 and corresponding sizes. */
1394 int num_funcs
= host_funcs_end
- host_func_table
;
1395 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1397 struct splay_tree_key_s k
;
1398 splay_tree_key node
= NULL
;
1400 /* Find mapping at start of node array */
1401 if (num_funcs
|| num_vars
)
1403 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1404 : (uintptr_t) host_var_table
[0]);
1405 k
.host_end
= k
.host_start
+ 1;
1406 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1409 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
1411 gomp_mutex_unlock (&devicep
->lock
);
1412 gomp_fatal ("image unload fail");
1415 /* Remove mappings from splay tree. */
1417 for (i
= 0; i
< num_funcs
; i
++)
1419 k
.host_start
= (uintptr_t) host_func_table
[i
];
1420 k
.host_end
= k
.host_start
+ 1;
1421 splay_tree_remove (&devicep
->mem_map
, &k
);
1424 /* Most significant bit of the size in host and target tables marks
1425 "omp declare target link" variables. */
1426 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1427 const uintptr_t size_mask
= ~link_bit
;
1428 bool is_tgt_unmapped
= false;
1430 for (i
= 0; i
< num_vars
; i
++)
1432 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
1434 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1436 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
1437 splay_tree_remove (&devicep
->mem_map
, &k
);
1440 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1441 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
1445 if (node
&& !is_tgt_unmapped
)
1452 /* This function should be called from every offload image while loading.
1453 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1454 the target, and TARGET_DATA needed by target plugin. */
1457 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1458 int target_type
, const void *target_data
)
1462 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1463 gomp_fatal ("Library too old for offload (version %u < %u)",
1464 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1466 gomp_mutex_lock (®ister_lock
);
1468 /* Load image to all initialized devices. */
1469 for (i
= 0; i
< num_devices
; i
++)
1471 struct gomp_device_descr
*devicep
= &devices
[i
];
1472 gomp_mutex_lock (&devicep
->lock
);
1473 if (devicep
->type
== target_type
1474 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1475 gomp_load_image_to_device (devicep
, version
,
1476 host_table
, target_data
, true);
1477 gomp_mutex_unlock (&devicep
->lock
);
1480 /* Insert image to array of pending images. */
1482 = gomp_realloc_unlock (offload_images
,
1483 (num_offload_images
+ 1)
1484 * sizeof (struct offload_image_descr
));
1485 offload_images
[num_offload_images
].version
= version
;
1486 offload_images
[num_offload_images
].type
= target_type
;
1487 offload_images
[num_offload_images
].host_table
= host_table
;
1488 offload_images
[num_offload_images
].target_data
= target_data
;
1490 num_offload_images
++;
1491 gomp_mutex_unlock (®ister_lock
);
/* Legacy, unversioned offload-image registration entry point; identical
   to GOMP_offload_register_ver with version 0.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1501 /* This function should be called from every offload image while unloading.
1502 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1503 the target, and TARGET_DATA needed by target plugin. */
1506 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1507 int target_type
, const void *target_data
)
1511 gomp_mutex_lock (®ister_lock
);
1513 /* Unload image from all initialized devices. */
1514 for (i
= 0; i
< num_devices
; i
++)
1516 struct gomp_device_descr
*devicep
= &devices
[i
];
1517 gomp_mutex_lock (&devicep
->lock
);
1518 if (devicep
->type
== target_type
1519 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1520 gomp_unload_image_from_device (devicep
, version
,
1521 host_table
, target_data
);
1522 gomp_mutex_unlock (&devicep
->lock
);
1525 /* Remove image from array of pending images. */
1526 for (i
= 0; i
< num_offload_images
; i
++)
1527 if (offload_images
[i
].target_data
== target_data
)
1529 offload_images
[i
] = offload_images
[--num_offload_images
];
1533 gomp_mutex_unlock (®ister_lock
);
/* Legacy, unversioned offload-image unregistration entry point; identical
   to GOMP_offload_unregister_ver with version 0.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1543 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1544 must be locked on entry, and remains locked on return. */
1546 attribute_hidden
void
1547 gomp_init_device (struct gomp_device_descr
*devicep
)
1550 if (!devicep
->init_device_func (devicep
->target_id
))
1552 gomp_mutex_unlock (&devicep
->lock
);
1553 gomp_fatal ("device initialization failed");
1556 /* Load to device all images registered by the moment. */
1557 for (i
= 0; i
< num_offload_images
; i
++)
1559 struct offload_image_descr
*image
= &offload_images
[i
];
1560 if (image
->type
== devicep
->type
)
1561 gomp_load_image_to_device (devicep
, image
->version
,
1562 image
->host_table
, image
->target_data
,
1566 /* Initialize OpenACC asynchronous queues. */
1567 goacc_init_asyncqueues (devicep
);
1569 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
1572 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1573 must be locked on entry, and remains locked on return. */
1575 attribute_hidden
bool
1576 gomp_fini_device (struct gomp_device_descr
*devicep
)
1578 bool ret
= goacc_fini_asyncqueues (devicep
);
1579 ret
&= devicep
->fini_device_func (devicep
->target_id
);
1580 devicep
->state
= GOMP_DEVICE_FINALIZED
;
1584 attribute_hidden
void
1585 gomp_unload_device (struct gomp_device_descr
*devicep
)
1587 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1591 /* Unload from device all images registered at the moment. */
1592 for (i
= 0; i
< num_offload_images
; i
++)
1594 struct offload_image_descr
*image
= &offload_images
[i
];
1595 if (image
->type
== devicep
->type
)
1596 gomp_unload_image_from_device (devicep
, image
->version
,
1598 image
->target_data
);
1603 /* Free address mapping tables. MM must be locked on entry, and remains locked
1606 attribute_hidden
void
1607 gomp_free_memmap (struct splay_tree_s
*mem_map
)
1609 while (mem_map
->root
)
1611 struct target_mem_desc
*tgt
= mem_map
->root
->key
.tgt
;
1613 splay_tree_remove (mem_map
, &mem_map
->root
->key
);
1619 /* Host fallback for GOMP_target{,_ext} routines. */
1622 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
1624 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1626 memset (thr
, '\0', sizeof (*thr
));
1627 if (gomp_places_list
)
1629 thr
->place
= old_thr
.place
;
1630 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1633 gomp_free_thread (thr
);
1637 /* Calculate alignment and size requirements of a private copy of data shared
1638 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1641 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
1642 unsigned short *kinds
, size_t *tgt_align
,
1646 for (i
= 0; i
< mapnum
; i
++)
1647 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1649 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1650 if (*tgt_align
< align
)
1652 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
1653 *tgt_size
+= sizes
[i
];
1657 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1660 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
1661 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
1664 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
1666 tgt
+= tgt_align
- al
;
1669 for (i
= 0; i
< mapnum
; i
++)
1670 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1672 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1673 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1674 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
1675 hostaddrs
[i
] = tgt
+ tgt_size
;
1676 tgt_size
= tgt_size
+ sizes
[i
];
1680 /* Helper function of GOMP_target{,_ext} routines. */
1683 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
1684 void (*host_fn
) (void *))
1686 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
1687 return (void *) host_fn
;
1690 gomp_mutex_lock (&devicep
->lock
);
1691 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1693 gomp_mutex_unlock (&devicep
->lock
);
1697 struct splay_tree_key_s k
;
1698 k
.host_start
= (uintptr_t) host_fn
;
1699 k
.host_end
= k
.host_start
+ 1;
1700 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1701 gomp_mutex_unlock (&devicep
->lock
);
1705 return (void *) tgt_fn
->tgt_offset
;
1709 /* Called when encountering a target directive. If DEVICE
1710 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1711 GOMP_DEVICE_HOST_FALLBACK (or any value
1712 larger than last available hw device), use host fallback.
1713 FN is address of host code, UNUSED is part of the current ABI, but
1714 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1715 with MAPNUM entries, with addresses of the host objects,
1716 sizes of the host objects (resp. for pointer kind pointer bias
1717 and assumed sizeof (void *) size) and kinds. */
1720 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
1721 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1722 unsigned char *kinds
)
1724 struct gomp_device_descr
*devicep
= resolve_device (device
);
1728 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1729 /* All shared memory devices should use the GOMP_target_ext function. */
1730 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
1731 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
1732 return gomp_target_fallback (fn
, hostaddrs
);
1734 struct target_mem_desc
*tgt_vars
1735 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1736 GOMP_MAP_VARS_TARGET
);
1737 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
1739 gomp_unmap_vars (tgt_vars
, true);
1742 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1743 and several arguments have been added:
1744 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1745 DEPEND is array of dependencies, see GOMP_task for details.
1747 ARGS is a pointer to an array consisting of a variable number of both
1748 device-independent and device-specific arguments, which can take one two
1749 elements where the first specifies for which device it is intended, the type
1750 and optionally also the value. If the value is not present in the first
1751 one, the whole second element the actual value. The last element of the
1752 array is a single NULL. Among the device independent can be for example
1753 NUM_TEAMS and THREAD_LIMIT.
1755 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1756 that value, or 1 if teams construct is not present, or 0, if
1757 teams construct does not have num_teams clause and so the choice is
1758 implementation defined, and -1 if it can't be determined on the host
1759 what value will GOMP_teams have on the device.
1760 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1761 body with that value, or 0, if teams construct does not have thread_limit
1762 clause or the teams construct is not present, or -1 if it can't be
1763 determined on the host what value will GOMP_teams have on the device. */
1766 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
1767 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1768 unsigned int flags
, void **depend
, void **args
)
1770 struct gomp_device_descr
*devicep
= resolve_device (device
);
1771 size_t tgt_align
= 0, tgt_size
= 0;
1772 bool fpc_done
= false;
1774 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
1776 struct gomp_thread
*thr
= gomp_thread ();
1777 /* Create a team if we don't have any around, as nowait
1778 target tasks make sense to run asynchronously even when
1779 outside of any parallel. */
1780 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
1782 struct gomp_team
*team
= gomp_new_team (1);
1783 struct gomp_task
*task
= thr
->task
;
1784 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
1785 team
->prev_ts
= thr
->ts
;
1786 thr
->ts
.team
= team
;
1787 thr
->ts
.team_id
= 0;
1788 thr
->ts
.work_share
= &team
->work_shares
[0];
1789 thr
->ts
.last_work_share
= NULL
;
1790 #ifdef HAVE_SYNC_BUILTINS
1791 thr
->ts
.single_count
= 0;
1793 thr
->ts
.static_trip
= 0;
1794 thr
->task
= &team
->implicit_task
[0];
1795 gomp_init_task (thr
->task
, NULL
, icv
);
1801 thr
->task
= &team
->implicit_task
[0];
1804 pthread_setspecific (gomp_thread_destructor
, thr
);
1807 && !thr
->task
->final_task
)
1809 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
1810 sizes
, kinds
, flags
, depend
, args
,
1811 GOMP_TARGET_TASK_BEFORE_MAP
);
1816 /* If there are depend clauses, but nowait is not present
1817 (or we are in a final task), block the parent task until the
1818 dependencies are resolved and then just continue with the rest
1819 of the function as if it is a merged task. */
1822 struct gomp_thread
*thr
= gomp_thread ();
1823 if (thr
->task
&& thr
->task
->depend_hash
)
1825 /* If we might need to wait, copy firstprivate now. */
1826 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1827 &tgt_align
, &tgt_size
);
1830 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1831 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1832 tgt_align
, tgt_size
);
1835 gomp_task_maybe_wait_for_dependencies (depend
);
1841 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1842 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
1843 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
1847 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1848 &tgt_align
, &tgt_size
);
1851 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1852 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1853 tgt_align
, tgt_size
);
1856 gomp_target_fallback (fn
, hostaddrs
);
1860 struct target_mem_desc
*tgt_vars
;
1861 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1865 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1866 &tgt_align
, &tgt_size
);
1869 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1870 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1871 tgt_align
, tgt_size
);
1877 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
1878 true, GOMP_MAP_VARS_TARGET
);
1879 devicep
->run_func (devicep
->target_id
, fn_addr
,
1880 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
1883 gomp_unmap_vars (tgt_vars
, true);
1886 /* Host fallback for GOMP_target_data{,_ext} routines. */
1889 gomp_target_data_fallback (void)
1891 struct gomp_task_icv
*icv
= gomp_icv (false);
1892 if (icv
->target_data
)
1894 /* Even when doing a host fallback, if there are any active
1895 #pragma omp target data constructs, need to remember the
1896 new #pragma omp target data, otherwise GOMP_target_end_data
1897 would get out of sync. */
1898 struct target_mem_desc
*tgt
1899 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1900 GOMP_MAP_VARS_DATA
);
1901 tgt
->prev
= icv
->target_data
;
1902 icv
->target_data
= tgt
;
1907 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
1908 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1910 struct gomp_device_descr
*devicep
= resolve_device (device
);
1913 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1914 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
1915 return gomp_target_data_fallback ();
1917 struct target_mem_desc
*tgt
1918 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1919 GOMP_MAP_VARS_DATA
);
1920 struct gomp_task_icv
*icv
= gomp_icv (true);
1921 tgt
->prev
= icv
->target_data
;
1922 icv
->target_data
= tgt
;
1926 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
1927 size_t *sizes
, unsigned short *kinds
)
1929 struct gomp_device_descr
*devicep
= resolve_device (device
);
1932 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1933 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1934 return gomp_target_data_fallback ();
1936 struct target_mem_desc
*tgt
1937 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
1938 GOMP_MAP_VARS_DATA
);
1939 struct gomp_task_icv
*icv
= gomp_icv (true);
1940 tgt
->prev
= icv
->target_data
;
1941 icv
->target_data
= tgt
;
1945 GOMP_target_end_data (void)
1947 struct gomp_task_icv
*icv
= gomp_icv (false);
1948 if (icv
->target_data
)
1950 struct target_mem_desc
*tgt
= icv
->target_data
;
1951 icv
->target_data
= tgt
->prev
;
1952 gomp_unmap_vars (tgt
, true);
1957 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
1958 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1960 struct gomp_device_descr
*devicep
= resolve_device (device
);
1963 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1964 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1967 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
1971 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
1972 size_t *sizes
, unsigned short *kinds
,
1973 unsigned int flags
, void **depend
)
1975 struct gomp_device_descr
*devicep
= resolve_device (device
);
1977 /* If there are depend clauses, but nowait is not present,
1978 block the parent task until the dependencies are resolved
1979 and then just continue with the rest of the function as if it
1980 is a merged task. Until we are able to schedule task during
1981 variable mapping or unmapping, ignore nowait if depend clauses
1985 struct gomp_thread
*thr
= gomp_thread ();
1986 if (thr
->task
&& thr
->task
->depend_hash
)
1988 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1990 && !thr
->task
->final_task
)
1992 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1993 mapnum
, hostaddrs
, sizes
, kinds
,
1994 flags
| GOMP_TARGET_FLAG_UPDATE
,
1995 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2000 struct gomp_team
*team
= thr
->ts
.team
;
2001 /* If parallel or taskgroup has been cancelled, don't start new
2003 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2005 if (gomp_team_barrier_cancelled (&team
->barrier
))
2007 if (thr
->task
->taskgroup
)
2009 if (thr
->task
->taskgroup
->cancelled
)
2011 if (thr
->task
->taskgroup
->workshare
2012 && thr
->task
->taskgroup
->prev
2013 && thr
->task
->taskgroup
->prev
->cancelled
)
2018 gomp_task_maybe_wait_for_dependencies (depend
);
2024 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2025 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2028 struct gomp_thread
*thr
= gomp_thread ();
2029 struct gomp_team
*team
= thr
->ts
.team
;
2030 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2031 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2033 if (gomp_team_barrier_cancelled (&team
->barrier
))
2035 if (thr
->task
->taskgroup
)
2037 if (thr
->task
->taskgroup
->cancelled
)
2039 if (thr
->task
->taskgroup
->workshare
2040 && thr
->task
->taskgroup
->prev
2041 && thr
->task
->taskgroup
->prev
->cancelled
)
2046 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2050 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2051 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
2053 const int typemask
= 0xff;
2055 gomp_mutex_lock (&devicep
->lock
);
2056 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2058 gomp_mutex_unlock (&devicep
->lock
);
2062 for (i
= 0; i
< mapnum
; i
++)
2064 struct splay_tree_key_s cur_node
;
2065 unsigned char kind
= kinds
[i
] & typemask
;
2069 case GOMP_MAP_ALWAYS_FROM
:
2070 case GOMP_MAP_DELETE
:
2071 case GOMP_MAP_RELEASE
:
2072 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2073 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2074 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2075 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2076 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2077 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2078 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2079 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2083 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
2085 if ((kind
== GOMP_MAP_DELETE
2086 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
2087 && k
->refcount
!= REFCOUNT_INFINITY
)
2090 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
2091 || kind
== GOMP_MAP_ALWAYS_FROM
)
2092 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
2093 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2094 + cur_node
.host_start
2096 cur_node
.host_end
- cur_node
.host_start
);
2097 if (k
->refcount
== 0)
2099 splay_tree_remove (&devicep
->mem_map
, k
);
2101 splay_tree_insert (&devicep
->mem_map
,
2102 (splay_tree_node
) k
->link_key
);
2103 if (k
->tgt
->refcount
> 1)
2106 gomp_unmap_tgt (k
->tgt
);
2111 gomp_mutex_unlock (&devicep
->lock
);
2112 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2117 gomp_mutex_unlock (&devicep
->lock
);
2121 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
2122 size_t *sizes
, unsigned short *kinds
,
2123 unsigned int flags
, void **depend
)
2125 struct gomp_device_descr
*devicep
= resolve_device (device
);
2127 /* If there are depend clauses, but nowait is not present,
2128 block the parent task until the dependencies are resolved
2129 and then just continue with the rest of the function as if it
2130 is a merged task. Until we are able to schedule task during
2131 variable mapping or unmapping, ignore nowait if depend clauses
2135 struct gomp_thread
*thr
= gomp_thread ();
2136 if (thr
->task
&& thr
->task
->depend_hash
)
2138 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2140 && !thr
->task
->final_task
)
2142 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2143 mapnum
, hostaddrs
, sizes
, kinds
,
2144 flags
, depend
, NULL
,
2145 GOMP_TARGET_TASK_DATA
))
2150 struct gomp_team
*team
= thr
->ts
.team
;
2151 /* If parallel or taskgroup has been cancelled, don't start new
2153 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2155 if (gomp_team_barrier_cancelled (&team
->barrier
))
2157 if (thr
->task
->taskgroup
)
2159 if (thr
->task
->taskgroup
->cancelled
)
2161 if (thr
->task
->taskgroup
->workshare
2162 && thr
->task
->taskgroup
->prev
2163 && thr
->task
->taskgroup
->prev
->cancelled
)
2168 gomp_task_maybe_wait_for_dependencies (depend
);
2174 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2175 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2178 struct gomp_thread
*thr
= gomp_thread ();
2179 struct gomp_team
*team
= thr
->ts
.team
;
2180 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2181 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2183 if (gomp_team_barrier_cancelled (&team
->barrier
))
2185 if (thr
->task
->taskgroup
)
2187 if (thr
->task
->taskgroup
->cancelled
)
2189 if (thr
->task
->taskgroup
->workshare
2190 && thr
->task
->taskgroup
->prev
2191 && thr
->task
->taskgroup
->prev
->cancelled
)
2197 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2198 for (i
= 0; i
< mapnum
; i
++)
2199 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2201 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
2202 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2206 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2207 true, GOMP_MAP_VARS_ENTER_DATA
);
2209 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
2213 gomp_target_task_fn (void *data
)
2215 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
2216 struct gomp_device_descr
*devicep
= ttask
->devicep
;
2218 if (ttask
->fn
!= NULL
)
2222 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2223 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
2224 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2226 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
2227 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
);
2231 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2234 gomp_unmap_vars (ttask
->tgt
, true);
2238 void *actual_arguments
;
2239 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2242 actual_arguments
= ttask
->hostaddrs
;
2246 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
2247 NULL
, ttask
->sizes
, ttask
->kinds
, true,
2248 GOMP_MAP_VARS_TARGET
);
2249 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
2251 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
2253 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
2254 ttask
->args
, (void *) ttask
);
2257 else if (devicep
== NULL
2258 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2259 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2263 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
2264 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2265 ttask
->kinds
, true);
2266 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2267 for (i
= 0; i
< ttask
->mapnum
; i
++)
2268 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2270 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
2271 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
2272 GOMP_MAP_VARS_ENTER_DATA
);
2273 i
+= ttask
->sizes
[i
];
2276 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
2277 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2279 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2285 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
2289 struct gomp_task_icv
*icv
= gomp_icv (true);
2290 icv
->thread_limit_var
2291 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
2297 omp_target_alloc (size_t size
, int device_num
)
2299 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2300 return malloc (size
);
2305 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2306 if (devicep
== NULL
)
2309 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2310 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2311 return malloc (size
);
2313 gomp_mutex_lock (&devicep
->lock
);
2314 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
2315 gomp_mutex_unlock (&devicep
->lock
);
2320 omp_target_free (void *device_ptr
, int device_num
)
2322 if (device_ptr
== NULL
)
2325 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2334 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2335 if (devicep
== NULL
)
2338 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2339 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2345 gomp_mutex_lock (&devicep
->lock
);
2346 gomp_free_device_memory (devicep
, device_ptr
);
2347 gomp_mutex_unlock (&devicep
->lock
);
2351 omp_target_is_present (const void *ptr
, int device_num
)
2356 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2362 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2363 if (devicep
== NULL
)
2366 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2367 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2370 gomp_mutex_lock (&devicep
->lock
);
2371 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2372 struct splay_tree_key_s cur_node
;
2374 cur_node
.host_start
= (uintptr_t) ptr
;
2375 cur_node
.host_end
= cur_node
.host_start
;
2376 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2377 int ret
= n
!= NULL
;
2378 gomp_mutex_unlock (&devicep
->lock
);
2383 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
2384 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
2387 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2390 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2392 if (dst_device_num
< 0)
2395 dst_devicep
= resolve_device (dst_device_num
);
2396 if (dst_devicep
== NULL
)
2399 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2400 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2403 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2405 if (src_device_num
< 0)
2408 src_devicep
= resolve_device (src_device_num
);
2409 if (src_devicep
== NULL
)
2412 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2413 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2416 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2418 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2421 if (src_devicep
== NULL
)
2423 gomp_mutex_lock (&dst_devicep
->lock
);
2424 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2425 (char *) dst
+ dst_offset
,
2426 (char *) src
+ src_offset
, length
);
2427 gomp_mutex_unlock (&dst_devicep
->lock
);
2428 return (ret
? 0 : EINVAL
);
2430 if (dst_devicep
== NULL
)
2432 gomp_mutex_lock (&src_devicep
->lock
);
2433 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2434 (char *) dst
+ dst_offset
,
2435 (char *) src
+ src_offset
, length
);
2436 gomp_mutex_unlock (&src_devicep
->lock
);
2437 return (ret
? 0 : EINVAL
);
2439 if (src_devicep
== dst_devicep
)
2441 gomp_mutex_lock (&src_devicep
->lock
);
2442 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2443 (char *) dst
+ dst_offset
,
2444 (char *) src
+ src_offset
, length
);
2445 gomp_mutex_unlock (&src_devicep
->lock
);
2446 return (ret
? 0 : EINVAL
);
2452 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
2453 int num_dims
, const size_t *volume
,
2454 const size_t *dst_offsets
,
2455 const size_t *src_offsets
,
2456 const size_t *dst_dimensions
,
2457 const size_t *src_dimensions
,
2458 struct gomp_device_descr
*dst_devicep
,
2459 struct gomp_device_descr
*src_devicep
)
2461 size_t dst_slice
= element_size
;
2462 size_t src_slice
= element_size
;
2463 size_t j
, dst_off
, src_off
, length
;
2468 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2469 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2470 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2472 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2474 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
2478 else if (src_devicep
== NULL
)
2479 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2480 (char *) dst
+ dst_off
,
2481 (const char *) src
+ src_off
,
2483 else if (dst_devicep
== NULL
)
2484 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2485 (char *) dst
+ dst_off
,
2486 (const char *) src
+ src_off
,
2488 else if (src_devicep
== dst_devicep
)
2489 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2490 (char *) dst
+ dst_off
,
2491 (const char *) src
+ src_off
,
2495 return ret
? 0 : EINVAL
;
2498 /* FIXME: it would be nice to have some plugin function to handle
2499 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2500 be handled in the generic recursion below, and for host-host it
2501 should be used even for any num_dims >= 2. */
2503 for (i
= 1; i
< num_dims
; i
++)
2504 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2505 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2507 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2508 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2510 for (j
= 0; j
< volume
[0]; j
++)
2512 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2513 (const char *) src
+ src_off
,
2514 element_size
, num_dims
- 1,
2515 volume
+ 1, dst_offsets
+ 1,
2516 src_offsets
+ 1, dst_dimensions
+ 1,
2517 src_dimensions
+ 1, dst_devicep
,
2521 dst_off
+= dst_slice
;
2522 src_off
+= src_slice
;
2528 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
2529 int num_dims
, const size_t *volume
,
2530 const size_t *dst_offsets
,
2531 const size_t *src_offsets
,
2532 const size_t *dst_dimensions
,
2533 const size_t *src_dimensions
,
2534 int dst_device_num
, int src_device_num
)
2536 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2541 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2543 if (dst_device_num
< 0)
2546 dst_devicep
= resolve_device (dst_device_num
);
2547 if (dst_devicep
== NULL
)
2550 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2551 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2554 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2556 if (src_device_num
< 0)
2559 src_devicep
= resolve_device (src_device_num
);
2560 if (src_devicep
== NULL
)
2563 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2564 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2568 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2572 gomp_mutex_lock (&src_devicep
->lock
);
2573 else if (dst_devicep
)
2574 gomp_mutex_lock (&dst_devicep
->lock
);
2575 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2576 volume
, dst_offsets
, src_offsets
,
2577 dst_dimensions
, src_dimensions
,
2578 dst_devicep
, src_devicep
);
2580 gomp_mutex_unlock (&src_devicep
->lock
);
2581 else if (dst_devicep
)
2582 gomp_mutex_unlock (&dst_devicep
->lock
);
2587 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
2588 size_t size
, size_t device_offset
, int device_num
)
2590 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2596 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2597 if (devicep
== NULL
)
2600 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2601 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2604 gomp_mutex_lock (&devicep
->lock
);
2606 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2607 struct splay_tree_key_s cur_node
;
2610 cur_node
.host_start
= (uintptr_t) host_ptr
;
2611 cur_node
.host_end
= cur_node
.host_start
+ size
;
2612 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2615 if (n
->tgt
->tgt_start
+ n
->tgt_offset
2616 == (uintptr_t) device_ptr
+ device_offset
2617 && n
->host_start
<= cur_node
.host_start
2618 && n
->host_end
>= cur_node
.host_end
)
2623 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2624 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2628 tgt
->to_free
= NULL
;
2630 tgt
->list_count
= 0;
2631 tgt
->device_descr
= devicep
;
2632 splay_tree_node array
= tgt
->array
;
2633 splay_tree_key k
= &array
->key
;
2634 k
->host_start
= cur_node
.host_start
;
2635 k
->host_end
= cur_node
.host_end
;
2637 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2638 k
->refcount
= REFCOUNT_INFINITY
;
2640 array
->right
= NULL
;
2641 splay_tree_insert (&devicep
->mem_map
, array
);
2644 gomp_mutex_unlock (&devicep
->lock
);
2649 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
2651 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2657 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2658 if (devicep
== NULL
)
2661 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2664 gomp_mutex_lock (&devicep
->lock
);
2666 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2667 struct splay_tree_key_s cur_node
;
2670 cur_node
.host_start
= (uintptr_t) ptr
;
2671 cur_node
.host_end
= cur_node
.host_start
;
2672 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2674 && n
->host_start
== cur_node
.host_start
2675 && n
->refcount
== REFCOUNT_INFINITY
2676 && n
->tgt
->tgt_start
== 0
2677 && n
->tgt
->to_free
== NULL
2678 && n
->tgt
->refcount
== 1
2679 && n
->tgt
->list_count
== 0)
2681 splay_tree_remove (&devicep
->mem_map
, n
);
2682 gomp_unmap_tgt (n
->tgt
);
2686 gomp_mutex_unlock (&devicep
->lock
);
2691 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
2694 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2695 return gomp_pause_host ();
2696 if (device_num
< 0 || device_num
>= gomp_get_num_devices ())
2698 /* Do nothing for target devices for now. */
2703 omp_pause_resource_all (omp_pause_resource_t kind
)
2706 if (gomp_pause_host ())
2708 /* Do nothing for target devices for now. */
2712 ialias (omp_pause_resource
)
2713 ialias (omp_pause_resource_all
)
2715 #ifdef PLUGIN_SUPPORT
2717 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2719 The handles of the found functions are stored in the corresponding fields
2720 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2723 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2724 const char *plugin_name
)
2726 const char *err
= NULL
, *last_missing
= NULL
;
2728 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2732 /* Check if all required functions are available in the plugin and store
2733 their handlers. None of the symbols can legitimately be NULL,
2734 so we don't need to check dlerror all the time. */
2736 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2738 /* Similar, but missing functions are not an error. Return false if
2739 failed, true otherwise. */
2740 #define DLSYM_OPT(f, n) \
2741 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2742 || (last_missing = #n, 0))
2745 if (device
->version_func () != GOMP_VERSION
)
2747 err
= "plugin version mismatch";
2754 DLSYM (get_num_devices
);
2755 DLSYM (init_device
);
2756 DLSYM (fini_device
);
2758 DLSYM (unload_image
);
2763 device
->capabilities
= device
->get_caps_func ();
2764 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2768 DLSYM_OPT (can_run
, can_run
);
2771 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2773 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
2774 || !DLSYM_OPT (openacc
.create_thread_data
,
2775 openacc_create_thread_data
)
2776 || !DLSYM_OPT (openacc
.destroy_thread_data
,
2777 openacc_destroy_thread_data
)
2778 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
2779 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
2780 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
2781 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
2782 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
2783 || !DLSYM_OPT (openacc
.async
.queue_callback
,
2784 openacc_async_queue_callback
)
2785 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
2786 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
2787 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
))
2789 /* Require all the OpenACC handlers if we have
2790 GOMP_OFFLOAD_CAP_OPENACC_200. */
2791 err
= "plugin missing OpenACC handler function";
2796 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
2797 openacc_cuda_get_current_device
);
2798 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
2799 openacc_cuda_get_current_context
);
2800 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
2801 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
2802 if (cuda
&& cuda
!= 4)
2804 /* Make sure all the CUDA functions are there if any of them are. */
2805 err
= "plugin missing OpenACC CUDA handler function";
2817 gomp_error ("while loading %s: %s", plugin_name
, err
);
2819 gomp_error ("missing function was %s", last_missing
);
2821 dlclose (plugin_handle
);
2826 /* This function finalizes all initialized devices. */
2829 gomp_target_fini (void)
2832 for (i
= 0; i
< num_devices
; i
++)
2835 struct gomp_device_descr
*devicep
= &devices
[i
];
2836 gomp_mutex_lock (&devicep
->lock
);
2837 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2838 ret
= gomp_fini_device (devicep
);
2839 gomp_mutex_unlock (&devicep
->lock
);
2841 gomp_fatal ("device finalization failed");
2845 /* This function initializes the runtime for offloading.
2846 It parses the list of offload plugins, and tries to load these.
2847 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2848 will be set, and the array DEVICES initialized, containing descriptors for
2849 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2853 gomp_target_init (void)
2855 const char *prefix
="libgomp-plugin-";
2856 const char *suffix
= SONAME_SUFFIX (1);
2857 const char *cur
, *next
;
2859 int i
, new_num_devices
;
2864 cur
= OFFLOAD_PLUGINS
;
2868 struct gomp_device_descr current_device
;
2869 size_t prefix_len
, suffix_len
, cur_len
;
2871 next
= strchr (cur
, ',');
2873 prefix_len
= strlen (prefix
);
2874 cur_len
= next
? next
- cur
: strlen (cur
);
2875 suffix_len
= strlen (suffix
);
2877 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
2884 memcpy (plugin_name
, prefix
, prefix_len
);
2885 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
2886 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
2888 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
2890 new_num_devices
= current_device
.get_num_devices_func ();
2891 if (new_num_devices
>= 1)
2893 /* Augment DEVICES and NUM_DEVICES. */
2895 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
2896 * sizeof (struct gomp_device_descr
));
2904 current_device
.name
= current_device
.get_name_func ();
2905 /* current_device.capabilities has already been set. */
2906 current_device
.type
= current_device
.get_type_func ();
2907 current_device
.mem_map
.root
= NULL
;
2908 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
2909 current_device
.openacc
.data_environ
= NULL
;
2910 for (i
= 0; i
< new_num_devices
; i
++)
2912 current_device
.target_id
= i
;
2913 devices
[num_devices
] = current_device
;
2914 gomp_mutex_init (&devices
[num_devices
].lock
);
2925 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2926 NUM_DEVICES_OPENMP. */
2927 struct gomp_device_descr
*devices_s
2928 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
2935 num_devices_openmp
= 0;
2936 for (i
= 0; i
< num_devices
; i
++)
2937 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2938 devices_s
[num_devices_openmp
++] = devices
[i
];
2939 int num_devices_after_openmp
= num_devices_openmp
;
2940 for (i
= 0; i
< num_devices
; i
++)
2941 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2942 devices_s
[num_devices_after_openmp
++] = devices
[i
];
2944 devices
= devices_s
;
2946 for (i
= 0; i
< num_devices
; i
++)
2948 /* The 'devices' array can be moved (by the realloc call) until we have
2949 found all the plugins, so registering with the OpenACC runtime (which
2950 takes a copy of the pointer argument) must be delayed until now. */
2951 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2952 goacc_register (&devices
[i
]);
2955 if (atexit (gomp_target_fini
) != 0)
2956 gomp_fatal ("atexit failed");
2959 #else /* PLUGIN_SUPPORT */
2960 /* If dlfcn.h is unavailable we always fallback to host execution.
2961 GOMP_target* routines are just stubs for this case. */
2963 gomp_target_init (void)
2966 #endif /* PLUGIN_SUPPORT */