/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "gomp-constants.h"
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include "plugin-suffix.h"
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr
{
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;
/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
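

/* Look up KEY in MEM_MAP.  An ordinary key is looked up directly; for a
   zero-length key (host_start == host_end) the lookup is retried with the
   range extended by one byte past and then one byte before the address,
   so that a zero-length array section still matches an enclosing
   mapping.  */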
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;

  return splay_tree_lookup (mem_map, key);
}
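

/* Like gomp_map_lookup, but for zero-length keys probe only one byte past
   the address; used for GOMP_MAP_ZERO_LEN_ARRAY_SECTION handling.  */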
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
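
/* For example, with the thresholds above, regions at offsets [0..1024)
   and [2048..3072) of freshly allocated device memory coalesce into one
   chunk [0..3072), since the 1 KiB gap is below MAX_COALESCE_BUF_GAP,
   while a further region starting at offset 8192 opens a new chunk,
   because 8192 >= 3072 + MAX_COALESCE_BUF_GAP.  */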
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Very unlikely, but let's be safe.  */
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
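
/* Copy SZ bytes from host address H to device address D.  If CBUF is
   non-NULL and D falls within one of its chunks, the bytes are staged in
   the host-side coalescing buffer instead (located by a binary search
   over the chunk array, which is sorted by start offset), to be flushed
   to the device in one transfer per chunk later.  */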
static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
attribute_hidden void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
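
/* In both encodings the low bits of a kind select the mapping type (mask
   0x7 for the 8-bit form, 0xff for the 16-bit form) and the remaining
   high bits hold the log2 of the required alignment, hence the recurring
   idiom: size_t align = (size_t) 1 << (kind >> rshift).  */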
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
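
/* Return the device address that mapping I of TGT resolves to.  Entries
   without a key encode special cases in their offset: ~0 passes the host
   address through unchanged (firstprivate ints and use_device_ptr, whose
   hostaddrs[] slot was already rewritten), ~1 yields 0, ~2 computes the
   address relative to the following entry's mapping (GOMP_MAP_STRUCT),
   and any other offset is relative to tgt_start, i.e. firstprivate data
   staged into this target block.  */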
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;
  cbuf.buf = NULL;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
static inline void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
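
/* Remove mapping K from DEVICEP's splay tree, reinserting any saved
   "declare target link" key, and release the backing target memory block
   once no other mapping references it.  Returns true if the block was
   actually unmapped.  */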
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP,
   and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
/* Unload the mappings described by target_data from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
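
/* The two helpers above and below are used as a pair: callers first size
   a stack buffer as gomp_alloca (tgt_size + tgt_align - 1), then let
   copy_firstprivate_data round the pointer up to tgt_align and copy each
   GOMP_MAP_FIRSTPRIVATE datum in, rewriting hostaddrs[] to point at the
   private copies.  */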
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, each of which can take
   one or two elements: the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first element, the whole second element holds the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
1754 gomp_target_data_fallback (void)
1756 struct gomp_task_icv
*icv
= gomp_icv (false);
1757 if (icv
->target_data
)
1759 /* Even when doing a host fallback, if there are any active
1760 #pragma omp target data constructs, need to remember the
1761 new #pragma omp target data, otherwise GOMP_target_end_data
1762 would get out of sync. */
1763 struct target_mem_desc
*tgt
1764 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1765 GOMP_MAP_VARS_DATA
);
1766 tgt
->prev
= icv
->target_data
;
1767 icv
->target_data
= tgt
;
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
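
/* Handle the "omp target exit data" side of GOMP_target_enter_exit_data:
   decrement reference counts, copy data back to the host for from/
   always-from kinds, and drop mappings whose refcount reaches zero.  */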
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule a task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
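
/* For reference, this entry point backs the OpenMP enter/exit data
   directives; an illustrative (not normative) user-level fragment that
   would end up here is:

     #pragma omp target enter data map(to: a[0:n])
     ...
     #pragma omp target exit data map(from: a[0:n])

   with GOMP_TARGET_FLAG_EXIT_DATA set in FLAGS only for the second
   directive, which takes the gomp_exit_data path above.  */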
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
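
/* Illustrative trigger for the asynchronous path above (user code, not
   part of this file):

     #pragma omp target nowait map(tofrom: x)
     x = f (x);

   The plugin's async_run_func runs the region; once the plugin reports
   completion back to libgomp the task state becomes
   GOMP_TARGET_TASK_FINISHED and this function is re-entered to unmap
   TTASK->tgt, per the branch above.  */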
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
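
/* Illustrative user-level trigger (not part of this file):

     #pragma omp teams num_teams(8) thread_limit(64)

   On the host this merely records the thread-limit ICV; NUM_TEAMS is
   deliberately ignored, as the (void) cast above documents.  */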
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
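
/* Usage sketch for the two routines above (hypothetical user code,
   assuming <omp.h> and a configured non-shared-memory device 0):

     size_t n = 1024 * sizeof (double);
     void *dp = omp_target_alloc (n, 0);
     if (dp != NULL)
       {
	 ... use dp with omp_target_memcpy or map clauses ...
	 omp_target_free (dp, 0);
       }

   For GOMP_OFFLOAD_CAP_SHARED_MEM devices both calls degrade to plain
   malloc/free, as above.  */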
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
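
/* Usage sketch (hypothetical user code): round-trip N bytes between
   HOST_BUF and a device allocation DP on device DEV, assuming
   omp_get_initial_device () yields the host-fallback device number, as it
   does in this implementation:

     omp_target_memcpy (dp, host_buf, n, 0, 0, dev,
			omp_get_initial_device ());   // host -> device
     omp_target_memcpy (host_buf, dp, n, 0, 0,
			omp_get_initial_device (), dev);   // device -> host

   A zero return means success; EINVAL covers bad device numbers, failed
   plugin transfers and copies between two distinct devices alike.  */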
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  /* This case handles num_dims == 1 (i.e. a plain memcpy).  */
  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
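
/* Usage sketch (hypothetical user code): copy a 2x3 element block starting
   at row 1, column 2 of a row-major 4x5 source matrix into the top-left
   corner of an equally shaped destination.  VOLUME gives the block extent
   per dimension, and the offset/dimension arrays are in elements, as the
   worker's slice arithmetic above assumes:

     size_t volume[2]      = { 2, 3 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t dims[2]        = { 4, 5 };
     omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
			     dst_offsets, src_offsets, dims, dims,
			     dst_dev, src_dev);

   Calling it with both DST and SRC NULL merely queries the maximum
   supported dimensionality (INT_MAX here, thanks to the generic
   recursion).  */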
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
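
/* Usage sketch (hypothetical user code): pair a host buffer with device
   memory from omp_target_alloc so later lookups and map clauses reuse it:

     void *dp = omp_target_alloc (n, dev);
     if (omp_target_associate_ptr (host_buf, dp, n, 0, dev) == 0)
       {
	 ... omp_target_is_present (host_buf, dev) now returns 1 ...
	 omp_target_disassociate_ptr (host_buf, dev);
       }
     omp_target_free (dp, dev);

   The REFCOUNT_INFINITY refcount set above is what protects such entries
   from being unmapped by exit-data constructs.  */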
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= gomp_get_num_devices ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.register_async_cleanup,
			 openacc_register_async_cleanup)
	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
	  || !DLSYM_OPT (openacc.async_wait_all_async,
			 openacc_async_wait_all_async)
	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
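
/* For clarity, a required-symbol lookup such as DLSYM (init_device) above
   expands to:

     if (!(device->init_device_func = dlsym (plugin_handle,
					     "GOMP_OFFLOAD_init_device")))
       goto dl_fail;

   so every plugin entry point follows the GOMP_OFFLOAD_<name> naming
   convention, and a missing required symbol aborts the load.  */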
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	{
	  ret = devicep->fini_device_func (devicep->target_id);
	  devicep->state = GOMP_DEVICE_FINALIZED;
	}
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_TARGETS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		current_device.openacc.data_environ = NULL;
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
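
/* Plugin-name construction example (configure-time details, so only
   indicative): for an OFFLOAD_TARGETS entry such as "nvptx", the code
   above assembles "libgomp-plugin-nvptx" + SONAME_SUFFIX (1), which on a
   typical ELF/Linux build yields "libgomp-plugin-nvptx.so.1"; that file is
   then dlopen'd by gomp_load_plugin_for_device.  */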
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */