1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
30 #include "oacc-plugin.h"
32 #include "gomp-constants.h"
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
45 #include "plugin-suffix.h"
48 static void gomp_target_init (void);
50 /* The whole initialization code for offloading plugins is only run one. */
51 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
53 /* Mutex for offload image registration. */
54 static gomp_mutex_t register_lock
;
56 /* This structure describes an offload image.
57 It contains type of the target device, pointer to host table descriptor, and
58 pointer to target data. */
59 struct offload_image_descr
{
61 enum offload_target_type type
;
62 const void *host_table
;
63 const void *target_data
;
/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
84 gomp_realloc_unlock (void *old
, size_t size
)
86 void *ret
= realloc (old
, size
);
89 gomp_mutex_unlock (®ister_lock
);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
95 /* The comparison function. */
98 splay_compare (splay_tree_key x
, splay_tree_key y
)
100 if (x
->host_start
== x
->host_end
101 && y
->host_start
== y
->host_end
)
103 if (x
->host_end
<= y
->host_start
)
105 if (x
->host_start
>= y
->host_end
)
110 #include "splay-tree.h"
112 attribute_hidden
void
113 gomp_init_targets_once (void)
115 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
119 gomp_get_num_devices (void)
121 gomp_init_targets_once ();
122 return num_devices_openmp
;
125 static struct gomp_device_descr
*
126 resolve_device (int device_id
)
128 if (device_id
== GOMP_DEVICE_ICV
)
130 struct gomp_task_icv
*icv
= gomp_icv (false);
131 device_id
= icv
->default_device_var
;
134 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
137 gomp_mutex_lock (&devices
[device_id
].lock
);
138 if (!devices
[device_id
].is_initialized
)
139 gomp_init_device (&devices
[device_id
]);
140 gomp_mutex_unlock (&devices
[device_id
].lock
);
142 return &devices
[device_id
];
146 static inline splay_tree_key
147 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
149 if (key
->host_start
!= key
->host_end
)
150 return splay_tree_lookup (mem_map
, key
);
153 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
158 n
= splay_tree_lookup (mem_map
, key
);
162 return splay_tree_lookup (mem_map
, key
);
165 /* Handle the case where gomp_map_lookup found oldn for newn.
166 Helper function of gomp_map_vars. */
169 gomp_map_vars_existing (struct gomp_device_descr
*devicep
, splay_tree_key oldn
,
170 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
174 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
175 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
176 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
177 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
179 if ((kind
& GOMP_MAP_FLAG_FORCE
)
180 || oldn
->host_start
> newn
->host_start
181 || oldn
->host_end
< newn
->host_end
)
183 gomp_mutex_unlock (&devicep
->lock
);
184 gomp_fatal ("Trying to map into device [%p..%p) object when "
185 "[%p..%p) is already mapped",
186 (void *) newn
->host_start
, (void *) newn
->host_end
,
187 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
190 if (GOMP_MAP_ALWAYS_TO_P (kind
))
191 devicep
->host2dev_func (devicep
->target_id
,
192 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
193 + newn
->host_start
- oldn
->host_start
),
194 (void *) newn
->host_start
,
195 newn
->host_end
- newn
->host_start
);
196 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Fetch the IDX-th map kind from KINDS, which is an array of unsigned
   shorts for the new (short_mapkind) ABI and of unsigned chars for the
   old one.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
208 gomp_map_pointer (struct target_mem_desc
*tgt
, uintptr_t host_ptr
,
209 uintptr_t target_offset
, uintptr_t bias
)
211 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
212 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
213 struct splay_tree_key_s cur_node
;
215 cur_node
.host_start
= host_ptr
;
216 if (cur_node
.host_start
== (uintptr_t) NULL
)
218 cur_node
.tgt_offset
= (uintptr_t) NULL
;
219 /* FIXME: see comment about coalescing host/dev transfers below. */
220 devicep
->host2dev_func (devicep
->target_id
,
221 (void *) (tgt
->tgt_start
+ target_offset
),
222 (void *) &cur_node
.tgt_offset
,
226 /* Add bias to the pointer value. */
227 cur_node
.host_start
+= bias
;
228 cur_node
.host_end
= cur_node
.host_start
;
229 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
232 gomp_mutex_unlock (&devicep
->lock
);
233 gomp_fatal ("Pointer target of array section wasn't mapped");
235 cur_node
.host_start
-= n
->host_start
;
237 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
238 /* At this point tgt_offset is target address of the
239 array section. Now subtract bias to get what we want
240 to initialize the pointer with. */
241 cur_node
.tgt_offset
-= bias
;
242 /* FIXME: see comment about coalescing host/dev transfers below. */
243 devicep
->host2dev_func (devicep
->target_id
,
244 (void *) (tgt
->tgt_start
+ target_offset
),
245 (void *) &cur_node
.tgt_offset
,
250 gomp_map_fields_existing (struct target_mem_desc
*tgt
, splay_tree_key n
,
251 size_t first
, size_t i
, void **hostaddrs
,
252 size_t *sizes
, void *kinds
)
254 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
255 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
256 struct splay_tree_key_s cur_node
;
258 const bool short_mapkind
= true;
259 const int typemask
= short_mapkind
? 0xff : 0x7;
261 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
262 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
263 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
264 kind
= get_kind (short_mapkind
, kinds
, i
);
267 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
269 gomp_map_vars_existing (devicep
, n2
, &cur_node
,
270 &tgt
->list
[i
], kind
& typemask
);
275 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
277 cur_node
.host_start
--;
278 n2
= splay_tree_lookup (mem_map
, &cur_node
);
279 cur_node
.host_start
++;
282 && n2
->host_start
- n
->host_start
283 == n2
->tgt_offset
- n
->tgt_offset
)
285 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
291 n2
= splay_tree_lookup (mem_map
, &cur_node
);
295 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
297 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
302 gomp_mutex_unlock (&devicep
->lock
);
303 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
304 "other mapped elements from the same structure weren't mapped "
305 "together with it", (void *) cur_node
.host_start
,
306 (void *) cur_node
.host_end
);
309 attribute_hidden
struct target_mem_desc
*
310 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
311 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
312 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
314 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
315 bool has_firstprivate
= false;
316 const int rshift
= short_mapkind
? 8 : 3;
317 const int typemask
= short_mapkind
? 0xff : 0x7;
318 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
319 struct splay_tree_key_s cur_node
;
320 struct target_mem_desc
*tgt
321 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
322 tgt
->list_count
= mapnum
;
323 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
324 tgt
->device_descr
= devicep
;
333 tgt_align
= sizeof (void *);
335 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
337 size_t align
= 4 * sizeof (void *);
339 tgt_size
= mapnum
* sizeof (void *);
342 gomp_mutex_lock (&devicep
->lock
);
344 for (i
= 0; i
< mapnum
; i
++)
346 int kind
= get_kind (short_mapkind
, kinds
, i
);
347 if (hostaddrs
[i
] == NULL
348 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
350 tgt
->list
[i
].key
= NULL
;
351 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
354 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
356 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
357 cur_node
.host_end
= cur_node
.host_start
;
358 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
361 gomp_mutex_unlock (&devicep
->lock
);
362 gomp_fatal ("use_device_ptr pointer wasn't mapped");
364 cur_node
.host_start
-= n
->host_start
;
366 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
367 + cur_node
.host_start
);
368 tgt
->list
[i
].key
= NULL
;
369 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
372 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
374 size_t first
= i
+ 1;
375 size_t last
= i
+ sizes
[i
];
376 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
377 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
379 tgt
->list
[i
].key
= NULL
;
380 tgt
->list
[i
].offset
= ~(uintptr_t) 2;
381 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
384 size_t align
= (size_t) 1 << (kind
>> rshift
);
385 if (tgt_align
< align
)
387 tgt_size
-= (uintptr_t) hostaddrs
[first
]
388 - (uintptr_t) hostaddrs
[i
];
389 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
390 tgt_size
+= cur_node
.host_end
- (uintptr_t) hostaddrs
[i
];
391 not_found_cnt
+= last
- i
;
392 for (i
= first
; i
<= last
; i
++)
393 tgt
->list
[i
].key
= NULL
;
397 for (i
= first
; i
<= last
; i
++)
398 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
403 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
404 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
405 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
407 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
408 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
410 tgt
->list
[i
].key
= NULL
;
412 size_t align
= (size_t) 1 << (kind
>> rshift
);
413 if (tgt_align
< align
)
415 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
416 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
417 has_firstprivate
= true;
421 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
423 n
= gomp_map_lookup (mem_map
, &cur_node
);
426 tgt
->list
[i
].key
= NULL
;
427 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
432 n
= splay_tree_lookup (mem_map
, &cur_node
);
434 gomp_map_vars_existing (devicep
, n
, &cur_node
, &tgt
->list
[i
],
438 tgt
->list
[i
].key
= NULL
;
440 size_t align
= (size_t) 1 << (kind
>> rshift
);
442 if (tgt_align
< align
)
444 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
445 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
446 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
449 for (j
= i
+ 1; j
< mapnum
; j
++)
450 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
453 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
454 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
455 > cur_node
.host_end
))
459 tgt
->list
[j
].key
= NULL
;
470 gomp_mutex_unlock (&devicep
->lock
);
471 gomp_fatal ("unexpected aggregation");
473 tgt
->to_free
= devaddrs
[0];
474 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
475 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
477 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
479 /* Allocate tgt_align aligned tgt_size block of memory. */
480 /* FIXME: Perhaps change interface to allocate properly aligned
482 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
483 tgt_size
+ tgt_align
- 1);
484 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
485 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
486 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
496 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
497 tgt_size
= mapnum
* sizeof (void *);
500 if (not_found_cnt
|| has_firstprivate
)
503 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
504 splay_tree_node array
= tgt
->array
;
505 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
506 uintptr_t field_tgt_base
= 0;
508 for (i
= 0; i
< mapnum
; i
++)
509 if (tgt
->list
[i
].key
== NULL
)
511 int kind
= get_kind (short_mapkind
, kinds
, i
);
512 if (hostaddrs
[i
] == NULL
)
514 switch (kind
& typemask
)
516 size_t align
, len
, first
, last
;
518 case GOMP_MAP_FIRSTPRIVATE
:
519 align
= (size_t) 1 << (kind
>> rshift
);
520 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
521 tgt
->list
[i
].offset
= tgt_size
;
523 devicep
->host2dev_func (devicep
->target_id
,
524 (void *) (tgt
->tgt_start
+ tgt_size
),
525 (void *) hostaddrs
[i
], len
);
528 case GOMP_MAP_FIRSTPRIVATE_INT
:
529 case GOMP_MAP_USE_DEVICE_PTR
:
530 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
532 case GOMP_MAP_STRUCT
:
535 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
536 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
538 if (tgt
->list
[first
].key
!= NULL
)
540 n
= splay_tree_lookup (mem_map
, &cur_node
);
543 size_t align
= (size_t) 1 << (kind
>> rshift
);
544 tgt_size
-= (uintptr_t) hostaddrs
[first
]
545 - (uintptr_t) hostaddrs
[i
];
546 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
547 tgt_size
+= (uintptr_t) hostaddrs
[first
]
548 - (uintptr_t) hostaddrs
[i
];
549 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
550 field_tgt_offset
= tgt_size
;
551 field_tgt_clear
= last
;
552 tgt_size
+= cur_node
.host_end
553 - (uintptr_t) hostaddrs
[first
];
556 for (i
= first
; i
<= last
; i
++)
557 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
564 splay_tree_key k
= &array
->key
;
565 k
->host_start
= (uintptr_t) hostaddrs
[i
];
566 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
567 k
->host_end
= k
->host_start
+ sizes
[i
];
569 k
->host_end
= k
->host_start
+ sizeof (void *);
570 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
572 gomp_map_vars_existing (devicep
, n
, k
, &tgt
->list
[i
],
576 size_t align
= (size_t) 1 << (kind
>> rshift
);
577 tgt
->list
[i
].key
= k
;
579 if (field_tgt_clear
!= ~(size_t) 0)
581 k
->tgt_offset
= k
->host_start
- field_tgt_base
583 if (i
== field_tgt_clear
)
584 field_tgt_clear
= ~(size_t) 0;
588 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
589 k
->tgt_offset
= tgt_size
;
590 tgt_size
+= k
->host_end
- k
->host_start
;
592 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
593 tgt
->list
[i
].always_copy_from
594 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
595 tgt
->list
[i
].offset
= 0;
596 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
598 k
->async_refcount
= 0;
602 splay_tree_insert (mem_map
, array
);
603 switch (kind
& typemask
)
607 case GOMP_MAP_FORCE_ALLOC
:
608 case GOMP_MAP_FORCE_FROM
:
609 case GOMP_MAP_ALWAYS_FROM
:
612 case GOMP_MAP_TOFROM
:
613 case GOMP_MAP_FORCE_TO
:
614 case GOMP_MAP_FORCE_TOFROM
:
615 case GOMP_MAP_ALWAYS_TO
:
616 case GOMP_MAP_ALWAYS_TOFROM
:
617 /* FIXME: Perhaps add some smarts, like if copying
618 several adjacent fields from host to target, use some
619 host buffer to avoid sending each var individually. */
620 devicep
->host2dev_func (devicep
->target_id
,
621 (void *) (tgt
->tgt_start
623 (void *) k
->host_start
,
624 k
->host_end
- k
->host_start
);
626 case GOMP_MAP_POINTER
:
627 gomp_map_pointer (tgt
, (uintptr_t) *(void **) k
->host_start
,
628 k
->tgt_offset
, sizes
[i
]);
630 case GOMP_MAP_TO_PSET
:
631 /* FIXME: see above FIXME comment. */
632 devicep
->host2dev_func (devicep
->target_id
,
633 (void *) (tgt
->tgt_start
635 (void *) k
->host_start
,
636 k
->host_end
- k
->host_start
);
638 for (j
= i
+ 1; j
< mapnum
; j
++)
639 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
643 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
644 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
649 tgt
->list
[j
].key
= k
;
650 tgt
->list
[j
].copy_from
= false;
651 tgt
->list
[j
].always_copy_from
= false;
652 if (k
->refcount
!= REFCOUNT_INFINITY
)
654 gomp_map_pointer (tgt
,
655 (uintptr_t) *(void **) hostaddrs
[j
],
657 + ((uintptr_t) hostaddrs
[j
]
663 case GOMP_MAP_FORCE_PRESENT
:
665 /* We already looked up the memory region above and it
667 size_t size
= k
->host_end
- k
->host_start
;
668 gomp_mutex_unlock (&devicep
->lock
);
669 #ifdef HAVE_INTTYPES_H
670 gomp_fatal ("present clause: !acc_is_present (%p, "
671 "%"PRIu64
" (0x%"PRIx64
"))",
672 (void *) k
->host_start
,
673 (uint64_t) size
, (uint64_t) size
);
675 gomp_fatal ("present clause: !acc_is_present (%p, "
676 "%lu (0x%lx))", (void *) k
->host_start
,
677 (unsigned long) size
, (unsigned long) size
);
681 case GOMP_MAP_FORCE_DEVICEPTR
:
682 assert (k
->host_end
- k
->host_start
== sizeof (void *));
684 devicep
->host2dev_func (devicep
->target_id
,
685 (void *) (tgt
->tgt_start
687 (void *) k
->host_start
,
691 gomp_mutex_unlock (&devicep
->lock
);
692 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
700 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
702 for (i
= 0; i
< mapnum
; i
++)
704 if (tgt
->list
[i
].key
== NULL
)
706 if (tgt
->list
[i
].offset
== ~(uintptr_t) 0)
707 cur_node
.tgt_offset
= (uintptr_t) hostaddrs
[i
];
708 else if (tgt
->list
[i
].offset
== ~(uintptr_t) 1)
709 cur_node
.tgt_offset
= 0;
710 else if (tgt
->list
[i
].offset
== ~(uintptr_t) 2)
711 cur_node
.tgt_offset
= tgt
->list
[i
+ 1].key
->tgt
->tgt_start
712 + tgt
->list
[i
+ 1].key
->tgt_offset
713 + tgt
->list
[i
+ 1].offset
714 + (uintptr_t) hostaddrs
[i
]
715 - (uintptr_t) hostaddrs
[i
+ 1];
717 cur_node
.tgt_offset
= tgt
->tgt_start
718 + tgt
->list
[i
].offset
;
721 cur_node
.tgt_offset
= tgt
->list
[i
].key
->tgt
->tgt_start
722 + tgt
->list
[i
].key
->tgt_offset
723 + tgt
->list
[i
].offset
;
724 /* FIXME: see above FIXME comment. */
725 devicep
->host2dev_func (devicep
->target_id
,
726 (void *) (tgt
->tgt_start
727 + i
* sizeof (void *)),
728 (void *) &cur_node
.tgt_offset
,
733 /* If the variable from "omp target enter data" map-list was already mapped,
734 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
736 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
742 gomp_mutex_unlock (&devicep
->lock
);
747 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
749 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
751 tgt
->device_descr
->free_func (tgt
->device_descr
->target_id
, tgt
->to_free
);
757 /* Decrease the refcount for a set of mapped variables, and queue asychronous
758 copies from the device back to the host after any work that has been issued.
759 Because the regions are still "live", increment an asynchronous reference
760 count to indicate that they should not be unmapped from host-side data
761 structures until the asynchronous copy has completed. */
763 attribute_hidden
void
764 gomp_copy_from_async (struct target_mem_desc
*tgt
)
766 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
769 gomp_mutex_lock (&devicep
->lock
);
771 for (i
= 0; i
< tgt
->list_count
; i
++)
772 if (tgt
->list
[i
].key
== NULL
)
774 else if (tgt
->list
[i
].key
->refcount
> 1)
776 tgt
->list
[i
].key
->refcount
--;
777 tgt
->list
[i
].key
->async_refcount
++;
781 splay_tree_key k
= tgt
->list
[i
].key
;
782 if (tgt
->list
[i
].copy_from
)
783 devicep
->dev2host_func (devicep
->target_id
, (void *) k
->host_start
,
784 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
785 k
->host_end
- k
->host_start
);
788 gomp_mutex_unlock (&devicep
->lock
);
791 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
792 variables back from device to host: if it is false, it is assumed that this
793 has been done already, i.e. by gomp_copy_from_async above. */
795 attribute_hidden
void
796 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
798 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
800 if (tgt
->list_count
== 0)
806 gomp_mutex_lock (&devicep
->lock
);
809 for (i
= 0; i
< tgt
->list_count
; i
++)
811 splay_tree_key k
= tgt
->list
[i
].key
;
815 bool do_unmap
= false;
816 if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
818 else if (k
->refcount
== 1)
820 if (k
->async_refcount
> 0)
829 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
830 || tgt
->list
[i
].always_copy_from
)
831 devicep
->dev2host_func (devicep
->target_id
,
832 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
833 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
834 + tgt
->list
[i
].offset
),
835 tgt
->list
[i
].length
);
838 splay_tree_remove (&devicep
->mem_map
, k
);
839 if (k
->tgt
->refcount
> 1)
842 gomp_unmap_tgt (k
->tgt
);
846 if (tgt
->refcount
> 1)
849 gomp_unmap_tgt (tgt
);
851 gomp_mutex_unlock (&devicep
->lock
);
855 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
856 size_t *sizes
, void *kinds
, bool short_mapkind
)
859 struct splay_tree_key_s cur_node
;
860 const int typemask
= short_mapkind
? 0xff : 0x7;
868 gomp_mutex_lock (&devicep
->lock
);
869 for (i
= 0; i
< mapnum
; i
++)
872 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
873 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
874 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
877 int kind
= get_kind (short_mapkind
, kinds
, i
);
878 if (n
->host_start
> cur_node
.host_start
879 || n
->host_end
< cur_node
.host_end
)
881 gomp_mutex_unlock (&devicep
->lock
);
882 gomp_fatal ("Trying to update [%p..%p) object when "
883 "only [%p..%p) is mapped",
884 (void *) cur_node
.host_start
,
885 (void *) cur_node
.host_end
,
886 (void *) n
->host_start
,
887 (void *) n
->host_end
);
889 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
890 devicep
->host2dev_func (devicep
->target_id
,
891 (void *) (n
->tgt
->tgt_start
893 + cur_node
.host_start
895 (void *) cur_node
.host_start
,
896 cur_node
.host_end
- cur_node
.host_start
);
897 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
898 devicep
->dev2host_func (devicep
->target_id
,
899 (void *) cur_node
.host_start
,
900 (void *) (n
->tgt
->tgt_start
902 + cur_node
.host_start
904 cur_node
.host_end
- cur_node
.host_start
);
907 gomp_mutex_unlock (&devicep
->lock
);
910 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
911 And insert to splay tree the mapping between addresses from HOST_TABLE and
912 from loaded target image. We rely in the host and device compiler
913 emitting variable and functions in the same order. */
916 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
917 const void *host_table
, const void *target_data
,
918 bool is_register_lock
)
920 void **host_func_table
= ((void ***) host_table
)[0];
921 void **host_funcs_end
= ((void ***) host_table
)[1];
922 void **host_var_table
= ((void ***) host_table
)[2];
923 void **host_vars_end
= ((void ***) host_table
)[3];
925 /* The func table contains only addresses, the var table contains addresses
926 and corresponding sizes. */
927 int num_funcs
= host_funcs_end
- host_func_table
;
928 int num_vars
= (host_vars_end
- host_var_table
) / 2;
930 /* Load image to device and get target addresses for the image. */
931 struct addr_pair
*target_table
= NULL
;
932 int i
, num_target_entries
;
935 = devicep
->load_image_func (devicep
->target_id
, version
,
936 target_data
, &target_table
);
938 if (num_target_entries
!= num_funcs
+ num_vars
)
940 gomp_mutex_unlock (&devicep
->lock
);
941 if (is_register_lock
)
942 gomp_mutex_unlock (®ister_lock
);
943 gomp_fatal ("Cannot map target functions or variables"
944 " (expected %u, have %u)", num_funcs
+ num_vars
,
948 /* Insert host-target address mapping into splay tree. */
949 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
950 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
951 tgt
->refcount
= REFCOUNT_INFINITY
;
957 tgt
->device_descr
= devicep
;
958 splay_tree_node array
= tgt
->array
;
960 for (i
= 0; i
< num_funcs
; i
++)
962 splay_tree_key k
= &array
->key
;
963 k
->host_start
= (uintptr_t) host_func_table
[i
];
964 k
->host_end
= k
->host_start
+ 1;
966 k
->tgt_offset
= target_table
[i
].start
;
967 k
->refcount
= REFCOUNT_INFINITY
;
968 k
->async_refcount
= 0;
971 splay_tree_insert (&devicep
->mem_map
, array
);
975 for (i
= 0; i
< num_vars
; i
++)
977 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
978 if (target_var
->end
- target_var
->start
979 != (uintptr_t) host_var_table
[i
* 2 + 1])
981 gomp_mutex_unlock (&devicep
->lock
);
982 if (is_register_lock
)
983 gomp_mutex_unlock (®ister_lock
);
984 gomp_fatal ("Can't map target variables (size mismatch)");
987 splay_tree_key k
= &array
->key
;
988 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
989 k
->host_end
= k
->host_start
+ (uintptr_t) host_var_table
[i
* 2 + 1];
991 k
->tgt_offset
= target_var
->start
;
992 k
->refcount
= REFCOUNT_INFINITY
;
993 k
->async_refcount
= 0;
996 splay_tree_insert (&devicep
->mem_map
, array
);
1000 free (target_table
);
1003 /* Unload the mappings described by target_data from device DEVICE_P.
1004 The device must be locked. */
1007 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1009 const void *host_table
, const void *target_data
)
1011 void **host_func_table
= ((void ***) host_table
)[0];
1012 void **host_funcs_end
= ((void ***) host_table
)[1];
1013 void **host_var_table
= ((void ***) host_table
)[2];
1014 void **host_vars_end
= ((void ***) host_table
)[3];
1016 /* The func table contains only addresses, the var table contains addresses
1017 and corresponding sizes. */
1018 int num_funcs
= host_funcs_end
- host_func_table
;
1019 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1022 struct splay_tree_key_s k
;
1023 splay_tree_key node
= NULL
;
1025 /* Find mapping at start of node array */
1026 if (num_funcs
|| num_vars
)
1028 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1029 : (uintptr_t) host_var_table
[0]);
1030 k
.host_end
= k
.host_start
+ 1;
1031 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1034 devicep
->unload_image_func (devicep
->target_id
, version
, target_data
);
1036 /* Remove mappings from splay tree. */
1037 for (j
= 0; j
< num_funcs
; j
++)
1039 k
.host_start
= (uintptr_t) host_func_table
[j
];
1040 k
.host_end
= k
.host_start
+ 1;
1041 splay_tree_remove (&devicep
->mem_map
, &k
);
1044 for (j
= 0; j
< num_vars
; j
++)
1046 k
.host_start
= (uintptr_t) host_var_table
[j
* 2];
1047 k
.host_end
= k
.host_start
+ (uintptr_t) host_var_table
[j
* 2 + 1];
1048 splay_tree_remove (&devicep
->mem_map
, &k
);
1058 /* This function should be called from every offload image while loading.
1059 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1060 the target, and TARGET_DATA needed by target plugin. */
1063 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1064 int target_type
, const void *target_data
)
1068 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1069 gomp_fatal ("Library too old for offload (version %u < %u)",
1070 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1072 gomp_mutex_lock (®ister_lock
);
1074 /* Load image to all initialized devices. */
1075 for (i
= 0; i
< num_devices
; i
++)
1077 struct gomp_device_descr
*devicep
= &devices
[i
];
1078 gomp_mutex_lock (&devicep
->lock
);
1079 if (devicep
->type
== target_type
&& devicep
->is_initialized
)
1080 gomp_load_image_to_device (devicep
, version
,
1081 host_table
, target_data
, true);
1082 gomp_mutex_unlock (&devicep
->lock
);
1085 /* Insert image to array of pending images. */
1087 = gomp_realloc_unlock (offload_images
,
1088 (num_offload_images
+ 1)
1089 * sizeof (struct offload_image_descr
));
1090 offload_images
[num_offload_images
].version
= version
;
1091 offload_images
[num_offload_images
].type
= target_type
;
1092 offload_images
[num_offload_images
].host_table
= host_table
;
1093 offload_images
[num_offload_images
].target_data
= target_data
;
1095 num_offload_images
++;
1096 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: register an offload image using interface version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1106 /* This function should be called from every offload image while unloading.
1107 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1108 the target, and TARGET_DATA needed by target plugin. */
1111 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1112 int target_type
, const void *target_data
)
1116 gomp_mutex_lock (®ister_lock
);
1118 /* Unload image from all initialized devices. */
1119 for (i
= 0; i
< num_devices
; i
++)
1121 struct gomp_device_descr
*devicep
= &devices
[i
];
1122 gomp_mutex_lock (&devicep
->lock
);
1123 if (devicep
->type
== target_type
&& devicep
->is_initialized
)
1124 gomp_unload_image_from_device (devicep
, version
,
1125 host_table
, target_data
);
1126 gomp_mutex_unlock (&devicep
->lock
);
1129 /* Remove image from array of pending images. */
1130 for (i
= 0; i
< num_offload_images
; i
++)
1131 if (offload_images
[i
].target_data
== target_data
)
1133 offload_images
[i
] = offload_images
[--num_offload_images
];
1137 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: unregister an offload image using interface
   version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1147 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1148 must be locked on entry, and remains locked on return. */
1150 attribute_hidden
void
1151 gomp_init_device (struct gomp_device_descr
*devicep
)
1154 devicep
->init_device_func (devicep
->target_id
);
1156 /* Load to device all images registered by the moment. */
1157 for (i
= 0; i
< num_offload_images
; i
++)
1159 struct offload_image_descr
*image
= &offload_images
[i
];
1160 if (image
->type
== devicep
->type
)
1161 gomp_load_image_to_device (devicep
, image
->version
,
1162 image
->host_table
, image
->target_data
,
1166 devicep
->is_initialized
= true;
1169 attribute_hidden
void
1170 gomp_unload_device (struct gomp_device_descr
*devicep
)
1172 if (devicep
->is_initialized
)
1176 /* Unload from device all images registered at the moment. */
1177 for (i
= 0; i
< num_offload_images
; i
++)
1179 struct offload_image_descr
*image
= &offload_images
[i
];
1180 if (image
->type
== devicep
->type
)
1181 gomp_unload_image_from_device (devicep
, image
->version
,
1183 image
->target_data
);
1188 /* Free address mapping tables. MM must be locked on entry, and remains locked
1191 attribute_hidden
void
1192 gomp_free_memmap (struct splay_tree_s
*mem_map
)
1194 while (mem_map
->root
)
1196 struct target_mem_desc
*tgt
= mem_map
->root
->key
.tgt
;
1198 splay_tree_remove (mem_map
, &mem_map
->root
->key
);
1204 /* This function de-initializes the target device, specified by DEVICEP.
1205 DEVICEP must be locked on entry, and remains locked on return. */
1207 attribute_hidden
void
1208 gomp_fini_device (struct gomp_device_descr
*devicep
)
1210 if (devicep
->is_initialized
)
1211 devicep
->fini_device_func (devicep
->target_id
);
1213 devicep
->is_initialized
= false;
1216 /* Host fallback for GOMP_target{,_41} routines. */
1219 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
1221 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1223 memset (thr
, '\0', sizeof (*thr
));
1224 if (gomp_places_list
)
1226 thr
->place
= old_thr
.place
;
1227 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1230 gomp_free_thread (thr
);
1234 /* Helper function of GOMP_target{,_41} routines. */
1237 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
1238 void (*host_fn
) (void *))
1240 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
1241 return (void *) host_fn
;
1244 gomp_mutex_lock (&devicep
->lock
);
1245 struct splay_tree_key_s k
;
1246 k
.host_start
= (uintptr_t) host_fn
;
1247 k
.host_end
= k
.host_start
+ 1;
1248 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1249 gomp_mutex_unlock (&devicep
->lock
);
1251 gomp_fatal ("Target function wasn't mapped");
1253 return (void *) tgt_fn
->tgt_offset
;
1257 /* Called when encountering a target directive. If DEVICE
1258 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1259 GOMP_DEVICE_HOST_FALLBACK (or any value
1260 larger than last available hw device), use host fallback.
1261 FN is address of host code, UNUSED is part of the current ABI, but
1262 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1263 with MAPNUM entries, with addresses of the host objects,
1264 sizes of the host objects (resp. for pointer kind pointer bias
1265 and assumed sizeof (void *) size) and kinds. */
1268 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
1269 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1270 unsigned char *kinds
)
1272 struct gomp_device_descr
*devicep
= resolve_device (device
);
1275 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1276 return gomp_target_fallback (fn
, hostaddrs
);
1278 void *fn_addr
= gomp_get_target_fn_addr (devicep
, fn
);
1280 struct target_mem_desc
*tgt_vars
1281 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1282 GOMP_MAP_VARS_TARGET
);
1283 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1285 memset (thr
, '\0', sizeof (*thr
));
1286 if (gomp_places_list
)
1288 thr
->place
= old_thr
.place
;
1289 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1291 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
);
1292 gomp_free_thread (thr
);
1294 gomp_unmap_vars (tgt_vars
, true);
1298 GOMP_target_41 (int device
, void (*fn
) (void *), size_t mapnum
,
1299 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1300 unsigned int flags
, void **depend
)
1302 struct gomp_device_descr
*devicep
= resolve_device (device
);
1304 /* If there are depend clauses, but nowait is not present,
1305 block the parent task until the dependencies are resolved
1306 and then just continue with the rest of the function as if it
1307 is a merged task. */
1310 struct gomp_thread
*thr
= gomp_thread ();
1311 if (thr
->task
&& thr
->task
->depend_hash
)
1312 gomp_task_maybe_wait_for_dependencies (depend
);
1316 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1318 size_t i
, tgt_align
= 0, tgt_size
= 0;
1320 for (i
= 0; i
< mapnum
; i
++)
1321 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1323 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1324 if (tgt_align
< align
)
1326 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1327 tgt_size
+= sizes
[i
];
1331 tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1332 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
1334 tgt
+= tgt_align
- al
;
1336 for (i
= 0; i
< mapnum
; i
++)
1337 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1339 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1340 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1341 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
1342 hostaddrs
[i
] = tgt
+ tgt_size
;
1343 tgt_size
= tgt_size
+ sizes
[i
];
1346 gomp_target_fallback (fn
, hostaddrs
);
1350 void *fn_addr
= gomp_get_target_fn_addr (devicep
, fn
);
1352 struct target_mem_desc
*tgt_vars
1353 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
1354 GOMP_MAP_VARS_TARGET
);
1355 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1357 memset (thr
, '\0', sizeof (*thr
));
1358 if (gomp_places_list
)
1360 thr
->place
= old_thr
.place
;
1361 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1363 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
);
1364 gomp_free_thread (thr
);
1366 gomp_unmap_vars (tgt_vars
, true);
1369 /* Host fallback for GOMP_target_data{,_41} routines. */
1372 gomp_target_data_fallback (void)
1374 struct gomp_task_icv
*icv
= gomp_icv (false);
1375 if (icv
->target_data
)
1377 /* Even when doing a host fallback, if there are any active
1378 #pragma omp target data constructs, need to remember the
1379 new #pragma omp target data, otherwise GOMP_target_end_data
1380 would get out of sync. */
1381 struct target_mem_desc
*tgt
1382 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1383 GOMP_MAP_VARS_DATA
);
1384 tgt
->prev
= icv
->target_data
;
1385 icv
->target_data
= tgt
;
1390 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
1391 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1393 struct gomp_device_descr
*devicep
= resolve_device (device
);
1396 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1397 return gomp_target_data_fallback ();
1399 struct target_mem_desc
*tgt
1400 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1401 GOMP_MAP_VARS_DATA
);
1402 struct gomp_task_icv
*icv
= gomp_icv (true);
1403 tgt
->prev
= icv
->target_data
;
1404 icv
->target_data
= tgt
;
1408 GOMP_target_data_41 (int device
, size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1409 unsigned short *kinds
)
1411 struct gomp_device_descr
*devicep
= resolve_device (device
);
1414 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1415 return gomp_target_data_fallback ();
1417 struct target_mem_desc
*tgt
1418 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
1419 GOMP_MAP_VARS_DATA
);
1420 struct gomp_task_icv
*icv
= gomp_icv (true);
1421 tgt
->prev
= icv
->target_data
;
1422 icv
->target_data
= tgt
;
1426 GOMP_target_end_data (void)
1428 struct gomp_task_icv
*icv
= gomp_icv (false);
1429 if (icv
->target_data
)
1431 struct target_mem_desc
*tgt
= icv
->target_data
;
1432 icv
->target_data
= tgt
->prev
;
1433 gomp_unmap_vars (tgt
, true);
1438 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
1439 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1441 struct gomp_device_descr
*devicep
= resolve_device (device
);
1444 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1447 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
1451 GOMP_target_update_41 (int device
, size_t mapnum
, void **hostaddrs
,
1452 size_t *sizes
, unsigned short *kinds
,
1453 unsigned int flags
, void **depend
)
1455 struct gomp_device_descr
*devicep
= resolve_device (device
);
1457 /* If there are depend clauses, but nowait is not present,
1458 block the parent task until the dependencies are resolved
1459 and then just continue with the rest of the function as if it
1460 is a merged task. Until we are able to schedule task during
1461 variable mapping or unmapping, ignore nowait if depend clauses
1465 struct gomp_thread
*thr
= gomp_thread ();
1466 if (thr
->task
&& thr
->task
->depend_hash
)
1468 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1470 && !thr
->task
->final_task
)
1472 gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1473 mapnum
, hostaddrs
, sizes
, kinds
,
1474 flags
| GOMP_TARGET_FLAG_UPDATE
,
1479 struct gomp_team
*team
= thr
->ts
.team
;
1480 /* If parallel or taskgroup has been cancelled, don't start new
1483 && (gomp_team_barrier_cancelled (&team
->barrier
)
1484 || (thr
->task
->taskgroup
1485 && thr
->task
->taskgroup
->cancelled
)))
1488 gomp_task_maybe_wait_for_dependencies (depend
);
1493 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1496 struct gomp_thread
*thr
= gomp_thread ();
1497 struct gomp_team
*team
= thr
->ts
.team
;
1498 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1500 && (gomp_team_barrier_cancelled (&team
->barrier
)
1501 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1504 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
1508 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
1509 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
1511 const int typemask
= 0xff;
1513 gomp_mutex_lock (&devicep
->lock
);
1514 for (i
= 0; i
< mapnum
; i
++)
1516 struct splay_tree_key_s cur_node
;
1517 unsigned char kind
= kinds
[i
] & typemask
;
1521 case GOMP_MAP_ALWAYS_FROM
:
1522 case GOMP_MAP_DELETE
:
1523 case GOMP_MAP_RELEASE
:
1524 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1525 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
1526 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1527 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1528 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1529 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1530 ? gomp_map_lookup (&devicep
->mem_map
, &cur_node
)
1531 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1535 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
1537 if ((kind
== GOMP_MAP_DELETE
1538 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
1539 && k
->refcount
!= REFCOUNT_INFINITY
)
1542 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
1543 || kind
== GOMP_MAP_ALWAYS_FROM
)
1544 devicep
->dev2host_func (devicep
->target_id
,
1545 (void *) cur_node
.host_start
,
1546 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1547 + cur_node
.host_start
1549 cur_node
.host_end
- cur_node
.host_start
);
1550 if (k
->refcount
== 0)
1552 splay_tree_remove (&devicep
->mem_map
, k
);
1553 if (k
->tgt
->refcount
> 1)
1556 gomp_unmap_tgt (k
->tgt
);
1561 gomp_mutex_unlock (&devicep
->lock
);
1562 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1567 gomp_mutex_unlock (&devicep
->lock
);
1571 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
1572 size_t *sizes
, unsigned short *kinds
,
1573 unsigned int flags
, void **depend
)
1575 struct gomp_device_descr
*devicep
= resolve_device (device
);
1577 /* If there are depend clauses, but nowait is not present,
1578 block the parent task until the dependencies are resolved
1579 and then just continue with the rest of the function as if it
1580 is a merged task. Until we are able to schedule task during
1581 variable mapping or unmapping, ignore nowait if depend clauses
1585 struct gomp_thread
*thr
= gomp_thread ();
1586 if (thr
->task
&& thr
->task
->depend_hash
)
1588 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1590 && !thr
->task
->final_task
)
1592 gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1593 mapnum
, hostaddrs
, sizes
, kinds
,
1598 struct gomp_team
*team
= thr
->ts
.team
;
1599 /* If parallel or taskgroup has been cancelled, don't start new
1602 && (gomp_team_barrier_cancelled (&team
->barrier
)
1603 || (thr
->task
->taskgroup
1604 && thr
->task
->taskgroup
->cancelled
)))
1607 gomp_task_maybe_wait_for_dependencies (depend
);
1612 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1615 struct gomp_thread
*thr
= gomp_thread ();
1616 struct gomp_team
*team
= thr
->ts
.team
;
1617 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1619 && (gomp_team_barrier_cancelled (&team
->barrier
)
1620 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1624 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1625 for (i
= 0; i
< mapnum
; i
++)
1626 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1628 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
1629 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1633 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
1634 true, GOMP_MAP_VARS_ENTER_DATA
);
1636 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
1640 gomp_target_task_fn (void *data
)
1642 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
1643 if (ttask
->fn
!= NULL
)
1645 /* GOMP_target_41 */
1647 else if (ttask
->devicep
== NULL
1648 || !(ttask
->devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1652 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
1653 gomp_update (ttask
->devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
1654 ttask
->kinds
, true);
1655 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1656 for (i
= 0; i
< ttask
->mapnum
; i
++)
1657 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1659 gomp_map_vars (ttask
->devicep
, ttask
->sizes
[i
] + 1,
1660 &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
1661 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1662 i
+= ttask
->sizes
[i
];
1665 gomp_map_vars (ttask
->devicep
, 1, &ttask
->hostaddrs
[i
], NULL
,
1666 &ttask
->sizes
[i
], &ttask
->kinds
[i
],
1667 true, GOMP_MAP_VARS_ENTER_DATA
);
1669 gomp_exit_data (ttask
->devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
1670 ttask
->sizes
, ttask
->kinds
);
1674 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
1678 struct gomp_task_icv
*icv
= gomp_icv (true);
1679 icv
->thread_limit_var
1680 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
1686 omp_target_alloc (size_t size
, int device_num
)
1688 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1689 return malloc (size
);
1694 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1695 if (devicep
== NULL
)
1698 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1699 return malloc (size
);
1701 gomp_mutex_lock (&devicep
->lock
);
1702 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
1703 gomp_mutex_unlock (&devicep
->lock
);
1708 omp_target_free (void *device_ptr
, int device_num
)
1710 if (device_ptr
== NULL
)
1713 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1722 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1723 if (devicep
== NULL
)
1726 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1732 gomp_mutex_lock (&devicep
->lock
);
1733 devicep
->free_func (devicep
->target_id
, device_ptr
);
1734 gomp_mutex_unlock (&devicep
->lock
);
1738 omp_target_is_present (void *ptr
, int device_num
)
1743 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1749 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1750 if (devicep
== NULL
)
1753 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1756 gomp_mutex_lock (&devicep
->lock
);
1757 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
1758 struct splay_tree_key_s cur_node
;
1760 cur_node
.host_start
= (uintptr_t) ptr
;
1761 cur_node
.host_end
= cur_node
.host_start
;
1762 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1763 int ret
= n
!= NULL
;
1764 gomp_mutex_unlock (&devicep
->lock
);
1769 omp_target_memcpy (void *dst
, void *src
, size_t length
, size_t dst_offset
,
1770 size_t src_offset
, int dst_device_num
, int src_device_num
)
1772 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
1774 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
1776 if (dst_device_num
< 0)
1779 dst_devicep
= resolve_device (dst_device_num
);
1780 if (dst_devicep
== NULL
)
1783 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1786 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
1788 if (src_device_num
< 0)
1791 src_devicep
= resolve_device (src_device_num
);
1792 if (src_devicep
== NULL
)
1795 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1798 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
1800 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
1803 if (src_devicep
== NULL
)
1805 gomp_mutex_lock (&dst_devicep
->lock
);
1806 dst_devicep
->host2dev_func (dst_devicep
->target_id
,
1807 (char *) dst
+ dst_offset
,
1808 (char *) src
+ src_offset
, length
);
1809 gomp_mutex_unlock (&dst_devicep
->lock
);
1812 if (dst_devicep
== NULL
)
1814 gomp_mutex_lock (&src_devicep
->lock
);
1815 src_devicep
->dev2host_func (src_devicep
->target_id
,
1816 (char *) dst
+ dst_offset
,
1817 (char *) src
+ src_offset
, length
);
1818 gomp_mutex_unlock (&src_devicep
->lock
);
1821 if (src_devicep
== dst_devicep
)
1823 gomp_mutex_lock (&src_devicep
->lock
);
1824 src_devicep
->dev2dev_func (src_devicep
->target_id
,
1825 (char *) dst
+ dst_offset
,
1826 (char *) src
+ src_offset
, length
);
1827 gomp_mutex_unlock (&src_devicep
->lock
);
1834 omp_target_memcpy_rect_worker (void *dst
, void *src
, size_t element_size
,
1835 int num_dims
, const size_t *volume
,
1836 const size_t *dst_offsets
,
1837 const size_t *src_offsets
,
1838 const size_t *dst_dimensions
,
1839 const size_t *src_dimensions
,
1840 struct gomp_device_descr
*dst_devicep
,
1841 struct gomp_device_descr
*src_devicep
)
1843 size_t dst_slice
= element_size
;
1844 size_t src_slice
= element_size
;
1845 size_t j
, dst_off
, src_off
, length
;
1850 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
1851 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
1852 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
1854 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
1855 memcpy ((char *) dst
+ dst_off
, (char *) src
+ src_off
, length
);
1856 else if (src_devicep
== NULL
)
1857 dst_devicep
->host2dev_func (dst_devicep
->target_id
,
1858 (char *) dst
+ dst_off
,
1859 (char *) src
+ src_off
, length
);
1860 else if (dst_devicep
== NULL
)
1861 src_devicep
->dev2host_func (src_devicep
->target_id
,
1862 (char *) dst
+ dst_off
,
1863 (char *) src
+ src_off
, length
);
1864 else if (src_devicep
== dst_devicep
)
1865 src_devicep
->dev2dev_func (src_devicep
->target_id
,
1866 (char *) dst
+ dst_off
,
1867 (char *) src
+ src_off
, length
);
1873 /* FIXME: it would be nice to have some plugin function to handle
1874 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
1875 be handled in the generic recursion below, and for host-host it
1876 should be used even for any num_dims >= 2. */
1878 for (i
= 1; i
< num_dims
; i
++)
1879 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
1880 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
1882 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
1883 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
1885 for (j
= 0; j
< volume
[0]; j
++)
1887 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
1888 (char *) src
+ src_off
,
1889 element_size
, num_dims
- 1,
1890 volume
+ 1, dst_offsets
+ 1,
1891 src_offsets
+ 1, dst_dimensions
+ 1,
1892 src_dimensions
+ 1, dst_devicep
,
1896 dst_off
+= dst_slice
;
1897 src_off
+= src_slice
;
1903 omp_target_memcpy_rect (void *dst
, void *src
, size_t element_size
,
1904 int num_dims
, const size_t *volume
,
1905 const size_t *dst_offsets
,
1906 const size_t *src_offsets
,
1907 const size_t *dst_dimensions
,
1908 const size_t *src_dimensions
,
1909 int dst_device_num
, int src_device_num
)
1911 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
1916 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
1918 if (dst_device_num
< 0)
1921 dst_devicep
= resolve_device (dst_device_num
);
1922 if (dst_devicep
== NULL
)
1925 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1928 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
1930 if (src_device_num
< 0)
1933 src_devicep
= resolve_device (src_device_num
);
1934 if (src_devicep
== NULL
)
1937 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1941 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
1945 gomp_mutex_lock (&src_devicep
->lock
);
1946 else if (dst_devicep
)
1947 gomp_mutex_lock (&dst_devicep
->lock
);
1948 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
1949 volume
, dst_offsets
, src_offsets
,
1950 dst_dimensions
, src_dimensions
,
1951 dst_devicep
, src_devicep
);
1953 gomp_mutex_unlock (&src_devicep
->lock
);
1954 else if (dst_devicep
)
1955 gomp_mutex_unlock (&dst_devicep
->lock
);
1960 omp_target_associate_ptr (void *host_ptr
, void *device_ptr
, size_t size
,
1961 size_t device_offset
, int device_num
)
1963 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1969 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1970 if (devicep
== NULL
)
1973 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1976 gomp_mutex_lock (&devicep
->lock
);
1978 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
1979 struct splay_tree_key_s cur_node
;
1982 cur_node
.host_start
= (uintptr_t) host_ptr
;
1983 cur_node
.host_end
= cur_node
.host_start
+ size
;
1984 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1987 if (n
->tgt
->tgt_start
+ n
->tgt_offset
1988 == (uintptr_t) device_ptr
+ device_offset
1989 && n
->host_start
<= cur_node
.host_start
1990 && n
->host_end
>= cur_node
.host_end
)
1995 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1996 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2000 tgt
->to_free
= NULL
;
2002 tgt
->list_count
= 0;
2003 tgt
->device_descr
= devicep
;
2004 splay_tree_node array
= tgt
->array
;
2005 splay_tree_key k
= &array
->key
;
2006 k
->host_start
= cur_node
.host_start
;
2007 k
->host_end
= cur_node
.host_end
;
2009 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2010 k
->refcount
= REFCOUNT_INFINITY
;
2011 k
->async_refcount
= 0;
2013 array
->right
= NULL
;
2014 splay_tree_insert (&devicep
->mem_map
, array
);
2017 gomp_mutex_unlock (&devicep
->lock
);
2022 omp_target_disassociate_ptr (void *ptr
, int device_num
)
2024 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2030 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2031 if (devicep
== NULL
)
2034 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2037 gomp_mutex_lock (&devicep
->lock
);
2039 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2040 struct splay_tree_key_s cur_node
;
2043 cur_node
.host_start
= (uintptr_t) ptr
;
2044 cur_node
.host_end
= cur_node
.host_start
;
2045 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2047 && n
->host_start
== cur_node
.host_start
2048 && n
->refcount
== REFCOUNT_INFINITY
2049 && n
->tgt
->tgt_start
== 0
2050 && n
->tgt
->to_free
== NULL
2051 && n
->tgt
->refcount
== 1
2052 && n
->tgt
->list_count
== 0)
2054 splay_tree_remove (&devicep
->mem_map
, n
);
2055 gomp_unmap_tgt (n
->tgt
);
2059 gomp_mutex_unlock (&devicep
->lock
);
2063 #ifdef PLUGIN_SUPPORT
2065 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2067 The handles of the found functions are stored in the corresponding fields
2068 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2071 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2072 const char *plugin_name
)
2074 const char *err
= NULL
, *last_missing
= NULL
;
2076 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2080 /* Check if all required functions are available in the plugin and store
2081 their handlers. None of the symbols can legitimately be NULL,
2082 so we don't need to check dlerror all the time. */
2084 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2086 /* Similar, but missing functions are not an error. Return false if
2087 failed, true otherwise. */
2088 #define DLSYM_OPT(f, n) \
2089 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2090 || (last_missing = #n, 0))
2093 if (device
->version_func () != GOMP_VERSION
)
2095 err
= "plugin version mismatch";
2102 DLSYM (get_num_devices
);
2103 DLSYM (init_device
);
2104 DLSYM (fini_device
);
2106 DLSYM (unload_image
);
2111 device
->capabilities
= device
->get_caps_func ();
2112 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2117 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2119 if (!DLSYM_OPT (openacc
.exec
, openacc_parallel
)
2120 || !DLSYM_OPT (openacc
.register_async_cleanup
,
2121 openacc_register_async_cleanup
)
2122 || !DLSYM_OPT (openacc
.async_test
, openacc_async_test
)
2123 || !DLSYM_OPT (openacc
.async_test_all
, openacc_async_test_all
)
2124 || !DLSYM_OPT (openacc
.async_wait
, openacc_async_wait
)
2125 || !DLSYM_OPT (openacc
.async_wait_async
, openacc_async_wait_async
)
2126 || !DLSYM_OPT (openacc
.async_wait_all
, openacc_async_wait_all
)
2127 || !DLSYM_OPT (openacc
.async_wait_all_async
,
2128 openacc_async_wait_all_async
)
2129 || !DLSYM_OPT (openacc
.async_set_async
, openacc_async_set_async
)
2130 || !DLSYM_OPT (openacc
.create_thread_data
,
2131 openacc_create_thread_data
)
2132 || !DLSYM_OPT (openacc
.destroy_thread_data
,
2133 openacc_destroy_thread_data
))
2135 /* Require all the OpenACC handlers if we have
2136 GOMP_OFFLOAD_CAP_OPENACC_200. */
2137 err
= "plugin missing OpenACC handler function";
2142 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
2143 openacc_get_current_cuda_device
);
2144 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
2145 openacc_get_current_cuda_context
);
2146 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_get_cuda_stream
);
2147 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_set_cuda_stream
);
2148 if (cuda
&& cuda
!= 4)
2150 /* Make sure all the CUDA functions are there if any of them are. */
2151 err
= "plugin missing OpenACC CUDA handler function";
2163 gomp_error ("while loading %s: %s", plugin_name
, err
);
2165 gomp_error ("missing function was %s", last_missing
);
2167 dlclose (plugin_handle
);
2172 /* This function initializes the runtime needed for offloading.
2173 It parses the list of offload targets and tries to load the plugins for
2174 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2175 will be set, and the array DEVICES initialized, containing descriptors for
2176 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2180 gomp_target_init (void)
2182 const char *prefix
="libgomp-plugin-";
2183 const char *suffix
= SONAME_SUFFIX (1);
2184 const char *cur
, *next
;
2186 int i
, new_num_devices
;
2191 cur
= OFFLOAD_TARGETS
;
2195 struct gomp_device_descr current_device
;
2197 next
= strchr (cur
, ',');
2199 plugin_name
= (char *) malloc (1 + (next
? next
- cur
: strlen (cur
))
2200 + strlen (prefix
) + strlen (suffix
));
2207 strcpy (plugin_name
, prefix
);
2208 strncat (plugin_name
, cur
, next
? next
- cur
: strlen (cur
));
2209 strcat (plugin_name
, suffix
);
2211 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
2213 new_num_devices
= current_device
.get_num_devices_func ();
2214 if (new_num_devices
>= 1)
2216 /* Augment DEVICES and NUM_DEVICES. */
2218 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
2219 * sizeof (struct gomp_device_descr
));
2227 current_device
.name
= current_device
.get_name_func ();
2228 /* current_device.capabilities has already been set. */
2229 current_device
.type
= current_device
.get_type_func ();
2230 current_device
.mem_map
.root
= NULL
;
2231 current_device
.is_initialized
= false;
2232 current_device
.openacc
.data_environ
= NULL
;
2233 for (i
= 0; i
< new_num_devices
; i
++)
2235 current_device
.target_id
= i
;
2236 devices
[num_devices
] = current_device
;
2237 gomp_mutex_init (&devices
[num_devices
].lock
);
2248 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2249 NUM_DEVICES_OPENMP. */
2250 struct gomp_device_descr
*devices_s
2251 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
2258 num_devices_openmp
= 0;
2259 for (i
= 0; i
< num_devices
; i
++)
2260 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2261 devices_s
[num_devices_openmp
++] = devices
[i
];
2262 int num_devices_after_openmp
= num_devices_openmp
;
2263 for (i
= 0; i
< num_devices
; i
++)
2264 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2265 devices_s
[num_devices_after_openmp
++] = devices
[i
];
2267 devices
= devices_s
;
2269 for (i
= 0; i
< num_devices
; i
++)
2271 /* The 'devices' array can be moved (by the realloc call) until we have
2272 found all the plugins, so registering with the OpenACC runtime (which
2273 takes a copy of the pointer argument) must be delayed until now. */
2274 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2275 goacc_register (&devices
[i
]);
2279 #else /* PLUGIN_SUPPORT */
2280 /* If dlfcn.h is unavailable we always fallback to host execution.
2281 GOMP_target* routines are just stubs for this case. */
2283 gomp_target_init (void)
2286 #endif /* PLUGIN_SUPPORT */