1 /* Copyright (C) 2013-2016 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
30 #include "oacc-plugin.h"
32 #include "gomp-constants.h"
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
45 #include "plugin-suffix.h"
48 static void gomp_target_init (void);
50 /* The whole initialization code for offloading plugins is only run one. */
51 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
53 /* Mutex for offload image registration. */
54 static gomp_mutex_t register_lock
;
56 /* This structure describes an offload image.
57 It contains type of the target device, pointer to host table descriptor, and
58 pointer to target data. */
59 struct offload_image_descr
{
61 enum offload_target_type type
;
62 const void *host_table
;
63 const void *target_data
;
/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
84 gomp_realloc_unlock (void *old
, size_t size
)
86 void *ret
= realloc (old
, size
);
89 gomp_mutex_unlock (®ister_lock
);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
96 gomp_init_targets_once (void)
98 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
102 gomp_get_num_devices (void)
104 gomp_init_targets_once ();
105 return num_devices_openmp
;
108 static struct gomp_device_descr
*
109 resolve_device (int device_id
)
111 if (device_id
== GOMP_DEVICE_ICV
)
113 struct gomp_task_icv
*icv
= gomp_icv (false);
114 device_id
= icv
->default_device_var
;
117 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
120 gomp_mutex_lock (&devices
[device_id
].lock
);
121 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
122 gomp_init_device (&devices
[device_id
]);
123 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
125 gomp_mutex_unlock (&devices
[device_id
].lock
);
128 gomp_mutex_unlock (&devices
[device_id
].lock
);
130 return &devices
[device_id
];
134 static inline splay_tree_key
135 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
137 if (key
->host_start
!= key
->host_end
)
138 return splay_tree_lookup (mem_map
, key
);
141 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
146 n
= splay_tree_lookup (mem_map
, key
);
150 return splay_tree_lookup (mem_map
, key
);
153 static inline splay_tree_key
154 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
156 if (key
->host_start
!= key
->host_end
)
157 return splay_tree_lookup (mem_map
, key
);
160 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
165 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
166 gomp_map_0len_lookup found oldn for newn.
167 Helper function of gomp_map_vars. */
170 gomp_map_vars_existing (struct gomp_device_descr
*devicep
, splay_tree_key oldn
,
171 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
175 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
176 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
177 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
178 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
180 if ((kind
& GOMP_MAP_FLAG_FORCE
)
181 || oldn
->host_start
> newn
->host_start
182 || oldn
->host_end
< newn
->host_end
)
184 gomp_mutex_unlock (&devicep
->lock
);
185 gomp_fatal ("Trying to map into device [%p..%p) object when "
186 "[%p..%p) is already mapped",
187 (void *) newn
->host_start
, (void *) newn
->host_end
,
188 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
191 if (GOMP_MAP_ALWAYS_TO_P (kind
))
192 devicep
->host2dev_func (devicep
->target_id
,
193 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
194 + newn
->host_start
- oldn
->host_start
),
195 (void *) newn
->host_start
,
196 newn
->host_end
- newn
->host_start
);
197 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Fetch the map kind for entry IDX of KINDS, which is an array of
   unsigned shorts when SHORT_MAPKIND is true and of unsigned chars
   otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
209 gomp_map_pointer (struct target_mem_desc
*tgt
, uintptr_t host_ptr
,
210 uintptr_t target_offset
, uintptr_t bias
)
212 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
213 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
214 struct splay_tree_key_s cur_node
;
216 cur_node
.host_start
= host_ptr
;
217 if (cur_node
.host_start
== (uintptr_t) NULL
)
219 cur_node
.tgt_offset
= (uintptr_t) NULL
;
220 /* FIXME: see comment about coalescing host/dev transfers below. */
221 devicep
->host2dev_func (devicep
->target_id
,
222 (void *) (tgt
->tgt_start
+ target_offset
),
223 (void *) &cur_node
.tgt_offset
,
227 /* Add bias to the pointer value. */
228 cur_node
.host_start
+= bias
;
229 cur_node
.host_end
= cur_node
.host_start
;
230 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
233 gomp_mutex_unlock (&devicep
->lock
);
234 gomp_fatal ("Pointer target of array section wasn't mapped");
236 cur_node
.host_start
-= n
->host_start
;
238 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
239 /* At this point tgt_offset is target address of the
240 array section. Now subtract bias to get what we want
241 to initialize the pointer with. */
242 cur_node
.tgt_offset
-= bias
;
243 /* FIXME: see comment about coalescing host/dev transfers below. */
244 devicep
->host2dev_func (devicep
->target_id
,
245 (void *) (tgt
->tgt_start
+ target_offset
),
246 (void *) &cur_node
.tgt_offset
,
251 gomp_map_fields_existing (struct target_mem_desc
*tgt
, splay_tree_key n
,
252 size_t first
, size_t i
, void **hostaddrs
,
253 size_t *sizes
, void *kinds
)
255 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
256 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
257 struct splay_tree_key_s cur_node
;
259 const bool short_mapkind
= true;
260 const int typemask
= short_mapkind
? 0xff : 0x7;
262 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
263 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
264 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
265 kind
= get_kind (short_mapkind
, kinds
, i
);
268 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
270 gomp_map_vars_existing (devicep
, n2
, &cur_node
,
271 &tgt
->list
[i
], kind
& typemask
);
276 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
278 cur_node
.host_start
--;
279 n2
= splay_tree_lookup (mem_map
, &cur_node
);
280 cur_node
.host_start
++;
283 && n2
->host_start
- n
->host_start
284 == n2
->tgt_offset
- n
->tgt_offset
)
286 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
292 n2
= splay_tree_lookup (mem_map
, &cur_node
);
296 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
298 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
303 gomp_mutex_unlock (&devicep
->lock
);
304 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
305 "other mapped elements from the same structure weren't mapped "
306 "together with it", (void *) cur_node
.host_start
,
307 (void *) cur_node
.host_end
);
310 static inline uintptr_t
311 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
313 if (tgt
->list
[i
].key
!= NULL
)
314 return tgt
->list
[i
].key
->tgt
->tgt_start
315 + tgt
->list
[i
].key
->tgt_offset
316 + tgt
->list
[i
].offset
;
317 if (tgt
->list
[i
].offset
== ~(uintptr_t) 0)
318 return (uintptr_t) hostaddrs
[i
];
319 if (tgt
->list
[i
].offset
== ~(uintptr_t) 1)
321 if (tgt
->list
[i
].offset
== ~(uintptr_t) 2)
322 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
323 + tgt
->list
[i
+ 1].key
->tgt_offset
324 + tgt
->list
[i
+ 1].offset
325 + (uintptr_t) hostaddrs
[i
]
326 - (uintptr_t) hostaddrs
[i
+ 1];
327 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
330 attribute_hidden
struct target_mem_desc
*
331 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
332 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
333 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
335 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
336 bool has_firstprivate
= false;
337 const int rshift
= short_mapkind
? 8 : 3;
338 const int typemask
= short_mapkind
? 0xff : 0x7;
339 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
340 struct splay_tree_key_s cur_node
;
341 struct target_mem_desc
*tgt
342 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
343 tgt
->list_count
= mapnum
;
344 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
345 tgt
->device_descr
= devicep
;
354 tgt_align
= sizeof (void *);
356 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
358 size_t align
= 4 * sizeof (void *);
360 tgt_size
= mapnum
* sizeof (void *);
363 gomp_mutex_lock (&devicep
->lock
);
364 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
366 gomp_mutex_unlock (&devicep
->lock
);
371 for (i
= 0; i
< mapnum
; i
++)
373 int kind
= get_kind (short_mapkind
, kinds
, i
);
374 if (hostaddrs
[i
] == NULL
375 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
377 tgt
->list
[i
].key
= NULL
;
378 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
381 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
383 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
384 cur_node
.host_end
= cur_node
.host_start
;
385 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
388 gomp_mutex_unlock (&devicep
->lock
);
389 gomp_fatal ("use_device_ptr pointer wasn't mapped");
391 cur_node
.host_start
-= n
->host_start
;
393 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
394 + cur_node
.host_start
);
395 tgt
->list
[i
].key
= NULL
;
396 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
399 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
401 size_t first
= i
+ 1;
402 size_t last
= i
+ sizes
[i
];
403 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
404 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
406 tgt
->list
[i
].key
= NULL
;
407 tgt
->list
[i
].offset
= ~(uintptr_t) 2;
408 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
411 size_t align
= (size_t) 1 << (kind
>> rshift
);
412 if (tgt_align
< align
)
414 tgt_size
-= (uintptr_t) hostaddrs
[first
]
415 - (uintptr_t) hostaddrs
[i
];
416 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
417 tgt_size
+= cur_node
.host_end
- (uintptr_t) hostaddrs
[i
];
418 not_found_cnt
+= last
- i
;
419 for (i
= first
; i
<= last
; i
++)
420 tgt
->list
[i
].key
= NULL
;
424 for (i
= first
; i
<= last
; i
++)
425 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
430 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
432 tgt
->list
[i
].key
= NULL
;
433 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
434 has_firstprivate
= true;
437 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
438 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
439 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
441 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
442 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
444 tgt
->list
[i
].key
= NULL
;
446 size_t align
= (size_t) 1 << (kind
>> rshift
);
447 if (tgt_align
< align
)
449 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
450 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
451 has_firstprivate
= true;
455 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
457 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
460 tgt
->list
[i
].key
= NULL
;
461 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
466 n
= splay_tree_lookup (mem_map
, &cur_node
);
467 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
468 gomp_map_vars_existing (devicep
, n
, &cur_node
, &tgt
->list
[i
],
472 tgt
->list
[i
].key
= NULL
;
474 size_t align
= (size_t) 1 << (kind
>> rshift
);
476 if (tgt_align
< align
)
478 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
479 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
480 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
483 for (j
= i
+ 1; j
< mapnum
; j
++)
484 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
487 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
488 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
489 > cur_node
.host_end
))
493 tgt
->list
[j
].key
= NULL
;
504 gomp_mutex_unlock (&devicep
->lock
);
505 gomp_fatal ("unexpected aggregation");
507 tgt
->to_free
= devaddrs
[0];
508 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
509 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
511 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
513 /* Allocate tgt_align aligned tgt_size block of memory. */
514 /* FIXME: Perhaps change interface to allocate properly aligned
516 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
517 tgt_size
+ tgt_align
- 1);
518 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
519 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
520 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
530 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
531 tgt_size
= mapnum
* sizeof (void *);
534 if (not_found_cnt
|| has_firstprivate
)
537 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
538 splay_tree_node array
= tgt
->array
;
539 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
540 uintptr_t field_tgt_base
= 0;
542 for (i
= 0; i
< mapnum
; i
++)
543 if (tgt
->list
[i
].key
== NULL
)
545 int kind
= get_kind (short_mapkind
, kinds
, i
);
546 if (hostaddrs
[i
] == NULL
)
548 switch (kind
& typemask
)
550 size_t align
, len
, first
, last
;
552 case GOMP_MAP_FIRSTPRIVATE
:
553 align
= (size_t) 1 << (kind
>> rshift
);
554 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
555 tgt
->list
[i
].offset
= tgt_size
;
557 devicep
->host2dev_func (devicep
->target_id
,
558 (void *) (tgt
->tgt_start
+ tgt_size
),
559 (void *) hostaddrs
[i
], len
);
562 case GOMP_MAP_FIRSTPRIVATE_INT
:
563 case GOMP_MAP_USE_DEVICE_PTR
:
564 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
566 case GOMP_MAP_STRUCT
:
569 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
570 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
572 if (tgt
->list
[first
].key
!= NULL
)
574 n
= splay_tree_lookup (mem_map
, &cur_node
);
577 size_t align
= (size_t) 1 << (kind
>> rshift
);
578 tgt_size
-= (uintptr_t) hostaddrs
[first
]
579 - (uintptr_t) hostaddrs
[i
];
580 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
581 tgt_size
+= (uintptr_t) hostaddrs
[first
]
582 - (uintptr_t) hostaddrs
[i
];
583 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
584 field_tgt_offset
= tgt_size
;
585 field_tgt_clear
= last
;
586 tgt_size
+= cur_node
.host_end
587 - (uintptr_t) hostaddrs
[first
];
590 for (i
= first
; i
<= last
; i
++)
591 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
595 case GOMP_MAP_ALWAYS_POINTER
:
596 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
597 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
598 n
= splay_tree_lookup (mem_map
, &cur_node
);
600 || n
->host_start
> cur_node
.host_start
601 || n
->host_end
< cur_node
.host_end
)
603 gomp_mutex_unlock (&devicep
->lock
);
604 gomp_fatal ("always pointer not mapped");
606 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
607 != GOMP_MAP_ALWAYS_POINTER
)
608 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
609 if (cur_node
.tgt_offset
)
610 cur_node
.tgt_offset
-= sizes
[i
];
611 devicep
->host2dev_func (devicep
->target_id
,
612 (void *) (n
->tgt
->tgt_start
614 + cur_node
.host_start
616 (void *) &cur_node
.tgt_offset
,
618 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
619 + cur_node
.host_start
- n
->host_start
;
624 splay_tree_key k
= &array
->key
;
625 k
->host_start
= (uintptr_t) hostaddrs
[i
];
626 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
627 k
->host_end
= k
->host_start
+ sizes
[i
];
629 k
->host_end
= k
->host_start
+ sizeof (void *);
630 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
631 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
632 gomp_map_vars_existing (devicep
, n
, k
, &tgt
->list
[i
],
637 if (n
&& n
->refcount
== REFCOUNT_LINK
)
639 /* Replace target address of the pointer with target address
640 of mapped object in the splay tree. */
641 splay_tree_remove (mem_map
, n
);
644 size_t align
= (size_t) 1 << (kind
>> rshift
);
645 tgt
->list
[i
].key
= k
;
647 if (field_tgt_clear
!= ~(size_t) 0)
649 k
->tgt_offset
= k
->host_start
- field_tgt_base
651 if (i
== field_tgt_clear
)
652 field_tgt_clear
= ~(size_t) 0;
656 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
657 k
->tgt_offset
= tgt_size
;
658 tgt_size
+= k
->host_end
- k
->host_start
;
660 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
661 tgt
->list
[i
].always_copy_from
662 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
663 tgt
->list
[i
].offset
= 0;
664 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
666 k
->async_refcount
= 0;
670 splay_tree_insert (mem_map
, array
);
671 switch (kind
& typemask
)
675 case GOMP_MAP_FORCE_ALLOC
:
676 case GOMP_MAP_FORCE_FROM
:
677 case GOMP_MAP_ALWAYS_FROM
:
680 case GOMP_MAP_TOFROM
:
681 case GOMP_MAP_FORCE_TO
:
682 case GOMP_MAP_FORCE_TOFROM
:
683 case GOMP_MAP_ALWAYS_TO
:
684 case GOMP_MAP_ALWAYS_TOFROM
:
685 /* FIXME: Perhaps add some smarts, like if copying
686 several adjacent fields from host to target, use some
687 host buffer to avoid sending each var individually. */
688 devicep
->host2dev_func (devicep
->target_id
,
689 (void *) (tgt
->tgt_start
691 (void *) k
->host_start
,
692 k
->host_end
- k
->host_start
);
694 case GOMP_MAP_POINTER
:
695 gomp_map_pointer (tgt
, (uintptr_t) *(void **) k
->host_start
,
696 k
->tgt_offset
, sizes
[i
]);
698 case GOMP_MAP_TO_PSET
:
699 /* FIXME: see above FIXME comment. */
700 devicep
->host2dev_func (devicep
->target_id
,
701 (void *) (tgt
->tgt_start
703 (void *) k
->host_start
,
704 k
->host_end
- k
->host_start
);
706 for (j
= i
+ 1; j
< mapnum
; j
++)
707 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
711 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
712 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
717 tgt
->list
[j
].key
= k
;
718 tgt
->list
[j
].copy_from
= false;
719 tgt
->list
[j
].always_copy_from
= false;
720 if (k
->refcount
!= REFCOUNT_INFINITY
)
722 gomp_map_pointer (tgt
,
723 (uintptr_t) *(void **) hostaddrs
[j
],
725 + ((uintptr_t) hostaddrs
[j
]
731 case GOMP_MAP_FORCE_PRESENT
:
733 /* We already looked up the memory region above and it
735 size_t size
= k
->host_end
- k
->host_start
;
736 gomp_mutex_unlock (&devicep
->lock
);
737 #ifdef HAVE_INTTYPES_H
738 gomp_fatal ("present clause: !acc_is_present (%p, "
739 "%"PRIu64
" (0x%"PRIx64
"))",
740 (void *) k
->host_start
,
741 (uint64_t) size
, (uint64_t) size
);
743 gomp_fatal ("present clause: !acc_is_present (%p, "
744 "%lu (0x%lx))", (void *) k
->host_start
,
745 (unsigned long) size
, (unsigned long) size
);
749 case GOMP_MAP_FORCE_DEVICEPTR
:
750 assert (k
->host_end
- k
->host_start
== sizeof (void *));
752 devicep
->host2dev_func (devicep
->target_id
,
753 (void *) (tgt
->tgt_start
755 (void *) k
->host_start
,
759 gomp_mutex_unlock (&devicep
->lock
);
760 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
766 /* Set link pointer on target to the device address of the
768 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
769 devicep
->host2dev_func (devicep
->target_id
,
770 (void *) n
->tgt_offset
,
771 &tgt_addr
, sizeof (void *));
778 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
780 for (i
= 0; i
< mapnum
; i
++)
782 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
783 /* FIXME: see above FIXME comment. */
784 devicep
->host2dev_func (devicep
->target_id
,
785 (void *) (tgt
->tgt_start
786 + i
* sizeof (void *)),
787 (void *) &cur_node
.tgt_offset
,
792 /* If the variable from "omp target enter data" map-list was already mapped,
793 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
795 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
801 gomp_mutex_unlock (&devicep
->lock
);
806 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
808 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
810 tgt
->device_descr
->free_func (tgt
->device_descr
->target_id
, tgt
->to_free
);
816 /* Decrease the refcount for a set of mapped variables, and queue asychronous
817 copies from the device back to the host after any work that has been issued.
818 Because the regions are still "live", increment an asynchronous reference
819 count to indicate that they should not be unmapped from host-side data
820 structures until the asynchronous copy has completed. */
822 attribute_hidden
void
823 gomp_copy_from_async (struct target_mem_desc
*tgt
)
825 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
828 gomp_mutex_lock (&devicep
->lock
);
830 for (i
= 0; i
< tgt
->list_count
; i
++)
831 if (tgt
->list
[i
].key
== NULL
)
833 else if (tgt
->list
[i
].key
->refcount
> 1)
835 tgt
->list
[i
].key
->refcount
--;
836 tgt
->list
[i
].key
->async_refcount
++;
840 splay_tree_key k
= tgt
->list
[i
].key
;
841 if (tgt
->list
[i
].copy_from
)
842 devicep
->dev2host_func (devicep
->target_id
, (void *) k
->host_start
,
843 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
844 k
->host_end
- k
->host_start
);
847 gomp_mutex_unlock (&devicep
->lock
);
850 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
851 variables back from device to host: if it is false, it is assumed that this
852 has been done already, i.e. by gomp_copy_from_async above. */
854 attribute_hidden
void
855 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
857 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
859 if (tgt
->list_count
== 0)
865 gomp_mutex_lock (&devicep
->lock
);
866 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
868 gomp_mutex_unlock (&devicep
->lock
);
875 for (i
= 0; i
< tgt
->list_count
; i
++)
877 splay_tree_key k
= tgt
->list
[i
].key
;
881 bool do_unmap
= false;
882 if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
884 else if (k
->refcount
== 1)
886 if (k
->async_refcount
> 0)
895 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
896 || tgt
->list
[i
].always_copy_from
)
897 devicep
->dev2host_func (devicep
->target_id
,
898 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
899 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
900 + tgt
->list
[i
].offset
),
901 tgt
->list
[i
].length
);
904 splay_tree_remove (&devicep
->mem_map
, k
);
906 splay_tree_insert (&devicep
->mem_map
,
907 (splay_tree_node
) k
->link_key
);
908 if (k
->tgt
->refcount
> 1)
911 gomp_unmap_tgt (k
->tgt
);
915 if (tgt
->refcount
> 1)
918 gomp_unmap_tgt (tgt
);
920 gomp_mutex_unlock (&devicep
->lock
);
924 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
925 size_t *sizes
, void *kinds
, bool short_mapkind
)
928 struct splay_tree_key_s cur_node
;
929 const int typemask
= short_mapkind
? 0xff : 0x7;
937 gomp_mutex_lock (&devicep
->lock
);
938 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
940 gomp_mutex_unlock (&devicep
->lock
);
944 for (i
= 0; i
< mapnum
; i
++)
947 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
948 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
949 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
952 int kind
= get_kind (short_mapkind
, kinds
, i
);
953 if (n
->host_start
> cur_node
.host_start
954 || n
->host_end
< cur_node
.host_end
)
956 gomp_mutex_unlock (&devicep
->lock
);
957 gomp_fatal ("Trying to update [%p..%p) object when "
958 "only [%p..%p) is mapped",
959 (void *) cur_node
.host_start
,
960 (void *) cur_node
.host_end
,
961 (void *) n
->host_start
,
962 (void *) n
->host_end
);
964 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
965 devicep
->host2dev_func (devicep
->target_id
,
966 (void *) (n
->tgt
->tgt_start
968 + cur_node
.host_start
970 (void *) cur_node
.host_start
,
971 cur_node
.host_end
- cur_node
.host_start
);
972 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
973 devicep
->dev2host_func (devicep
->target_id
,
974 (void *) cur_node
.host_start
,
975 (void *) (n
->tgt
->tgt_start
977 + cur_node
.host_start
979 cur_node
.host_end
- cur_node
.host_start
);
982 gomp_mutex_unlock (&devicep
->lock
);
985 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
986 And insert to splay tree the mapping between addresses from HOST_TABLE and
987 from loaded target image. We rely in the host and device compiler
988 emitting variable and functions in the same order. */
991 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
992 const void *host_table
, const void *target_data
,
993 bool is_register_lock
)
995 void **host_func_table
= ((void ***) host_table
)[0];
996 void **host_funcs_end
= ((void ***) host_table
)[1];
997 void **host_var_table
= ((void ***) host_table
)[2];
998 void **host_vars_end
= ((void ***) host_table
)[3];
1000 /* The func table contains only addresses, the var table contains addresses
1001 and corresponding sizes. */
1002 int num_funcs
= host_funcs_end
- host_func_table
;
1003 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1005 /* Load image to device and get target addresses for the image. */
1006 struct addr_pair
*target_table
= NULL
;
1007 int i
, num_target_entries
;
1010 = devicep
->load_image_func (devicep
->target_id
, version
,
1011 target_data
, &target_table
);
1013 if (num_target_entries
!= num_funcs
+ num_vars
)
1015 gomp_mutex_unlock (&devicep
->lock
);
1016 if (is_register_lock
)
1017 gomp_mutex_unlock (®ister_lock
);
1018 gomp_fatal ("Cannot map target functions or variables"
1019 " (expected %u, have %u)", num_funcs
+ num_vars
,
1020 num_target_entries
);
1023 /* Insert host-target address mapping into splay tree. */
1024 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1025 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
1026 tgt
->refcount
= REFCOUNT_INFINITY
;
1029 tgt
->to_free
= NULL
;
1031 tgt
->list_count
= 0;
1032 tgt
->device_descr
= devicep
;
1033 splay_tree_node array
= tgt
->array
;
1035 for (i
= 0; i
< num_funcs
; i
++)
1037 splay_tree_key k
= &array
->key
;
1038 k
->host_start
= (uintptr_t) host_func_table
[i
];
1039 k
->host_end
= k
->host_start
+ 1;
1041 k
->tgt_offset
= target_table
[i
].start
;
1042 k
->refcount
= REFCOUNT_INFINITY
;
1043 k
->async_refcount
= 0;
1046 array
->right
= NULL
;
1047 splay_tree_insert (&devicep
->mem_map
, array
);
1051 /* Most significant bit of the size in host and target tables marks
1052 "omp declare target link" variables. */
1053 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1054 const uintptr_t size_mask
= ~link_bit
;
1056 for (i
= 0; i
< num_vars
; i
++)
1058 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1059 uintptr_t target_size
= target_var
->end
- target_var
->start
;
1061 if ((uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
1063 gomp_mutex_unlock (&devicep
->lock
);
1064 if (is_register_lock
)
1065 gomp_mutex_unlock (®ister_lock
);
1066 gomp_fatal ("Cannot map target variables (size mismatch)");
1069 splay_tree_key k
= &array
->key
;
1070 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1072 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1074 k
->tgt_offset
= target_var
->start
;
1075 k
->refcount
= target_size
& link_bit
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
1076 k
->async_refcount
= 0;
1079 array
->right
= NULL
;
1080 splay_tree_insert (&devicep
->mem_map
, array
);
1084 free (target_table
);
1087 /* Unload the mappings described by target_data from device DEVICE_P.
1088 The device must be locked. */
1091 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1093 const void *host_table
, const void *target_data
)
1095 void **host_func_table
= ((void ***) host_table
)[0];
1096 void **host_funcs_end
= ((void ***) host_table
)[1];
1097 void **host_var_table
= ((void ***) host_table
)[2];
1098 void **host_vars_end
= ((void ***) host_table
)[3];
1100 /* The func table contains only addresses, the var table contains addresses
1101 and corresponding sizes. */
1102 int num_funcs
= host_funcs_end
- host_func_table
;
1103 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1105 struct splay_tree_key_s k
;
1106 splay_tree_key node
= NULL
;
1108 /* Find mapping at start of node array */
1109 if (num_funcs
|| num_vars
)
1111 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1112 : (uintptr_t) host_var_table
[0]);
1113 k
.host_end
= k
.host_start
+ 1;
1114 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1117 devicep
->unload_image_func (devicep
->target_id
, version
, target_data
);
1119 /* Remove mappings from splay tree. */
1121 for (i
= 0; i
< num_funcs
; i
++)
1123 k
.host_start
= (uintptr_t) host_func_table
[i
];
1124 k
.host_end
= k
.host_start
+ 1;
1125 splay_tree_remove (&devicep
->mem_map
, &k
);
1128 /* Most significant bit of the size in host and target tables marks
1129 "omp declare target link" variables. */
1130 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1131 const uintptr_t size_mask
= ~link_bit
;
1132 bool is_tgt_unmapped
= false;
1134 for (i
= 0; i
< num_vars
; i
++)
1136 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
1138 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1140 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
1141 splay_tree_remove (&devicep
->mem_map
, &k
);
1144 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1145 splay_tree_remove (&devicep
->mem_map
, n
);
1148 if (n
->tgt
->refcount
> 1)
1152 is_tgt_unmapped
= true;
1153 gomp_unmap_tgt (n
->tgt
);
1159 if (node
&& !is_tgt_unmapped
)
1166 /* This function should be called from every offload image while loading.
1167 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1168 the target, and TARGET_DATA needed by target plugin. */
1171 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1172 int target_type
, const void *target_data
)
1176 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1177 gomp_fatal ("Library too old for offload (version %u < %u)",
1178 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1180 gomp_mutex_lock (®ister_lock
);
1182 /* Load image to all initialized devices. */
1183 for (i
= 0; i
< num_devices
; i
++)
1185 struct gomp_device_descr
*devicep
= &devices
[i
];
1186 gomp_mutex_lock (&devicep
->lock
);
1187 if (devicep
->type
== target_type
1188 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1189 gomp_load_image_to_device (devicep
, version
,
1190 host_table
, target_data
, true);
1191 gomp_mutex_unlock (&devicep
->lock
);
1194 /* Insert image to array of pending images. */
1196 = gomp_realloc_unlock (offload_images
,
1197 (num_offload_images
+ 1)
1198 * sizeof (struct offload_image_descr
));
1199 offload_images
[num_offload_images
].version
= version
;
1200 offload_images
[num_offload_images
].type
= target_type
;
1201 offload_images
[num_offload_images
].host_table
= host_table
;
1202 offload_images
[num_offload_images
].target_data
= target_data
;
1204 num_offload_images
++;
1205 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: register an offload image with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1215 /* This function should be called from every offload image while unloading.
1216 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1217 the target, and TARGET_DATA needed by target plugin. */
1220 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1221 int target_type
, const void *target_data
)
1225 gomp_mutex_lock (®ister_lock
);
1227 /* Unload image from all initialized devices. */
1228 for (i
= 0; i
< num_devices
; i
++)
1230 struct gomp_device_descr
*devicep
= &devices
[i
];
1231 gomp_mutex_lock (&devicep
->lock
);
1232 if (devicep
->type
== target_type
1233 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1234 gomp_unload_image_from_device (devicep
, version
,
1235 host_table
, target_data
);
1236 gomp_mutex_unlock (&devicep
->lock
);
1239 /* Remove image from array of pending images. */
1240 for (i
= 0; i
< num_offload_images
; i
++)
1241 if (offload_images
[i
].target_data
== target_data
)
1243 offload_images
[i
] = offload_images
[--num_offload_images
];
1247 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: unregister an offload image registered with
   version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1257 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1258 must be locked on entry, and remains locked on return. */
1260 attribute_hidden
void
1261 gomp_init_device (struct gomp_device_descr
*devicep
)
1264 devicep
->init_device_func (devicep
->target_id
);
1266 /* Load to device all images registered by the moment. */
1267 for (i
= 0; i
< num_offload_images
; i
++)
1269 struct offload_image_descr
*image
= &offload_images
[i
];
1270 if (image
->type
== devicep
->type
)
1271 gomp_load_image_to_device (devicep
, image
->version
,
1272 image
->host_table
, image
->target_data
,
1276 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
1279 attribute_hidden
void
1280 gomp_unload_device (struct gomp_device_descr
*devicep
)
1282 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1286 /* Unload from device all images registered at the moment. */
1287 for (i
= 0; i
< num_offload_images
; i
++)
1289 struct offload_image_descr
*image
= &offload_images
[i
];
1290 if (image
->type
== devicep
->type
)
1291 gomp_unload_image_from_device (devicep
, image
->version
,
1293 image
->target_data
);
1298 /* Free address mapping tables. MM must be locked on entry, and remains locked
1301 attribute_hidden
void
1302 gomp_free_memmap (struct splay_tree_s
*mem_map
)
1304 while (mem_map
->root
)
1306 struct target_mem_desc
*tgt
= mem_map
->root
->key
.tgt
;
1308 splay_tree_remove (mem_map
, &mem_map
->root
->key
);
1314 /* Host fallback for GOMP_target{,_ext} routines. */
1317 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
1319 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1321 memset (thr
, '\0', sizeof (*thr
));
1322 if (gomp_places_list
)
1324 thr
->place
= old_thr
.place
;
1325 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1328 gomp_free_thread (thr
);
1332 /* Calculate alignment and size requirements of a private copy of data shared
1333 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1336 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
1337 unsigned short *kinds
, size_t *tgt_align
,
1341 for (i
= 0; i
< mapnum
; i
++)
1342 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1344 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1345 if (*tgt_align
< align
)
1347 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
1348 *tgt_size
+= sizes
[i
];
1352 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1355 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
1356 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
1359 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
1361 tgt
+= tgt_align
- al
;
1364 for (i
= 0; i
< mapnum
; i
++)
1365 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1367 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1368 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1369 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
1370 hostaddrs
[i
] = tgt
+ tgt_size
;
1371 tgt_size
= tgt_size
+ sizes
[i
];
/* Host fallback with firstprivate map-type handling: materialize private
   copies on the stack, then run FN on the host.  */

static void
gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
				   void **hostaddrs, size_t *sizes,
				   unsigned short *kinds)
{
  size_t tgt_align = 0, tgt_size = 0;

  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
				       &tgt_size);
  if (tgt_align)
    {
      /* Stack allocation is fine here: the copies only need to live for
	 the duration of the fallback region.  */
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
			      tgt_size);
    }
  gomp_target_fallback (fn, hostaddrs);
}
/* Handle firstprivate map-type for shared memory devices and the host
   fallback.  Return the pointer of firstprivate copies which has to be
   freed by the caller after the kernel finishes, or NULL if there were no
   firstprivate entries.  */

static void *
gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs,
				  size_t *sizes, unsigned short *kinds)
{
  size_t tgt_align = 0, tgt_size = 0;
  char *tgt = NULL;

  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
				       &tgt_size);
  if (tgt_align)
    {
      /* Heap allocation: the copies may outlive this frame (async target
	 tasks), so the caller owns and frees the buffer.  */
      tgt = gomp_malloc (tgt_size + tgt_align - 1);
      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
			      tgt_size);
    }
  return tgt;
}
1416 /* Helper function of GOMP_target{,_ext} routines. */
1419 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
1420 void (*host_fn
) (void *))
1422 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
1423 return (void *) host_fn
;
1426 gomp_mutex_lock (&devicep
->lock
);
1427 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1429 gomp_mutex_unlock (&devicep
->lock
);
1433 struct splay_tree_key_s k
;
1434 k
.host_start
= (uintptr_t) host_fn
;
1435 k
.host_end
= k
.host_start
+ 1;
1436 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1437 gomp_mutex_unlock (&devicep
->lock
);
1441 return (void *) tgt_fn
->tgt_offset
;
1445 /* Called when encountering a target directive. If DEVICE
1446 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1447 GOMP_DEVICE_HOST_FALLBACK (or any value
1448 larger than last available hw device), use host fallback.
1449 FN is address of host code, UNUSED is part of the current ABI, but
1450 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1451 with MAPNUM entries, with addresses of the host objects,
1452 sizes of the host objects (resp. for pointer kind pointer bias
1453 and assumed sizeof (void *) size) and kinds. */
1456 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
1457 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1458 unsigned char *kinds
)
1460 struct gomp_device_descr
*devicep
= resolve_device (device
);
1464 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1465 /* All shared memory devices should use the GOMP_target_ext function. */
1466 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
1467 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
1468 return gomp_target_fallback (fn
, hostaddrs
);
1470 struct target_mem_desc
*tgt_vars
1471 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1472 GOMP_MAP_VARS_TARGET
);
1473 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
1475 gomp_unmap_vars (tgt_vars
, true);
1478 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1479 and several arguments have been added:
1480 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1481 DEPEND is array of dependencies, see GOMP_task for details.
1483 ARGS is a pointer to an array consisting of a variable number of both
1484 device-independent and device-specific arguments, which can take one two
1485 elements where the first specifies for which device it is intended, the type
1486 and optionally also the value. If the value is not present in the first
1487 one, the whole second element the actual value. The last element of the
1488 array is a single NULL. Among the device independent can be for example
1489 NUM_TEAMS and THREAD_LIMIT.
1491 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1492 that value, or 1 if teams construct is not present, or 0, if
1493 teams construct does not have num_teams clause and so the choice is
1494 implementation defined, and -1 if it can't be determined on the host
1495 what value will GOMP_teams have on the device.
1496 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1497 body with that value, or 0, if teams construct does not have thread_limit
1498 clause or the teams construct is not present, or -1 if it can't be
1499 determined on the host what value will GOMP_teams have on the device. */
1502 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
1503 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1504 unsigned int flags
, void **depend
, void **args
)
1506 struct gomp_device_descr
*devicep
= resolve_device (device
);
1508 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
1510 struct gomp_thread
*thr
= gomp_thread ();
1511 /* Create a team if we don't have any around, as nowait
1512 target tasks make sense to run asynchronously even when
1513 outside of any parallel. */
1514 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
1516 struct gomp_team
*team
= gomp_new_team (1);
1517 struct gomp_task
*task
= thr
->task
;
1518 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
1519 team
->prev_ts
= thr
->ts
;
1520 thr
->ts
.team
= team
;
1521 thr
->ts
.team_id
= 0;
1522 thr
->ts
.work_share
= &team
->work_shares
[0];
1523 thr
->ts
.last_work_share
= NULL
;
1524 #ifdef HAVE_SYNC_BUILTINS
1525 thr
->ts
.single_count
= 0;
1527 thr
->ts
.static_trip
= 0;
1528 thr
->task
= &team
->implicit_task
[0];
1529 gomp_init_task (thr
->task
, NULL
, icv
);
1535 thr
->task
= &team
->implicit_task
[0];
1538 pthread_setspecific (gomp_thread_destructor
, thr
);
1541 && !thr
->task
->final_task
)
1543 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
1544 sizes
, kinds
, flags
, depend
, args
,
1545 GOMP_TARGET_TASK_BEFORE_MAP
);
1550 /* If there are depend clauses, but nowait is not present
1551 (or we are in a final task), block the parent task until the
1552 dependencies are resolved and then just continue with the rest
1553 of the function as if it is a merged task. */
1556 struct gomp_thread
*thr
= gomp_thread ();
1557 if (thr
->task
&& thr
->task
->depend_hash
)
1558 gomp_task_maybe_wait_for_dependencies (depend
);
1563 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1564 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
1565 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
1567 gomp_target_fallback_firstprivate (fn
, mapnum
, hostaddrs
, sizes
, kinds
);
1571 struct target_mem_desc
*tgt_vars
;
1573 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1575 fpc
= gomp_target_unshare_firstprivate (mapnum
, hostaddrs
, sizes
, kinds
);
1579 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
1580 true, GOMP_MAP_VARS_TARGET
);
1581 devicep
->run_func (devicep
->target_id
, fn_addr
,
1582 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
1585 gomp_unmap_vars (tgt_vars
, true);
1590 /* Host fallback for GOMP_target_data{,_ext} routines. */
1593 gomp_target_data_fallback (void)
1595 struct gomp_task_icv
*icv
= gomp_icv (false);
1596 if (icv
->target_data
)
1598 /* Even when doing a host fallback, if there are any active
1599 #pragma omp target data constructs, need to remember the
1600 new #pragma omp target data, otherwise GOMP_target_end_data
1601 would get out of sync. */
1602 struct target_mem_desc
*tgt
1603 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1604 GOMP_MAP_VARS_DATA
);
1605 tgt
->prev
= icv
->target_data
;
1606 icv
->target_data
= tgt
;
1611 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
1612 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1614 struct gomp_device_descr
*devicep
= resolve_device (device
);
1617 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1618 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
1619 return gomp_target_data_fallback ();
1621 struct target_mem_desc
*tgt
1622 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1623 GOMP_MAP_VARS_DATA
);
1624 struct gomp_task_icv
*icv
= gomp_icv (true);
1625 tgt
->prev
= icv
->target_data
;
1626 icv
->target_data
= tgt
;
1630 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
1631 size_t *sizes
, unsigned short *kinds
)
1633 struct gomp_device_descr
*devicep
= resolve_device (device
);
1636 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1637 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1638 return gomp_target_data_fallback ();
1640 struct target_mem_desc
*tgt
1641 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
1642 GOMP_MAP_VARS_DATA
);
1643 struct gomp_task_icv
*icv
= gomp_icv (true);
1644 tgt
->prev
= icv
->target_data
;
1645 icv
->target_data
= tgt
;
1649 GOMP_target_end_data (void)
1651 struct gomp_task_icv
*icv
= gomp_icv (false);
1652 if (icv
->target_data
)
1654 struct target_mem_desc
*tgt
= icv
->target_data
;
1655 icv
->target_data
= tgt
->prev
;
1656 gomp_unmap_vars (tgt
, true);
1661 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
1662 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1664 struct gomp_device_descr
*devicep
= resolve_device (device
);
1667 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1668 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1671 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
1675 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
1676 size_t *sizes
, unsigned short *kinds
,
1677 unsigned int flags
, void **depend
)
1679 struct gomp_device_descr
*devicep
= resolve_device (device
);
1681 /* If there are depend clauses, but nowait is not present,
1682 block the parent task until the dependencies are resolved
1683 and then just continue with the rest of the function as if it
1684 is a merged task. Until we are able to schedule task during
1685 variable mapping or unmapping, ignore nowait if depend clauses
1689 struct gomp_thread
*thr
= gomp_thread ();
1690 if (thr
->task
&& thr
->task
->depend_hash
)
1692 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1694 && !thr
->task
->final_task
)
1696 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1697 mapnum
, hostaddrs
, sizes
, kinds
,
1698 flags
| GOMP_TARGET_FLAG_UPDATE
,
1699 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
1704 struct gomp_team
*team
= thr
->ts
.team
;
1705 /* If parallel or taskgroup has been cancelled, don't start new
1708 && (gomp_team_barrier_cancelled (&team
->barrier
)
1709 || (thr
->task
->taskgroup
1710 && thr
->task
->taskgroup
->cancelled
)))
1713 gomp_task_maybe_wait_for_dependencies (depend
);
1719 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1720 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1723 struct gomp_thread
*thr
= gomp_thread ();
1724 struct gomp_team
*team
= thr
->ts
.team
;
1725 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1727 && (gomp_team_barrier_cancelled (&team
->barrier
)
1728 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1731 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
1735 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
1736 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
1738 const int typemask
= 0xff;
1740 gomp_mutex_lock (&devicep
->lock
);
1741 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1743 gomp_mutex_unlock (&devicep
->lock
);
1747 for (i
= 0; i
< mapnum
; i
++)
1749 struct splay_tree_key_s cur_node
;
1750 unsigned char kind
= kinds
[i
] & typemask
;
1754 case GOMP_MAP_ALWAYS_FROM
:
1755 case GOMP_MAP_DELETE
:
1756 case GOMP_MAP_RELEASE
:
1757 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1758 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
1759 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1760 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1761 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1762 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1763 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
1764 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1768 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
1770 if ((kind
== GOMP_MAP_DELETE
1771 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
1772 && k
->refcount
!= REFCOUNT_INFINITY
)
1775 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
1776 || kind
== GOMP_MAP_ALWAYS_FROM
)
1777 devicep
->dev2host_func (devicep
->target_id
,
1778 (void *) cur_node
.host_start
,
1779 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1780 + cur_node
.host_start
1782 cur_node
.host_end
- cur_node
.host_start
);
1783 if (k
->refcount
== 0)
1785 splay_tree_remove (&devicep
->mem_map
, k
);
1787 splay_tree_insert (&devicep
->mem_map
,
1788 (splay_tree_node
) k
->link_key
);
1789 if (k
->tgt
->refcount
> 1)
1792 gomp_unmap_tgt (k
->tgt
);
1797 gomp_mutex_unlock (&devicep
->lock
);
1798 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1803 gomp_mutex_unlock (&devicep
->lock
);
1807 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
1808 size_t *sizes
, unsigned short *kinds
,
1809 unsigned int flags
, void **depend
)
1811 struct gomp_device_descr
*devicep
= resolve_device (device
);
1813 /* If there are depend clauses, but nowait is not present,
1814 block the parent task until the dependencies are resolved
1815 and then just continue with the rest of the function as if it
1816 is a merged task. Until we are able to schedule task during
1817 variable mapping or unmapping, ignore nowait if depend clauses
1821 struct gomp_thread
*thr
= gomp_thread ();
1822 if (thr
->task
&& thr
->task
->depend_hash
)
1824 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1826 && !thr
->task
->final_task
)
1828 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1829 mapnum
, hostaddrs
, sizes
, kinds
,
1830 flags
, depend
, NULL
,
1831 GOMP_TARGET_TASK_DATA
))
1836 struct gomp_team
*team
= thr
->ts
.team
;
1837 /* If parallel or taskgroup has been cancelled, don't start new
1840 && (gomp_team_barrier_cancelled (&team
->barrier
)
1841 || (thr
->task
->taskgroup
1842 && thr
->task
->taskgroup
->cancelled
)))
1845 gomp_task_maybe_wait_for_dependencies (depend
);
1851 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1852 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1855 struct gomp_thread
*thr
= gomp_thread ();
1856 struct gomp_team
*team
= thr
->ts
.team
;
1857 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1859 && (gomp_team_barrier_cancelled (&team
->barrier
)
1860 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1864 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1865 for (i
= 0; i
< mapnum
; i
++)
1866 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1868 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
1869 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1873 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
1874 true, GOMP_MAP_VARS_ENTER_DATA
);
1876 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
1880 gomp_target_task_fn (void *data
)
1882 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
1883 struct gomp_device_descr
*devicep
= ttask
->devicep
;
1885 if (ttask
->fn
!= NULL
)
1889 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1890 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
1891 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
1893 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
1894 gomp_target_fallback_firstprivate (ttask
->fn
, ttask
->mapnum
,
1895 ttask
->hostaddrs
, ttask
->sizes
,
1900 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1903 gomp_unmap_vars (ttask
->tgt
, true);
1907 void *actual_arguments
;
1908 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1911 ttask
->firstprivate_copies
1912 = gomp_target_unshare_firstprivate (ttask
->mapnum
, ttask
->hostaddrs
,
1913 ttask
->sizes
, ttask
->kinds
);
1914 actual_arguments
= ttask
->hostaddrs
;
1918 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
1919 NULL
, ttask
->sizes
, ttask
->kinds
, true,
1920 GOMP_MAP_VARS_TARGET
);
1921 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
1923 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
1925 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
1926 ttask
->args
, (void *) ttask
);
1929 else if (devicep
== NULL
1930 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1931 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1935 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
1936 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
1937 ttask
->kinds
, true);
1938 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1939 for (i
= 0; i
< ttask
->mapnum
; i
++)
1940 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1942 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
1943 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
1944 GOMP_MAP_VARS_ENTER_DATA
);
1945 i
+= ttask
->sizes
[i
];
1948 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
1949 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1951 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
1957 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
1961 struct gomp_task_icv
*icv
= gomp_icv (true);
1962 icv
->thread_limit_var
1963 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
1969 omp_target_alloc (size_t size
, int device_num
)
1971 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1972 return malloc (size
);
1977 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1978 if (devicep
== NULL
)
1981 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1982 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1983 return malloc (size
);
1985 gomp_mutex_lock (&devicep
->lock
);
1986 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
1987 gomp_mutex_unlock (&devicep
->lock
);
1992 omp_target_free (void *device_ptr
, int device_num
)
1994 if (device_ptr
== NULL
)
1997 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2006 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2007 if (devicep
== NULL
)
2010 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2011 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2017 gomp_mutex_lock (&devicep
->lock
);
2018 devicep
->free_func (devicep
->target_id
, device_ptr
);
2019 gomp_mutex_unlock (&devicep
->lock
);
2023 omp_target_is_present (void *ptr
, int device_num
)
2028 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2034 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2035 if (devicep
== NULL
)
2038 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2039 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2042 gomp_mutex_lock (&devicep
->lock
);
2043 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2044 struct splay_tree_key_s cur_node
;
2046 cur_node
.host_start
= (uintptr_t) ptr
;
2047 cur_node
.host_end
= cur_node
.host_start
;
2048 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2049 int ret
= n
!= NULL
;
2050 gomp_mutex_unlock (&devicep
->lock
);
2055 omp_target_memcpy (void *dst
, void *src
, size_t length
, size_t dst_offset
,
2056 size_t src_offset
, int dst_device_num
, int src_device_num
)
2058 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2060 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2062 if (dst_device_num
< 0)
2065 dst_devicep
= resolve_device (dst_device_num
);
2066 if (dst_devicep
== NULL
)
2069 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2070 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2073 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2075 if (src_device_num
< 0)
2078 src_devicep
= resolve_device (src_device_num
);
2079 if (src_devicep
== NULL
)
2082 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2083 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2086 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2088 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2091 if (src_devicep
== NULL
)
2093 gomp_mutex_lock (&dst_devicep
->lock
);
2094 dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2095 (char *) dst
+ dst_offset
,
2096 (char *) src
+ src_offset
, length
);
2097 gomp_mutex_unlock (&dst_devicep
->lock
);
2100 if (dst_devicep
== NULL
)
2102 gomp_mutex_lock (&src_devicep
->lock
);
2103 src_devicep
->dev2host_func (src_devicep
->target_id
,
2104 (char *) dst
+ dst_offset
,
2105 (char *) src
+ src_offset
, length
);
2106 gomp_mutex_unlock (&src_devicep
->lock
);
2109 if (src_devicep
== dst_devicep
)
2111 gomp_mutex_lock (&src_devicep
->lock
);
2112 src_devicep
->dev2dev_func (src_devicep
->target_id
,
2113 (char *) dst
+ dst_offset
,
2114 (char *) src
+ src_offset
, length
);
2115 gomp_mutex_unlock (&src_devicep
->lock
);
2122 omp_target_memcpy_rect_worker (void *dst
, void *src
, size_t element_size
,
2123 int num_dims
, const size_t *volume
,
2124 const size_t *dst_offsets
,
2125 const size_t *src_offsets
,
2126 const size_t *dst_dimensions
,
2127 const size_t *src_dimensions
,
2128 struct gomp_device_descr
*dst_devicep
,
2129 struct gomp_device_descr
*src_devicep
)
2131 size_t dst_slice
= element_size
;
2132 size_t src_slice
= element_size
;
2133 size_t j
, dst_off
, src_off
, length
;
2138 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2139 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2140 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2142 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2143 memcpy ((char *) dst
+ dst_off
, (char *) src
+ src_off
, length
);
2144 else if (src_devicep
== NULL
)
2145 dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2146 (char *) dst
+ dst_off
,
2147 (char *) src
+ src_off
, length
);
2148 else if (dst_devicep
== NULL
)
2149 src_devicep
->dev2host_func (src_devicep
->target_id
,
2150 (char *) dst
+ dst_off
,
2151 (char *) src
+ src_off
, length
);
2152 else if (src_devicep
== dst_devicep
)
2153 src_devicep
->dev2dev_func (src_devicep
->target_id
,
2154 (char *) dst
+ dst_off
,
2155 (char *) src
+ src_off
, length
);
2161 /* FIXME: it would be nice to have some plugin function to handle
2162 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2163 be handled in the generic recursion below, and for host-host it
2164 should be used even for any num_dims >= 2. */
2166 for (i
= 1; i
< num_dims
; i
++)
2167 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2168 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2170 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2171 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2173 for (j
= 0; j
< volume
[0]; j
++)
2175 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2176 (char *) src
+ src_off
,
2177 element_size
, num_dims
- 1,
2178 volume
+ 1, dst_offsets
+ 1,
2179 src_offsets
+ 1, dst_dimensions
+ 1,
2180 src_dimensions
+ 1, dst_devicep
,
2184 dst_off
+= dst_slice
;
2185 src_off
+= src_slice
;
2191 omp_target_memcpy_rect (void *dst
, void *src
, size_t element_size
,
2192 int num_dims
, const size_t *volume
,
2193 const size_t *dst_offsets
,
2194 const size_t *src_offsets
,
2195 const size_t *dst_dimensions
,
2196 const size_t *src_dimensions
,
2197 int dst_device_num
, int src_device_num
)
2199 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2204 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2206 if (dst_device_num
< 0)
2209 dst_devicep
= resolve_device (dst_device_num
);
2210 if (dst_devicep
== NULL
)
2213 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2214 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2217 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2219 if (src_device_num
< 0)
2222 src_devicep
= resolve_device (src_device_num
);
2223 if (src_devicep
== NULL
)
2226 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2227 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2231 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2235 gomp_mutex_lock (&src_devicep
->lock
);
2236 else if (dst_devicep
)
2237 gomp_mutex_lock (&dst_devicep
->lock
);
2238 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2239 volume
, dst_offsets
, src_offsets
,
2240 dst_dimensions
, src_dimensions
,
2241 dst_devicep
, src_devicep
);
2243 gomp_mutex_unlock (&src_devicep
->lock
);
2244 else if (dst_devicep
)
2245 gomp_mutex_unlock (&dst_devicep
->lock
);
2250 omp_target_associate_ptr (void *host_ptr
, void *device_ptr
, size_t size
,
2251 size_t device_offset
, int device_num
)
2253 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2259 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2260 if (devicep
== NULL
)
2263 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2264 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2267 gomp_mutex_lock (&devicep
->lock
);
2269 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2270 struct splay_tree_key_s cur_node
;
2273 cur_node
.host_start
= (uintptr_t) host_ptr
;
2274 cur_node
.host_end
= cur_node
.host_start
+ size
;
2275 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2278 if (n
->tgt
->tgt_start
+ n
->tgt_offset
2279 == (uintptr_t) device_ptr
+ device_offset
2280 && n
->host_start
<= cur_node
.host_start
2281 && n
->host_end
>= cur_node
.host_end
)
2286 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2287 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2291 tgt
->to_free
= NULL
;
2293 tgt
->list_count
= 0;
2294 tgt
->device_descr
= devicep
;
2295 splay_tree_node array
= tgt
->array
;
2296 splay_tree_key k
= &array
->key
;
2297 k
->host_start
= cur_node
.host_start
;
2298 k
->host_end
= cur_node
.host_end
;
2300 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2301 k
->refcount
= REFCOUNT_INFINITY
;
2302 k
->async_refcount
= 0;
2304 array
->right
= NULL
;
2305 splay_tree_insert (&devicep
->mem_map
, array
);
2308 gomp_mutex_unlock (&devicep
->lock
);
/* NOTE(review): this region is a line-mangled extraction of libgomp's
   target.c.  Statements are split across physical lines, the original
   file's line numbers (2313, 2315, ...) are fused into the text, and
   several intermediate lines (braces, returns, error paths) are missing.
   Code below is kept byte-identical; only comments are added.  */
/* omp_target_disassociate_ptr: undo a host/device pointer association
   for PTR on device DEVICE_NUM.  Counterpart of the association code
   visible earlier in this file — confirm against the full source.  */
2313 omp_target_disassociate_ptr (void *ptr
, int device_num
)
/* Host-fallback "device" has no mapping to remove; the branch body
   (presumably an early return) is missing from this extraction.  */
2315 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
/* Resolve DEVICE_NUM to a device descriptor; NULL means no such device
   (the failure branch body is not visible here).  */
2321 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2322 if (devicep
== NULL
)
/* Only devices with OpenMP 4.0 offload capability keep a mem_map.  */
2325 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
/* Lock the per-device descriptor while touching its splay tree.  */
2328 gomp_mutex_lock (&devicep
->lock
);
2330 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2331 struct splay_tree_key_s cur_node
;
/* Look PTR up as a zero-length key (host_end == host_start).  */
2334 cur_node
.host_start
= (uintptr_t) ptr
;
2335 cur_node
.host_end
= cur_node
.host_start
;
2336 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
/* Remove the mapping only if it is exactly an associate_ptr-style
   entry: infinite refcount, no device allocation to free, sole owner.
   The `if (n' head of this condition is missing from the extraction.  */
2338 && n
->host_start
== cur_node
.host_start
2339 && n
->refcount
== REFCOUNT_INFINITY
2340 && n
->tgt
->tgt_start
== 0
2341 && n
->tgt
->to_free
== NULL
2342 && n
->tgt
->refcount
== 1
2343 && n
->tgt
->list_count
== 0)
2345 splay_tree_remove (&devicep
->mem_map
, n
);
2346 gomp_unmap_tgt (n
->tgt
);
/* Unlock before returning (return statement not visible here).  */
2350 gomp_mutex_unlock (&devicep
->lock
);
2354 #ifdef PLUGIN_SUPPORT
/* NOTE(review): line-mangled extraction — the `#define DLSYM' header
   (original line ~2374), the function's return type, braces, and the
   success/failure epilogue are missing.  Code kept byte-identical;
   comments only.  */
2356 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2358 The handles of the found functions are stored in the corresponding fields
2359 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2362 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2363 const char *plugin_name
)
/* ERR carries a diagnostic on failure; LAST_MISSING records which
   optional symbol was absent (set by DLSYM_OPT below).  */
2365 const char *err
= NULL
, *last_missing
= NULL
;
/* Open the plugin shared object lazily; symbol resolution happens
   per-symbol via dlsym below.  */
2367 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2371 /* Check if all required functions are available in the plugin and store
2372 their handlers. None of the symbols can legitimately be NULL,
2373 so we don't need to check dlerror all the time. */
/* NOTE(review): this is the body of the DLSYM(f) macro; its
   `#define DLSYM(f)' line is missing from this extraction.  */
2375 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2377 /* Similar, but missing functions are not an error. Return false if
2378 failed, true otherwise. */
2379 #define DLSYM_OPT(f, n) \
2380 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2381 || (last_missing = #n, 0))
/* Reject plugins built against a different libgomp interface version.  */
2384 if (device
->version_func () != GOMP_VERSION
)
2386 err
= "plugin version mismatch";
/* Mandatory entry points every plugin must export.  */
2393 DLSYM (get_num_devices
);
2394 DLSYM (init_device
);
2395 DLSYM (fini_device
);
2397 DLSYM (unload_image
);
/* Cache the plugin's capability mask on the descriptor.  */
2402 device
->capabilities
= device
->get_caps_func ();
2403 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
/* can_run is optional even for OpenMP-capable plugins.  */
2407 DLSYM_OPT (can_run
, can_run
);
/* OpenACC-capable plugins must provide the complete handler set;
   any single missing symbol fails the whole group.  */
2410 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2412 if (!DLSYM_OPT (openacc
.exec
, openacc_parallel
)
2413 || !DLSYM_OPT (openacc
.register_async_cleanup
,
2414 openacc_register_async_cleanup
)
2415 || !DLSYM_OPT (openacc
.async_test
, openacc_async_test
)
2416 || !DLSYM_OPT (openacc
.async_test_all
, openacc_async_test_all
)
2417 || !DLSYM_OPT (openacc
.async_wait
, openacc_async_wait
)
2418 || !DLSYM_OPT (openacc
.async_wait_async
, openacc_async_wait_async
)
2419 || !DLSYM_OPT (openacc
.async_wait_all
, openacc_async_wait_all
)
2420 || !DLSYM_OPT (openacc
.async_wait_all_async
,
2421 openacc_async_wait_all_async
)
2422 || !DLSYM_OPT (openacc
.async_set_async
, openacc_async_set_async
)
2423 || !DLSYM_OPT (openacc
.create_thread_data
,
2424 openacc_create_thread_data
)
2425 || !DLSYM_OPT (openacc
.destroy_thread_data
,
2426 openacc_destroy_thread_data
))
2428 /* Require all the OpenACC handlers if we have
2429 GOMP_OFFLOAD_CAP_OPENACC_200. */
2430 err
= "plugin missing OpenACC handler function";
/* The four CUDA interop hooks are all-or-none: `cuda' counts how many
   resolved, and anything other than 0 or 4 is an error below.
   (The `unsigned cuda = 0;' declaration is missing from this view.)  */
2435 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
2436 openacc_get_current_cuda_device
);
2437 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
2438 openacc_get_current_cuda_context
);
2439 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_get_cuda_stream
);
2440 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_set_cuda_stream
);
2441 if (cuda
&& cuda
!= 4)
2443 /* Make sure all the CUDA functions are there if any of them are. */
2444 err
= "plugin missing OpenACC CUDA handler function";
/* Failure path: report the error, optionally the missing symbol name,
   and release the plugin handle.  */
2456 gomp_error ("while loading %s: %s", plugin_name
, err
);
2458 gomp_error ("missing function was %s", last_missing
);
2460 dlclose (plugin_handle
);
/* NOTE(review): line-mangled extraction; return type, braces, and the
   loop-variable declaration are missing.  Code kept byte-identical.  */
2465 /* This function finalizes all initialized devices. */
/* Registered via atexit() in gomp_target_init below.  */
2468 gomp_target_fini (void)
2471 for (i
= 0; i
< num_devices
; i
++)
2473 struct gomp_device_descr
*devicep
= &devices
[i
];
/* Per-device lock guards the state transition.  */
2474 gomp_mutex_lock (&devicep
->lock
);
/* Only devices that actually initialized get their plugin's
   fini hook; then mark them finalized either way within the branch
   visible here.  */
2475 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2477 devicep
->fini_device_func (devicep
->target_id
);
2478 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2480 gomp_mutex_unlock (&devicep
->lock
);
/* NOTE(review): line-mangled extraction — braces, the plugin_name
   declaration/free, loop scaffolding, and several statements are
   missing.  Code kept byte-identical; comments only.  Beware the
   `¤t_device' token below: it is an HTML-entity corruption of
   `&current_device' introduced by the extraction, not real source.  */
2484 /* This function initializes the runtime needed for offloading.
2485 It parses the list of offload targets and tries to load the plugins for
2486 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2487 will be set, and the array DEVICES initialized, containing descriptors for
2488 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2492 gomp_target_init (void)
/* Plugin shared objects are named "libgomp-plugin-<target><suffix>".  */
2494 const char *prefix
="libgomp-plugin-";
2495 const char *suffix
= SONAME_SUFFIX (1);
2496 const char *cur
, *next
;
2498 int i
, new_num_devices
;
/* OFFLOAD_TARGETS is a comma-separated, configure-time list of
   offload target names; iterate over its entries.  */
2503 cur
= OFFLOAD_TARGETS
;
2507 struct gomp_device_descr current_device
;
2509 next
= strchr (cur
, ',');
/* Allocate room for prefix + target name + suffix + NUL.  */
2511 plugin_name
= (char *) malloc (1 + (next
? next
- cur
: strlen (cur
))
2512 + strlen (prefix
) + strlen (suffix
));
2519 strcpy (plugin_name
, prefix
);
2520 strncat (plugin_name
, cur
, next
? next
- cur
: strlen (cur
));
2521 strcat (plugin_name
, suffix
);
/* NOTE(review): `¤t_device' is mangled `&current_device'.  */
2523 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
2525 new_num_devices
= current_device
.get_num_devices_func ();
2526 if (new_num_devices
>= 1)
2528 /* Augment DEVICES and NUM_DEVICES. */
/* NOTE(review): realloc result assigned straight back to `devices';
   on failure the old array would leak — but the original file may
   handle this on lines missing from this extraction; verify.  */
2530 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
2531 * sizeof (struct gomp_device_descr
));
/* Fill in the template descriptor shared by all instances of this
   plugin, then stamp out one copy per physical device below.  */
2539 current_device
.name
= current_device
.get_name_func ();
2540 /* current_device.capabilities has already been set. */
2541 current_device
.type
= current_device
.get_type_func ();
2542 current_device
.mem_map
.root
= NULL
;
2543 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
2544 current_device
.openacc
.data_environ
= NULL
;
2545 for (i
= 0; i
< new_num_devices
; i
++)
2547 current_device
.target_id
= i
;
2548 devices
[num_devices
] = current_device
;
2549 gomp_mutex_init (&devices
[num_devices
].lock
);
2560 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2561 NUM_DEVICES_OPENMP. */
/* Two passes into a fresh array: OpenMP-capable descriptors first,
   then the rest, preserving relative order (a stable partition).  */
2562 struct gomp_device_descr
*devices_s
2563 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
2570 num_devices_openmp
= 0;
2571 for (i
= 0; i
< num_devices
; i
++)
2572 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2573 devices_s
[num_devices_openmp
++] = devices
[i
];
2574 int num_devices_after_openmp
= num_devices_openmp
;
2575 for (i
= 0; i
< num_devices
; i
++)
2576 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2577 devices_s
[num_devices_after_openmp
++] = devices
[i
];
/* Swap in the partitioned array (the free of the old array is on a
   line missing from this extraction; verify in the full file).  */
2579 devices
= devices_s
;
2581 for (i
= 0; i
< num_devices
; i
++)
2583 /* The 'devices' array can be moved (by the realloc call) until we have
2584 found all the plugins, so registering with the OpenACC runtime (which
2585 takes a copy of the pointer argument) must be delayed until now. */
2586 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2587 goacc_register (&devices
[i
]);
/* Ensure devices are torn down at process exit.  */
2590 if (atexit (gomp_target_fini
) != 0)
2591 gomp_fatal ("atexit failed");
2594 #else /* PLUGIN_SUPPORT */
2595 /* If dlfcn.h is unavailable we always fallback to host execution.
2596 GOMP_target* routines are just stubs for this case. */
/* No-op stub when plugin support is compiled out (body braces are
   missing from this extraction).  */
2598 gomp_target_init (void)
2601 #endif /* PLUGIN_SUPPORT */