1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
30 #include "oacc-plugin.h"
32 #include "gomp-constants.h"
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
45 #include "plugin-suffix.h"
48 static void gomp_target_init (void);
50 /* The whole initialization code for offloading plugins is only run one. */
51 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
53 /* Mutex for offload image registration. */
54 static gomp_mutex_t register_lock
;
56 /* This structure describes an offload image.
57 It contains type of the target device, pointer to host table descriptor, and
58 pointer to target data. */
59 struct offload_image_descr
{
61 enum offload_target_type type
;
62 const void *host_table
;
63 const void *target_data
;
66 /* Array of descriptors of offload images. */
67 static struct offload_image_descr
*offload_images
;
69 /* Total number of offload images. */
70 static int num_offload_images
;
72 /* Array of descriptors for all available devices. */
73 static struct gomp_device_descr
*devices
;
75 /* Total number of available devices. */
76 static int num_devices
;
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79 static int num_devices_openmp
;
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
84 gomp_realloc_unlock (void *old
, size_t size
)
86 void *ret
= realloc (old
, size
);
89 gomp_mutex_unlock (®ister_lock
);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
96 gomp_init_targets_once (void)
98 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
102 gomp_get_num_devices (void)
104 gomp_init_targets_once ();
105 return num_devices_openmp
;
108 static struct gomp_device_descr
*
109 resolve_device (int device_id
)
111 if (device_id
== GOMP_DEVICE_ICV
)
113 struct gomp_task_icv
*icv
= gomp_icv (false);
114 device_id
= icv
->default_device_var
;
117 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
120 gomp_mutex_lock (&devices
[device_id
].lock
);
121 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
122 gomp_init_device (&devices
[device_id
]);
123 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
125 gomp_mutex_unlock (&devices
[device_id
].lock
);
128 gomp_mutex_unlock (&devices
[device_id
].lock
);
130 return &devices
[device_id
];
134 static inline splay_tree_key
135 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
137 if (key
->host_start
!= key
->host_end
)
138 return splay_tree_lookup (mem_map
, key
);
141 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
146 n
= splay_tree_lookup (mem_map
, key
);
150 return splay_tree_lookup (mem_map
, key
);
153 static inline splay_tree_key
154 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
156 if (key
->host_start
!= key
->host_end
)
157 return splay_tree_lookup (mem_map
, key
);
160 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
166 gomp_device_copy (struct gomp_device_descr
*devicep
,
167 bool (*copy_func
) (int, void *, const void *, size_t),
168 const char *dst
, void *dstaddr
,
169 const char *src
, const void *srcaddr
,
172 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
174 gomp_mutex_unlock (&devicep
->lock
);
175 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
181 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
182 void *d
, const void *h
, size_t sz
)
184 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
188 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
189 void *h
, const void *d
, size_t sz
)
191 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
195 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
197 if (!devicep
->free_func (devicep
->target_id
, devptr
))
199 gomp_mutex_unlock (&devicep
->lock
);
200 gomp_fatal ("error in freeing device memory block at %p", devptr
);
204 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
205 gomp_map_0len_lookup found oldn for newn.
206 Helper function of gomp_map_vars. */
209 gomp_map_vars_existing (struct gomp_device_descr
*devicep
, splay_tree_key oldn
,
210 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
214 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
215 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
216 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
217 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
219 if ((kind
& GOMP_MAP_FLAG_FORCE
)
220 || oldn
->host_start
> newn
->host_start
221 || oldn
->host_end
< newn
->host_end
)
223 gomp_mutex_unlock (&devicep
->lock
);
224 gomp_fatal ("Trying to map into device [%p..%p) object when "
225 "[%p..%p) is already mapped",
226 (void *) newn
->host_start
, (void *) newn
->host_end
,
227 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
230 if (GOMP_MAP_ALWAYS_TO_P (kind
))
231 gomp_copy_host2dev (devicep
,
232 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
233 + newn
->host_start
- oldn
->host_start
),
234 (void *) newn
->host_start
,
235 newn
->host_end
- newn
->host_start
);
237 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
242 get_kind (bool short_mapkind
, void *kinds
, int idx
)
244 return short_mapkind
? ((unsigned short *) kinds
)[idx
]
245 : ((unsigned char *) kinds
)[idx
];
249 gomp_map_pointer (struct target_mem_desc
*tgt
, uintptr_t host_ptr
,
250 uintptr_t target_offset
, uintptr_t bias
)
252 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
253 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
254 struct splay_tree_key_s cur_node
;
256 cur_node
.host_start
= host_ptr
;
257 if (cur_node
.host_start
== (uintptr_t) NULL
)
259 cur_node
.tgt_offset
= (uintptr_t) NULL
;
260 /* FIXME: see comment about coalescing host/dev transfers below. */
261 gomp_copy_host2dev (devicep
,
262 (void *) (tgt
->tgt_start
+ target_offset
),
263 (void *) &cur_node
.tgt_offset
,
267 /* Add bias to the pointer value. */
268 cur_node
.host_start
+= bias
;
269 cur_node
.host_end
= cur_node
.host_start
;
270 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
273 gomp_mutex_unlock (&devicep
->lock
);
274 gomp_fatal ("Pointer target of array section wasn't mapped");
276 cur_node
.host_start
-= n
->host_start
;
278 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
279 /* At this point tgt_offset is target address of the
280 array section. Now subtract bias to get what we want
281 to initialize the pointer with. */
282 cur_node
.tgt_offset
-= bias
;
283 /* FIXME: see comment about coalescing host/dev transfers below. */
284 gomp_copy_host2dev (devicep
, (void *) (tgt
->tgt_start
+ target_offset
),
285 (void *) &cur_node
.tgt_offset
, sizeof (void *));
289 gomp_map_fields_existing (struct target_mem_desc
*tgt
, splay_tree_key n
,
290 size_t first
, size_t i
, void **hostaddrs
,
291 size_t *sizes
, void *kinds
)
293 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
294 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
295 struct splay_tree_key_s cur_node
;
297 const bool short_mapkind
= true;
298 const int typemask
= short_mapkind
? 0xff : 0x7;
300 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
301 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
302 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
303 kind
= get_kind (short_mapkind
, kinds
, i
);
306 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
308 gomp_map_vars_existing (devicep
, n2
, &cur_node
,
309 &tgt
->list
[i
], kind
& typemask
);
314 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
316 cur_node
.host_start
--;
317 n2
= splay_tree_lookup (mem_map
, &cur_node
);
318 cur_node
.host_start
++;
321 && n2
->host_start
- n
->host_start
322 == n2
->tgt_offset
- n
->tgt_offset
)
324 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
330 n2
= splay_tree_lookup (mem_map
, &cur_node
);
334 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
336 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
341 gomp_mutex_unlock (&devicep
->lock
);
342 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
343 "other mapped elements from the same structure weren't mapped "
344 "together with it", (void *) cur_node
.host_start
,
345 (void *) cur_node
.host_end
);
348 static inline uintptr_t
349 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
351 if (tgt
->list
[i
].key
!= NULL
)
352 return tgt
->list
[i
].key
->tgt
->tgt_start
353 + tgt
->list
[i
].key
->tgt_offset
354 + tgt
->list
[i
].offset
;
355 if (tgt
->list
[i
].offset
== ~(uintptr_t) 0)
356 return (uintptr_t) hostaddrs
[i
];
357 if (tgt
->list
[i
].offset
== ~(uintptr_t) 1)
359 if (tgt
->list
[i
].offset
== ~(uintptr_t) 2)
360 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
361 + tgt
->list
[i
+ 1].key
->tgt_offset
362 + tgt
->list
[i
+ 1].offset
363 + (uintptr_t) hostaddrs
[i
]
364 - (uintptr_t) hostaddrs
[i
+ 1];
365 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
368 attribute_hidden
struct target_mem_desc
*
369 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
370 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
371 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
373 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
374 bool has_firstprivate
= false;
375 const int rshift
= short_mapkind
? 8 : 3;
376 const int typemask
= short_mapkind
? 0xff : 0x7;
377 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
378 struct splay_tree_key_s cur_node
;
379 struct target_mem_desc
*tgt
380 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
381 tgt
->list_count
= mapnum
;
382 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
383 tgt
->device_descr
= devicep
;
392 tgt_align
= sizeof (void *);
394 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
396 size_t align
= 4 * sizeof (void *);
398 tgt_size
= mapnum
* sizeof (void *);
401 gomp_mutex_lock (&devicep
->lock
);
402 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
404 gomp_mutex_unlock (&devicep
->lock
);
409 for (i
= 0; i
< mapnum
; i
++)
411 int kind
= get_kind (short_mapkind
, kinds
, i
);
412 if (hostaddrs
[i
] == NULL
413 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
415 tgt
->list
[i
].key
= NULL
;
416 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
419 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
421 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
422 cur_node
.host_end
= cur_node
.host_start
;
423 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
426 gomp_mutex_unlock (&devicep
->lock
);
427 gomp_fatal ("use_device_ptr pointer wasn't mapped");
429 cur_node
.host_start
-= n
->host_start
;
431 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
432 + cur_node
.host_start
);
433 tgt
->list
[i
].key
= NULL
;
434 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
437 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
439 size_t first
= i
+ 1;
440 size_t last
= i
+ sizes
[i
];
441 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
442 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
444 tgt
->list
[i
].key
= NULL
;
445 tgt
->list
[i
].offset
= ~(uintptr_t) 2;
446 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
449 size_t align
= (size_t) 1 << (kind
>> rshift
);
450 if (tgt_align
< align
)
452 tgt_size
-= (uintptr_t) hostaddrs
[first
]
453 - (uintptr_t) hostaddrs
[i
];
454 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
455 tgt_size
+= cur_node
.host_end
- (uintptr_t) hostaddrs
[i
];
456 not_found_cnt
+= last
- i
;
457 for (i
= first
; i
<= last
; i
++)
458 tgt
->list
[i
].key
= NULL
;
462 for (i
= first
; i
<= last
; i
++)
463 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
468 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
470 tgt
->list
[i
].key
= NULL
;
471 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
472 has_firstprivate
= true;
475 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
476 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
477 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
479 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
480 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
482 tgt
->list
[i
].key
= NULL
;
484 size_t align
= (size_t) 1 << (kind
>> rshift
);
485 if (tgt_align
< align
)
487 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
488 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
489 has_firstprivate
= true;
493 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
495 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
498 tgt
->list
[i
].key
= NULL
;
499 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
504 n
= splay_tree_lookup (mem_map
, &cur_node
);
505 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
506 gomp_map_vars_existing (devicep
, n
, &cur_node
, &tgt
->list
[i
],
510 tgt
->list
[i
].key
= NULL
;
512 size_t align
= (size_t) 1 << (kind
>> rshift
);
514 if (tgt_align
< align
)
516 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
517 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
518 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
521 for (j
= i
+ 1; j
< mapnum
; j
++)
522 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
525 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
526 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
527 > cur_node
.host_end
))
531 tgt
->list
[j
].key
= NULL
;
542 gomp_mutex_unlock (&devicep
->lock
);
543 gomp_fatal ("unexpected aggregation");
545 tgt
->to_free
= devaddrs
[0];
546 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
547 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
549 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
551 /* Allocate tgt_align aligned tgt_size block of memory. */
552 /* FIXME: Perhaps change interface to allocate properly aligned
554 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
555 tgt_size
+ tgt_align
- 1);
558 gomp_mutex_unlock (&devicep
->lock
);
559 gomp_fatal ("device memory allocation fail");
562 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
563 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
564 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
574 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
575 tgt_size
= mapnum
* sizeof (void *);
578 if (not_found_cnt
|| has_firstprivate
)
581 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
582 splay_tree_node array
= tgt
->array
;
583 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
584 uintptr_t field_tgt_base
= 0;
586 for (i
= 0; i
< mapnum
; i
++)
587 if (tgt
->list
[i
].key
== NULL
)
589 int kind
= get_kind (short_mapkind
, kinds
, i
);
590 if (hostaddrs
[i
] == NULL
)
592 switch (kind
& typemask
)
594 size_t align
, len
, first
, last
;
596 case GOMP_MAP_FIRSTPRIVATE
:
597 align
= (size_t) 1 << (kind
>> rshift
);
598 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
599 tgt
->list
[i
].offset
= tgt_size
;
601 gomp_copy_host2dev (devicep
,
602 (void *) (tgt
->tgt_start
+ tgt_size
),
603 (void *) hostaddrs
[i
], len
);
606 case GOMP_MAP_FIRSTPRIVATE_INT
:
607 case GOMP_MAP_USE_DEVICE_PTR
:
608 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
610 case GOMP_MAP_STRUCT
:
613 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
614 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
616 if (tgt
->list
[first
].key
!= NULL
)
618 n
= splay_tree_lookup (mem_map
, &cur_node
);
621 size_t align
= (size_t) 1 << (kind
>> rshift
);
622 tgt_size
-= (uintptr_t) hostaddrs
[first
]
623 - (uintptr_t) hostaddrs
[i
];
624 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
625 tgt_size
+= (uintptr_t) hostaddrs
[first
]
626 - (uintptr_t) hostaddrs
[i
];
627 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
628 field_tgt_offset
= tgt_size
;
629 field_tgt_clear
= last
;
630 tgt_size
+= cur_node
.host_end
631 - (uintptr_t) hostaddrs
[first
];
634 for (i
= first
; i
<= last
; i
++)
635 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
639 case GOMP_MAP_ALWAYS_POINTER
:
640 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
641 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
642 n
= splay_tree_lookup (mem_map
, &cur_node
);
644 || n
->host_start
> cur_node
.host_start
645 || n
->host_end
< cur_node
.host_end
)
647 gomp_mutex_unlock (&devicep
->lock
);
648 gomp_fatal ("always pointer not mapped");
650 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
651 != GOMP_MAP_ALWAYS_POINTER
)
652 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
653 if (cur_node
.tgt_offset
)
654 cur_node
.tgt_offset
-= sizes
[i
];
655 gomp_copy_host2dev (devicep
,
656 (void *) (n
->tgt
->tgt_start
658 + cur_node
.host_start
660 (void *) &cur_node
.tgt_offset
,
662 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
663 + cur_node
.host_start
- n
->host_start
;
668 splay_tree_key k
= &array
->key
;
669 k
->host_start
= (uintptr_t) hostaddrs
[i
];
670 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
671 k
->host_end
= k
->host_start
+ sizes
[i
];
673 k
->host_end
= k
->host_start
+ sizeof (void *);
674 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
675 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
676 gomp_map_vars_existing (devicep
, n
, k
, &tgt
->list
[i
],
681 if (n
&& n
->refcount
== REFCOUNT_LINK
)
683 /* Replace target address of the pointer with target address
684 of mapped object in the splay tree. */
685 splay_tree_remove (mem_map
, n
);
688 size_t align
= (size_t) 1 << (kind
>> rshift
);
689 tgt
->list
[i
].key
= k
;
691 if (field_tgt_clear
!= ~(size_t) 0)
693 k
->tgt_offset
= k
->host_start
- field_tgt_base
695 if (i
== field_tgt_clear
)
696 field_tgt_clear
= ~(size_t) 0;
700 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
701 k
->tgt_offset
= tgt_size
;
702 tgt_size
+= k
->host_end
- k
->host_start
;
704 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
705 tgt
->list
[i
].always_copy_from
706 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
707 tgt
->list
[i
].offset
= 0;
708 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
713 splay_tree_insert (mem_map
, array
);
714 switch (kind
& typemask
)
718 case GOMP_MAP_FORCE_ALLOC
:
719 case GOMP_MAP_FORCE_FROM
:
720 case GOMP_MAP_ALWAYS_FROM
:
723 case GOMP_MAP_TOFROM
:
724 case GOMP_MAP_FORCE_TO
:
725 case GOMP_MAP_FORCE_TOFROM
:
726 case GOMP_MAP_ALWAYS_TO
:
727 case GOMP_MAP_ALWAYS_TOFROM
:
728 /* FIXME: Perhaps add some smarts, like if copying
729 several adjacent fields from host to target, use some
730 host buffer to avoid sending each var individually. */
731 gomp_copy_host2dev (devicep
,
732 (void *) (tgt
->tgt_start
734 (void *) k
->host_start
,
735 k
->host_end
- k
->host_start
);
737 case GOMP_MAP_POINTER
:
738 gomp_map_pointer (tgt
, (uintptr_t) *(void **) k
->host_start
,
739 k
->tgt_offset
, sizes
[i
]);
741 case GOMP_MAP_TO_PSET
:
742 /* FIXME: see above FIXME comment. */
743 gomp_copy_host2dev (devicep
,
744 (void *) (tgt
->tgt_start
746 (void *) k
->host_start
,
747 k
->host_end
- k
->host_start
);
749 for (j
= i
+ 1; j
< mapnum
; j
++)
750 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
754 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
755 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
760 tgt
->list
[j
].key
= k
;
761 tgt
->list
[j
].copy_from
= false;
762 tgt
->list
[j
].always_copy_from
= false;
763 if (k
->refcount
!= REFCOUNT_INFINITY
)
765 gomp_map_pointer (tgt
,
766 (uintptr_t) *(void **) hostaddrs
[j
],
768 + ((uintptr_t) hostaddrs
[j
]
774 case GOMP_MAP_FORCE_PRESENT
:
776 /* We already looked up the memory region above and it
778 size_t size
= k
->host_end
- k
->host_start
;
779 gomp_mutex_unlock (&devicep
->lock
);
780 #ifdef HAVE_INTTYPES_H
781 gomp_fatal ("present clause: !acc_is_present (%p, "
782 "%"PRIu64
" (0x%"PRIx64
"))",
783 (void *) k
->host_start
,
784 (uint64_t) size
, (uint64_t) size
);
786 gomp_fatal ("present clause: !acc_is_present (%p, "
787 "%lu (0x%lx))", (void *) k
->host_start
,
788 (unsigned long) size
, (unsigned long) size
);
792 case GOMP_MAP_FORCE_DEVICEPTR
:
793 assert (k
->host_end
- k
->host_start
== sizeof (void *));
794 gomp_copy_host2dev (devicep
,
795 (void *) (tgt
->tgt_start
797 (void *) k
->host_start
,
801 gomp_mutex_unlock (&devicep
->lock
);
802 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
808 /* Set link pointer on target to the device address of the
810 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
811 devicep
->host2dev_func (devicep
->target_id
,
812 (void *) n
->tgt_offset
,
813 &tgt_addr
, sizeof (void *));
820 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
822 for (i
= 0; i
< mapnum
; i
++)
824 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
825 /* FIXME: see above FIXME comment. */
826 gomp_copy_host2dev (devicep
,
827 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
828 (void *) &cur_node
.tgt_offset
, sizeof (void *));
832 /* If the variable from "omp target enter data" map-list was already mapped,
833 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
835 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
841 gomp_mutex_unlock (&devicep
->lock
);
846 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
848 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
850 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
856 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
857 variables back from device to host: if it is false, it is assumed that this
858 has been done already. */
860 attribute_hidden
void
861 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
863 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
865 if (tgt
->list_count
== 0)
871 gomp_mutex_lock (&devicep
->lock
);
872 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
874 gomp_mutex_unlock (&devicep
->lock
);
881 for (i
= 0; i
< tgt
->list_count
; i
++)
883 splay_tree_key k
= tgt
->list
[i
].key
;
887 bool do_unmap
= false;
888 if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
890 else if (k
->refcount
== 1)
896 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
897 || tgt
->list
[i
].always_copy_from
)
898 gomp_copy_dev2host (devicep
,
899 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
900 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
901 + tgt
->list
[i
].offset
),
902 tgt
->list
[i
].length
);
905 splay_tree_remove (&devicep
->mem_map
, k
);
907 splay_tree_insert (&devicep
->mem_map
,
908 (splay_tree_node
) k
->link_key
);
909 if (k
->tgt
->refcount
> 1)
912 gomp_unmap_tgt (k
->tgt
);
916 if (tgt
->refcount
> 1)
919 gomp_unmap_tgt (tgt
);
921 gomp_mutex_unlock (&devicep
->lock
);
925 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
926 size_t *sizes
, void *kinds
, bool short_mapkind
)
929 struct splay_tree_key_s cur_node
;
930 const int typemask
= short_mapkind
? 0xff : 0x7;
938 gomp_mutex_lock (&devicep
->lock
);
939 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
941 gomp_mutex_unlock (&devicep
->lock
);
945 for (i
= 0; i
< mapnum
; i
++)
948 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
949 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
950 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
953 int kind
= get_kind (short_mapkind
, kinds
, i
);
954 if (n
->host_start
> cur_node
.host_start
955 || n
->host_end
< cur_node
.host_end
)
957 gomp_mutex_unlock (&devicep
->lock
);
958 gomp_fatal ("Trying to update [%p..%p) object when "
959 "only [%p..%p) is mapped",
960 (void *) cur_node
.host_start
,
961 (void *) cur_node
.host_end
,
962 (void *) n
->host_start
,
963 (void *) n
->host_end
);
967 void *hostaddr
= (void *) cur_node
.host_start
;
968 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
969 + cur_node
.host_start
- n
->host_start
);
970 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
972 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
973 gomp_copy_host2dev (devicep
, devaddr
, hostaddr
, size
);
974 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
975 gomp_copy_dev2host (devicep
, hostaddr
, devaddr
, size
);
978 gomp_mutex_unlock (&devicep
->lock
);
981 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
982 And insert to splay tree the mapping between addresses from HOST_TABLE and
983 from loaded target image. We rely in the host and device compiler
984 emitting variable and functions in the same order. */
987 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
988 const void *host_table
, const void *target_data
,
989 bool is_register_lock
)
991 void **host_func_table
= ((void ***) host_table
)[0];
992 void **host_funcs_end
= ((void ***) host_table
)[1];
993 void **host_var_table
= ((void ***) host_table
)[2];
994 void **host_vars_end
= ((void ***) host_table
)[3];
996 /* The func table contains only addresses, the var table contains addresses
997 and corresponding sizes. */
998 int num_funcs
= host_funcs_end
- host_func_table
;
999 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1001 /* Load image to device and get target addresses for the image. */
1002 struct addr_pair
*target_table
= NULL
;
1003 int i
, num_target_entries
;
1006 = devicep
->load_image_func (devicep
->target_id
, version
,
1007 target_data
, &target_table
);
1009 if (num_target_entries
!= num_funcs
+ num_vars
)
1011 gomp_mutex_unlock (&devicep
->lock
);
1012 if (is_register_lock
)
1013 gomp_mutex_unlock (®ister_lock
);
1014 gomp_fatal ("Cannot map target functions or variables"
1015 " (expected %u, have %u)", num_funcs
+ num_vars
,
1016 num_target_entries
);
1019 /* Insert host-target address mapping into splay tree. */
1020 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1021 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
1022 tgt
->refcount
= REFCOUNT_INFINITY
;
1025 tgt
->to_free
= NULL
;
1027 tgt
->list_count
= 0;
1028 tgt
->device_descr
= devicep
;
1029 splay_tree_node array
= tgt
->array
;
1031 for (i
= 0; i
< num_funcs
; i
++)
1033 splay_tree_key k
= &array
->key
;
1034 k
->host_start
= (uintptr_t) host_func_table
[i
];
1035 k
->host_end
= k
->host_start
+ 1;
1037 k
->tgt_offset
= target_table
[i
].start
;
1038 k
->refcount
= REFCOUNT_INFINITY
;
1041 array
->right
= NULL
;
1042 splay_tree_insert (&devicep
->mem_map
, array
);
1046 /* Most significant bit of the size in host and target tables marks
1047 "omp declare target link" variables. */
1048 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1049 const uintptr_t size_mask
= ~link_bit
;
1051 for (i
= 0; i
< num_vars
; i
++)
1053 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1054 uintptr_t target_size
= target_var
->end
- target_var
->start
;
1056 if ((uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
1058 gomp_mutex_unlock (&devicep
->lock
);
1059 if (is_register_lock
)
1060 gomp_mutex_unlock (®ister_lock
);
1061 gomp_fatal ("Cannot map target variables (size mismatch)");
1064 splay_tree_key k
= &array
->key
;
1065 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1067 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1069 k
->tgt_offset
= target_var
->start
;
1070 k
->refcount
= target_size
& link_bit
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
1073 array
->right
= NULL
;
1074 splay_tree_insert (&devicep
->mem_map
, array
);
1078 free (target_table
);
1081 /* Unload the mappings described by target_data from device DEVICE_P.
1082 The device must be locked. */
1085 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1087 const void *host_table
, const void *target_data
)
1089 void **host_func_table
= ((void ***) host_table
)[0];
1090 void **host_funcs_end
= ((void ***) host_table
)[1];
1091 void **host_var_table
= ((void ***) host_table
)[2];
1092 void **host_vars_end
= ((void ***) host_table
)[3];
1094 /* The func table contains only addresses, the var table contains addresses
1095 and corresponding sizes. */
1096 int num_funcs
= host_funcs_end
- host_func_table
;
1097 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1099 struct splay_tree_key_s k
;
1100 splay_tree_key node
= NULL
;
1102 /* Find mapping at start of node array */
1103 if (num_funcs
|| num_vars
)
1105 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1106 : (uintptr_t) host_var_table
[0]);
1107 k
.host_end
= k
.host_start
+ 1;
1108 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1111 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
1113 gomp_mutex_unlock (&devicep
->lock
);
1114 gomp_fatal ("image unload fail");
1117 /* Remove mappings from splay tree. */
1119 for (i
= 0; i
< num_funcs
; i
++)
1121 k
.host_start
= (uintptr_t) host_func_table
[i
];
1122 k
.host_end
= k
.host_start
+ 1;
1123 splay_tree_remove (&devicep
->mem_map
, &k
);
1126 /* Most significant bit of the size in host and target tables marks
1127 "omp declare target link" variables. */
1128 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1129 const uintptr_t size_mask
= ~link_bit
;
1130 bool is_tgt_unmapped
= false;
1132 for (i
= 0; i
< num_vars
; i
++)
1134 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
1136 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1138 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
1139 splay_tree_remove (&devicep
->mem_map
, &k
);
1142 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1143 splay_tree_remove (&devicep
->mem_map
, n
);
1146 if (n
->tgt
->refcount
> 1)
1150 is_tgt_unmapped
= true;
1151 gomp_unmap_tgt (n
->tgt
);
1157 if (node
&& !is_tgt_unmapped
)
1164 /* This function should be called from every offload image while loading.
1165 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1166 the target, and TARGET_DATA needed by target plugin. */
1169 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1170 int target_type
, const void *target_data
)
1174 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1175 gomp_fatal ("Library too old for offload (version %u < %u)",
1176 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1178 gomp_mutex_lock (®ister_lock
);
1180 /* Load image to all initialized devices. */
1181 for (i
= 0; i
< num_devices
; i
++)
1183 struct gomp_device_descr
*devicep
= &devices
[i
];
1184 gomp_mutex_lock (&devicep
->lock
);
1185 if (devicep
->type
== target_type
1186 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1187 gomp_load_image_to_device (devicep
, version
,
1188 host_table
, target_data
, true);
1189 gomp_mutex_unlock (&devicep
->lock
);
1192 /* Insert image to array of pending images. */
1194 = gomp_realloc_unlock (offload_images
,
1195 (num_offload_images
+ 1)
1196 * sizeof (struct offload_image_descr
));
1197 offload_images
[num_offload_images
].version
= version
;
1198 offload_images
[num_offload_images
].type
= target_type
;
1199 offload_images
[num_offload_images
].host_table
= host_table
;
1200 offload_images
[num_offload_images
].target_data
= target_data
;
1202 num_offload_images
++;
1203 gomp_mutex_unlock (®ister_lock
);
1207 GOMP_offload_register (const void *host_table
, int target_type
,
1208 const void *target_data
)
1210 GOMP_offload_register_ver (0, host_table
, target_type
, target_data
);
1213 /* This function should be called from every offload image while unloading.
1214 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1215 the target, and TARGET_DATA needed by target plugin. */
1218 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1219 int target_type
, const void *target_data
)
1223 gomp_mutex_lock (®ister_lock
);
1225 /* Unload image from all initialized devices. */
1226 for (i
= 0; i
< num_devices
; i
++)
1228 struct gomp_device_descr
*devicep
= &devices
[i
];
1229 gomp_mutex_lock (&devicep
->lock
);
1230 if (devicep
->type
== target_type
1231 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1232 gomp_unload_image_from_device (devicep
, version
,
1233 host_table
, target_data
);
1234 gomp_mutex_unlock (&devicep
->lock
);
1237 /* Remove image from array of pending images. */
1238 for (i
= 0; i
< num_offload_images
; i
++)
1239 if (offload_images
[i
].target_data
== target_data
)
1241 offload_images
[i
] = offload_images
[--num_offload_images
];
1245 gomp_mutex_unlock (®ister_lock
);
1249 GOMP_offload_unregister (const void *host_table
, int target_type
,
1250 const void *target_data
)
1252 GOMP_offload_unregister_ver (0, host_table
, target_type
, target_data
);
1255 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1256 must be locked on entry, and remains locked on return. */
1258 attribute_hidden
void
1259 gomp_init_device (struct gomp_device_descr
*devicep
)
1262 if (!devicep
->init_device_func (devicep
->target_id
))
1264 gomp_mutex_unlock (&devicep
->lock
);
1265 gomp_fatal ("device initialization failed");
1268 /* Load to device all images registered by the moment. */
1269 for (i
= 0; i
< num_offload_images
; i
++)
1271 struct offload_image_descr
*image
= &offload_images
[i
];
1272 if (image
->type
== devicep
->type
)
1273 gomp_load_image_to_device (devicep
, image
->version
,
1274 image
->host_table
, image
->target_data
,
1278 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
1281 attribute_hidden
void
1282 gomp_unload_device (struct gomp_device_descr
*devicep
)
1284 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1288 /* Unload from device all images registered at the moment. */
1289 for (i
= 0; i
< num_offload_images
; i
++)
1291 struct offload_image_descr
*image
= &offload_images
[i
];
1292 if (image
->type
== devicep
->type
)
1293 gomp_unload_image_from_device (devicep
, image
->version
,
1295 image
->target_data
);
1300 /* Free address mapping tables. MM must be locked on entry, and remains locked
1303 attribute_hidden
void
1304 gomp_free_memmap (struct splay_tree_s
*mem_map
)
1306 while (mem_map
->root
)
1308 struct target_mem_desc
*tgt
= mem_map
->root
->key
.tgt
;
1310 splay_tree_remove (mem_map
, &mem_map
->root
->key
);
1316 /* Host fallback for GOMP_target{,_ext} routines. */
1319 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
1321 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1323 memset (thr
, '\0', sizeof (*thr
));
1324 if (gomp_places_list
)
1326 thr
->place
= old_thr
.place
;
1327 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1330 gomp_free_thread (thr
);
1334 /* Calculate alignment and size requirements of a private copy of data shared
1335 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1338 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
1339 unsigned short *kinds
, size_t *tgt_align
,
1343 for (i
= 0; i
< mapnum
; i
++)
1344 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1346 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1347 if (*tgt_align
< align
)
1349 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
1350 *tgt_size
+= sizes
[i
];
1354 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1357 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
1358 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
1361 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
1363 tgt
+= tgt_align
- al
;
1366 for (i
= 0; i
< mapnum
; i
++)
1367 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1369 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1370 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1371 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
1372 hostaddrs
[i
] = tgt
+ tgt_size
;
1373 tgt_size
= tgt_size
+ sizes
[i
];
1377 /* Helper function of GOMP_target{,_ext} routines. */
1380 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
1381 void (*host_fn
) (void *))
1383 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
1384 return (void *) host_fn
;
1387 gomp_mutex_lock (&devicep
->lock
);
1388 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1390 gomp_mutex_unlock (&devicep
->lock
);
1394 struct splay_tree_key_s k
;
1395 k
.host_start
= (uintptr_t) host_fn
;
1396 k
.host_end
= k
.host_start
+ 1;
1397 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1398 gomp_mutex_unlock (&devicep
->lock
);
1402 return (void *) tgt_fn
->tgt_offset
;
1406 /* Called when encountering a target directive. If DEVICE
1407 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1408 GOMP_DEVICE_HOST_FALLBACK (or any value
1409 larger than last available hw device), use host fallback.
1410 FN is address of host code, UNUSED is part of the current ABI, but
1411 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1412 with MAPNUM entries, with addresses of the host objects,
1413 sizes of the host objects (resp. for pointer kind pointer bias
1414 and assumed sizeof (void *) size) and kinds. */
1417 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
1418 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1419 unsigned char *kinds
)
1421 struct gomp_device_descr
*devicep
= resolve_device (device
);
1425 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1426 /* All shared memory devices should use the GOMP_target_ext function. */
1427 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
1428 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
1429 return gomp_target_fallback (fn
, hostaddrs
);
1431 struct target_mem_desc
*tgt_vars
1432 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1433 GOMP_MAP_VARS_TARGET
);
1434 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
1436 gomp_unmap_vars (tgt_vars
, true);
1439 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1440 and several arguments have been added:
1441 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1442 DEPEND is array of dependencies, see GOMP_task for details.
1444 ARGS is a pointer to an array consisting of a variable number of both
1445 device-independent and device-specific arguments, which can take one two
1446 elements where the first specifies for which device it is intended, the type
1447 and optionally also the value. If the value is not present in the first
1448 one, the whole second element the actual value. The last element of the
1449 array is a single NULL. Among the device independent can be for example
1450 NUM_TEAMS and THREAD_LIMIT.
1452 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1453 that value, or 1 if teams construct is not present, or 0, if
1454 teams construct does not have num_teams clause and so the choice is
1455 implementation defined, and -1 if it can't be determined on the host
1456 what value will GOMP_teams have on the device.
1457 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1458 body with that value, or 0, if teams construct does not have thread_limit
1459 clause or the teams construct is not present, or -1 if it can't be
1460 determined on the host what value will GOMP_teams have on the device. */
1463 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
1464 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1465 unsigned int flags
, void **depend
, void **args
)
1467 struct gomp_device_descr
*devicep
= resolve_device (device
);
1468 size_t tgt_align
= 0, tgt_size
= 0;
1469 bool fpc_done
= false;
1471 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
1473 struct gomp_thread
*thr
= gomp_thread ();
1474 /* Create a team if we don't have any around, as nowait
1475 target tasks make sense to run asynchronously even when
1476 outside of any parallel. */
1477 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
1479 struct gomp_team
*team
= gomp_new_team (1);
1480 struct gomp_task
*task
= thr
->task
;
1481 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
1482 team
->prev_ts
= thr
->ts
;
1483 thr
->ts
.team
= team
;
1484 thr
->ts
.team_id
= 0;
1485 thr
->ts
.work_share
= &team
->work_shares
[0];
1486 thr
->ts
.last_work_share
= NULL
;
1487 #ifdef HAVE_SYNC_BUILTINS
1488 thr
->ts
.single_count
= 0;
1490 thr
->ts
.static_trip
= 0;
1491 thr
->task
= &team
->implicit_task
[0];
1492 gomp_init_task (thr
->task
, NULL
, icv
);
1498 thr
->task
= &team
->implicit_task
[0];
1501 pthread_setspecific (gomp_thread_destructor
, thr
);
1504 && !thr
->task
->final_task
)
1506 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
1507 sizes
, kinds
, flags
, depend
, args
,
1508 GOMP_TARGET_TASK_BEFORE_MAP
);
1513 /* If there are depend clauses, but nowait is not present
1514 (or we are in a final task), block the parent task until the
1515 dependencies are resolved and then just continue with the rest
1516 of the function as if it is a merged task. */
1519 struct gomp_thread
*thr
= gomp_thread ();
1520 if (thr
->task
&& thr
->task
->depend_hash
)
1522 /* If we might need to wait, copy firstprivate now. */
1523 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1524 &tgt_align
, &tgt_size
);
1527 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1528 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1529 tgt_align
, tgt_size
);
1532 gomp_task_maybe_wait_for_dependencies (depend
);
1538 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1539 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
1540 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
1544 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1545 &tgt_align
, &tgt_size
);
1548 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1549 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1550 tgt_align
, tgt_size
);
1553 gomp_target_fallback (fn
, hostaddrs
);
1557 struct target_mem_desc
*tgt_vars
;
1558 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1562 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1563 &tgt_align
, &tgt_size
);
1566 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1567 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1568 tgt_align
, tgt_size
);
1574 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
1575 true, GOMP_MAP_VARS_TARGET
);
1576 devicep
->run_func (devicep
->target_id
, fn_addr
,
1577 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
1580 gomp_unmap_vars (tgt_vars
, true);
1583 /* Host fallback for GOMP_target_data{,_ext} routines. */
1586 gomp_target_data_fallback (void)
1588 struct gomp_task_icv
*icv
= gomp_icv (false);
1589 if (icv
->target_data
)
1591 /* Even when doing a host fallback, if there are any active
1592 #pragma omp target data constructs, need to remember the
1593 new #pragma omp target data, otherwise GOMP_target_end_data
1594 would get out of sync. */
1595 struct target_mem_desc
*tgt
1596 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1597 GOMP_MAP_VARS_DATA
);
1598 tgt
->prev
= icv
->target_data
;
1599 icv
->target_data
= tgt
;
1604 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
1605 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1607 struct gomp_device_descr
*devicep
= resolve_device (device
);
1610 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1611 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
1612 return gomp_target_data_fallback ();
1614 struct target_mem_desc
*tgt
1615 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1616 GOMP_MAP_VARS_DATA
);
1617 struct gomp_task_icv
*icv
= gomp_icv (true);
1618 tgt
->prev
= icv
->target_data
;
1619 icv
->target_data
= tgt
;
1623 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
1624 size_t *sizes
, unsigned short *kinds
)
1626 struct gomp_device_descr
*devicep
= resolve_device (device
);
1629 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1630 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1631 return gomp_target_data_fallback ();
1633 struct target_mem_desc
*tgt
1634 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
1635 GOMP_MAP_VARS_DATA
);
1636 struct gomp_task_icv
*icv
= gomp_icv (true);
1637 tgt
->prev
= icv
->target_data
;
1638 icv
->target_data
= tgt
;
1642 GOMP_target_end_data (void)
1644 struct gomp_task_icv
*icv
= gomp_icv (false);
1645 if (icv
->target_data
)
1647 struct target_mem_desc
*tgt
= icv
->target_data
;
1648 icv
->target_data
= tgt
->prev
;
1649 gomp_unmap_vars (tgt
, true);
1654 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
1655 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1657 struct gomp_device_descr
*devicep
= resolve_device (device
);
1660 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1661 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1664 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
1668 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
1669 size_t *sizes
, unsigned short *kinds
,
1670 unsigned int flags
, void **depend
)
1672 struct gomp_device_descr
*devicep
= resolve_device (device
);
1674 /* If there are depend clauses, but nowait is not present,
1675 block the parent task until the dependencies are resolved
1676 and then just continue with the rest of the function as if it
1677 is a merged task. Until we are able to schedule task during
1678 variable mapping or unmapping, ignore nowait if depend clauses
1682 struct gomp_thread
*thr
= gomp_thread ();
1683 if (thr
->task
&& thr
->task
->depend_hash
)
1685 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1687 && !thr
->task
->final_task
)
1689 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1690 mapnum
, hostaddrs
, sizes
, kinds
,
1691 flags
| GOMP_TARGET_FLAG_UPDATE
,
1692 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
1697 struct gomp_team
*team
= thr
->ts
.team
;
1698 /* If parallel or taskgroup has been cancelled, don't start new
1701 && (gomp_team_barrier_cancelled (&team
->barrier
)
1702 || (thr
->task
->taskgroup
1703 && thr
->task
->taskgroup
->cancelled
)))
1706 gomp_task_maybe_wait_for_dependencies (depend
);
1712 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1713 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1716 struct gomp_thread
*thr
= gomp_thread ();
1717 struct gomp_team
*team
= thr
->ts
.team
;
1718 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1720 && (gomp_team_barrier_cancelled (&team
->barrier
)
1721 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1724 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
1728 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
1729 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
1731 const int typemask
= 0xff;
1733 gomp_mutex_lock (&devicep
->lock
);
1734 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1736 gomp_mutex_unlock (&devicep
->lock
);
1740 for (i
= 0; i
< mapnum
; i
++)
1742 struct splay_tree_key_s cur_node
;
1743 unsigned char kind
= kinds
[i
] & typemask
;
1747 case GOMP_MAP_ALWAYS_FROM
:
1748 case GOMP_MAP_DELETE
:
1749 case GOMP_MAP_RELEASE
:
1750 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1751 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
1752 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1753 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1754 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1755 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1756 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
1757 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1761 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
1763 if ((kind
== GOMP_MAP_DELETE
1764 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
1765 && k
->refcount
!= REFCOUNT_INFINITY
)
1768 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
1769 || kind
== GOMP_MAP_ALWAYS_FROM
)
1770 gomp_copy_dev2host (devicep
, (void *) cur_node
.host_start
,
1771 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1772 + cur_node
.host_start
1774 cur_node
.host_end
- cur_node
.host_start
);
1775 if (k
->refcount
== 0)
1777 splay_tree_remove (&devicep
->mem_map
, k
);
1779 splay_tree_insert (&devicep
->mem_map
,
1780 (splay_tree_node
) k
->link_key
);
1781 if (k
->tgt
->refcount
> 1)
1784 gomp_unmap_tgt (k
->tgt
);
1789 gomp_mutex_unlock (&devicep
->lock
);
1790 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1795 gomp_mutex_unlock (&devicep
->lock
);
1799 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
1800 size_t *sizes
, unsigned short *kinds
,
1801 unsigned int flags
, void **depend
)
1803 struct gomp_device_descr
*devicep
= resolve_device (device
);
1805 /* If there are depend clauses, but nowait is not present,
1806 block the parent task until the dependencies are resolved
1807 and then just continue with the rest of the function as if it
1808 is a merged task. Until we are able to schedule task during
1809 variable mapping or unmapping, ignore nowait if depend clauses
1813 struct gomp_thread
*thr
= gomp_thread ();
1814 if (thr
->task
&& thr
->task
->depend_hash
)
1816 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1818 && !thr
->task
->final_task
)
1820 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1821 mapnum
, hostaddrs
, sizes
, kinds
,
1822 flags
, depend
, NULL
,
1823 GOMP_TARGET_TASK_DATA
))
1828 struct gomp_team
*team
= thr
->ts
.team
;
1829 /* If parallel or taskgroup has been cancelled, don't start new
1832 && (gomp_team_barrier_cancelled (&team
->barrier
)
1833 || (thr
->task
->taskgroup
1834 && thr
->task
->taskgroup
->cancelled
)))
1837 gomp_task_maybe_wait_for_dependencies (depend
);
1843 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1844 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1847 struct gomp_thread
*thr
= gomp_thread ();
1848 struct gomp_team
*team
= thr
->ts
.team
;
1849 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1851 && (gomp_team_barrier_cancelled (&team
->barrier
)
1852 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1856 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1857 for (i
= 0; i
< mapnum
; i
++)
1858 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1860 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
1861 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1865 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
1866 true, GOMP_MAP_VARS_ENTER_DATA
);
1868 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
1872 gomp_target_task_fn (void *data
)
1874 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
1875 struct gomp_device_descr
*devicep
= ttask
->devicep
;
1877 if (ttask
->fn
!= NULL
)
1881 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1882 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
1883 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
1885 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
1886 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
);
1890 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1893 gomp_unmap_vars (ttask
->tgt
, true);
1897 void *actual_arguments
;
1898 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1901 actual_arguments
= ttask
->hostaddrs
;
1905 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
1906 NULL
, ttask
->sizes
, ttask
->kinds
, true,
1907 GOMP_MAP_VARS_TARGET
);
1908 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
1910 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
1912 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
1913 ttask
->args
, (void *) ttask
);
1916 else if (devicep
== NULL
1917 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1918 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1922 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
1923 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
1924 ttask
->kinds
, true);
1925 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1926 for (i
= 0; i
< ttask
->mapnum
; i
++)
1927 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1929 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
1930 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
1931 GOMP_MAP_VARS_ENTER_DATA
);
1932 i
+= ttask
->sizes
[i
];
1935 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
1936 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1938 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
1944 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
1948 struct gomp_task_icv
*icv
= gomp_icv (true);
1949 icv
->thread_limit_var
1950 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
1956 omp_target_alloc (size_t size
, int device_num
)
1958 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1959 return malloc (size
);
1964 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1965 if (devicep
== NULL
)
1968 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1969 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1970 return malloc (size
);
1972 gomp_mutex_lock (&devicep
->lock
);
1973 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
1974 gomp_mutex_unlock (&devicep
->lock
);
1979 omp_target_free (void *device_ptr
, int device_num
)
1981 if (device_ptr
== NULL
)
1984 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1993 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1994 if (devicep
== NULL
)
1997 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1998 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2004 gomp_mutex_lock (&devicep
->lock
);
2005 gomp_free_device_memory (devicep
, device_ptr
);
2006 gomp_mutex_unlock (&devicep
->lock
);
2010 omp_target_is_present (void *ptr
, int device_num
)
2015 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2021 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2022 if (devicep
== NULL
)
2025 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2026 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2029 gomp_mutex_lock (&devicep
->lock
);
2030 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2031 struct splay_tree_key_s cur_node
;
2033 cur_node
.host_start
= (uintptr_t) ptr
;
2034 cur_node
.host_end
= cur_node
.host_start
;
2035 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2036 int ret
= n
!= NULL
;
2037 gomp_mutex_unlock (&devicep
->lock
);
2042 omp_target_memcpy (void *dst
, void *src
, size_t length
, size_t dst_offset
,
2043 size_t src_offset
, int dst_device_num
, int src_device_num
)
2045 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2048 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2050 if (dst_device_num
< 0)
2053 dst_devicep
= resolve_device (dst_device_num
);
2054 if (dst_devicep
== NULL
)
2057 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2058 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2061 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2063 if (src_device_num
< 0)
2066 src_devicep
= resolve_device (src_device_num
);
2067 if (src_devicep
== NULL
)
2070 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2071 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2074 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2076 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2079 if (src_devicep
== NULL
)
2081 gomp_mutex_lock (&dst_devicep
->lock
);
2082 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2083 (char *) dst
+ dst_offset
,
2084 (char *) src
+ src_offset
, length
);
2085 gomp_mutex_unlock (&dst_devicep
->lock
);
2086 return (ret
? 0 : EINVAL
);
2088 if (dst_devicep
== NULL
)
2090 gomp_mutex_lock (&src_devicep
->lock
);
2091 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2092 (char *) dst
+ dst_offset
,
2093 (char *) src
+ src_offset
, length
);
2094 gomp_mutex_unlock (&src_devicep
->lock
);
2095 return (ret
? 0 : EINVAL
);
2097 if (src_devicep
== dst_devicep
)
2099 gomp_mutex_lock (&src_devicep
->lock
);
2100 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2101 (char *) dst
+ dst_offset
,
2102 (char *) src
+ src_offset
, length
);
2103 gomp_mutex_unlock (&src_devicep
->lock
);
2104 return (ret
? 0 : EINVAL
);
2110 omp_target_memcpy_rect_worker (void *dst
, void *src
, size_t element_size
,
2111 int num_dims
, const size_t *volume
,
2112 const size_t *dst_offsets
,
2113 const size_t *src_offsets
,
2114 const size_t *dst_dimensions
,
2115 const size_t *src_dimensions
,
2116 struct gomp_device_descr
*dst_devicep
,
2117 struct gomp_device_descr
*src_devicep
)
2119 size_t dst_slice
= element_size
;
2120 size_t src_slice
= element_size
;
2121 size_t j
, dst_off
, src_off
, length
;
2126 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2127 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2128 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2130 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2132 memcpy ((char *) dst
+ dst_off
, (char *) src
+ src_off
, length
);
2135 else if (src_devicep
== NULL
)
2136 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2137 (char *) dst
+ dst_off
,
2138 (char *) src
+ src_off
, length
);
2139 else if (dst_devicep
== NULL
)
2140 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2141 (char *) dst
+ dst_off
,
2142 (char *) src
+ src_off
, length
);
2143 else if (src_devicep
== dst_devicep
)
2144 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2145 (char *) dst
+ dst_off
,
2146 (char *) src
+ src_off
, length
);
2149 return ret
? 0 : EINVAL
;
2152 /* FIXME: it would be nice to have some plugin function to handle
2153 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2154 be handled in the generic recursion below, and for host-host it
2155 should be used even for any num_dims >= 2. */
2157 for (i
= 1; i
< num_dims
; i
++)
2158 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2159 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2161 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2162 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2164 for (j
= 0; j
< volume
[0]; j
++)
2166 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2167 (char *) src
+ src_off
,
2168 element_size
, num_dims
- 1,
2169 volume
+ 1, dst_offsets
+ 1,
2170 src_offsets
+ 1, dst_dimensions
+ 1,
2171 src_dimensions
+ 1, dst_devicep
,
2175 dst_off
+= dst_slice
;
2176 src_off
+= src_slice
;
2182 omp_target_memcpy_rect (void *dst
, void *src
, size_t element_size
,
2183 int num_dims
, const size_t *volume
,
2184 const size_t *dst_offsets
,
2185 const size_t *src_offsets
,
2186 const size_t *dst_dimensions
,
2187 const size_t *src_dimensions
,
2188 int dst_device_num
, int src_device_num
)
2190 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2195 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2197 if (dst_device_num
< 0)
2200 dst_devicep
= resolve_device (dst_device_num
);
2201 if (dst_devicep
== NULL
)
2204 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2205 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2208 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2210 if (src_device_num
< 0)
2213 src_devicep
= resolve_device (src_device_num
);
2214 if (src_devicep
== NULL
)
2217 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2218 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2222 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2226 gomp_mutex_lock (&src_devicep
->lock
);
2227 else if (dst_devicep
)
2228 gomp_mutex_lock (&dst_devicep
->lock
);
2229 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2230 volume
, dst_offsets
, src_offsets
,
2231 dst_dimensions
, src_dimensions
,
2232 dst_devicep
, src_devicep
);
2234 gomp_mutex_unlock (&src_devicep
->lock
);
2235 else if (dst_devicep
)
2236 gomp_mutex_unlock (&dst_devicep
->lock
);
2241 omp_target_associate_ptr (void *host_ptr
, void *device_ptr
, size_t size
,
2242 size_t device_offset
, int device_num
)
2244 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2250 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2251 if (devicep
== NULL
)
2254 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2255 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2258 gomp_mutex_lock (&devicep
->lock
);
2260 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2261 struct splay_tree_key_s cur_node
;
2264 cur_node
.host_start
= (uintptr_t) host_ptr
;
2265 cur_node
.host_end
= cur_node
.host_start
+ size
;
2266 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2269 if (n
->tgt
->tgt_start
+ n
->tgt_offset
2270 == (uintptr_t) device_ptr
+ device_offset
2271 && n
->host_start
<= cur_node
.host_start
2272 && n
->host_end
>= cur_node
.host_end
)
2277 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2278 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2282 tgt
->to_free
= NULL
;
2284 tgt
->list_count
= 0;
2285 tgt
->device_descr
= devicep
;
2286 splay_tree_node array
= tgt
->array
;
2287 splay_tree_key k
= &array
->key
;
2288 k
->host_start
= cur_node
.host_start
;
2289 k
->host_end
= cur_node
.host_end
;
2291 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2292 k
->refcount
= REFCOUNT_INFINITY
;
2294 array
->right
= NULL
;
2295 splay_tree_insert (&devicep
->mem_map
, array
);
2298 gomp_mutex_unlock (&devicep
->lock
);
2303 omp_target_disassociate_ptr (void *ptr
, int device_num
)
2305 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2311 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2312 if (devicep
== NULL
)
2315 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2318 gomp_mutex_lock (&devicep
->lock
);
2320 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2321 struct splay_tree_key_s cur_node
;
2324 cur_node
.host_start
= (uintptr_t) ptr
;
2325 cur_node
.host_end
= cur_node
.host_start
;
2326 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2328 && n
->host_start
== cur_node
.host_start
2329 && n
->refcount
== REFCOUNT_INFINITY
2330 && n
->tgt
->tgt_start
== 0
2331 && n
->tgt
->to_free
== NULL
2332 && n
->tgt
->refcount
== 1
2333 && n
->tgt
->list_count
== 0)
2335 splay_tree_remove (&devicep
->mem_map
, n
);
2336 gomp_unmap_tgt (n
->tgt
);
2340 gomp_mutex_unlock (&devicep
->lock
);
2344 #ifdef PLUGIN_SUPPORT
2346 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2348 The handles of the found functions are stored in the corresponding fields
2349 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2352 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2353 const char *plugin_name
)
2355 const char *err
= NULL
, *last_missing
= NULL
;
2357 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2361 /* Check if all required functions are available in the plugin and store
2362 their handlers. None of the symbols can legitimately be NULL,
2363 so we don't need to check dlerror all the time. */
2365 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2367 /* Similar, but missing functions are not an error. Return false if
2368 failed, true otherwise. */
2369 #define DLSYM_OPT(f, n) \
2370 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2371 || (last_missing = #n, 0))
2374 if (device
->version_func () != GOMP_VERSION
)
2376 err
= "plugin version mismatch";
2383 DLSYM (get_num_devices
);
2384 DLSYM (init_device
);
2385 DLSYM (fini_device
);
2387 DLSYM (unload_image
);
2392 device
->capabilities
= device
->get_caps_func ();
2393 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2397 DLSYM_OPT (can_run
, can_run
);
2400 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2402 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
2403 || !DLSYM_OPT (openacc
.register_async_cleanup
,
2404 openacc_register_async_cleanup
)
2405 || !DLSYM_OPT (openacc
.async_test
, openacc_async_test
)
2406 || !DLSYM_OPT (openacc
.async_test_all
, openacc_async_test_all
)
2407 || !DLSYM_OPT (openacc
.async_wait
, openacc_async_wait
)
2408 || !DLSYM_OPT (openacc
.async_wait_async
, openacc_async_wait_async
)
2409 || !DLSYM_OPT (openacc
.async_wait_all
, openacc_async_wait_all
)
2410 || !DLSYM_OPT (openacc
.async_wait_all_async
,
2411 openacc_async_wait_all_async
)
2412 || !DLSYM_OPT (openacc
.async_set_async
, openacc_async_set_async
)
2413 || !DLSYM_OPT (openacc
.create_thread_data
,
2414 openacc_create_thread_data
)
2415 || !DLSYM_OPT (openacc
.destroy_thread_data
,
2416 openacc_destroy_thread_data
))
2418 /* Require all the OpenACC handlers if we have
2419 GOMP_OFFLOAD_CAP_OPENACC_200. */
2420 err
= "plugin missing OpenACC handler function";
2425 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
2426 openacc_cuda_get_current_device
);
2427 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
2428 openacc_cuda_get_current_context
);
2429 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
2430 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
2431 if (cuda
&& cuda
!= 4)
2433 /* Make sure all the CUDA functions are there if any of them are. */
2434 err
= "plugin missing OpenACC CUDA handler function";
2446 gomp_error ("while loading %s: %s", plugin_name
, err
);
2448 gomp_error ("missing function was %s", last_missing
);
2450 dlclose (plugin_handle
);
2455 /* This function finalizes all initialized devices. */
2458 gomp_target_fini (void)
2461 for (i
= 0; i
< num_devices
; i
++)
2464 struct gomp_device_descr
*devicep
= &devices
[i
];
2465 gomp_mutex_lock (&devicep
->lock
);
2466 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2468 ret
= devicep
->fini_device_func (devicep
->target_id
);
2469 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2471 gomp_mutex_unlock (&devicep
->lock
);
2473 gomp_fatal ("device finalization failed");
2477 /* This function initializes the runtime needed for offloading.
2478 It parses the list of offload targets and tries to load the plugins for
2479 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2480 will be set, and the array DEVICES initialized, containing descriptors for
2481 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2485 gomp_target_init (void)
2487 const char *prefix
="libgomp-plugin-";
2488 const char *suffix
= SONAME_SUFFIX (1);
2489 const char *cur
, *next
;
2491 int i
, new_num_devices
;
2496 cur
= OFFLOAD_TARGETS
;
2500 struct gomp_device_descr current_device
;
2502 next
= strchr (cur
, ',');
2504 plugin_name
= (char *) malloc (1 + (next
? next
- cur
: strlen (cur
))
2505 + strlen (prefix
) + strlen (suffix
));
2512 strcpy (plugin_name
, prefix
);
2513 strncat (plugin_name
, cur
, next
? next
- cur
: strlen (cur
));
2514 strcat (plugin_name
, suffix
);
2516 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
2518 new_num_devices
= current_device
.get_num_devices_func ();
2519 if (new_num_devices
>= 1)
2521 /* Augment DEVICES and NUM_DEVICES. */
2523 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
2524 * sizeof (struct gomp_device_descr
));
2532 current_device
.name
= current_device
.get_name_func ();
2533 /* current_device.capabilities has already been set. */
2534 current_device
.type
= current_device
.get_type_func ();
2535 current_device
.mem_map
.root
= NULL
;
2536 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
2537 current_device
.openacc
.data_environ
= NULL
;
2538 for (i
= 0; i
< new_num_devices
; i
++)
2540 current_device
.target_id
= i
;
2541 devices
[num_devices
] = current_device
;
2542 gomp_mutex_init (&devices
[num_devices
].lock
);
2553 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2554 NUM_DEVICES_OPENMP. */
2555 struct gomp_device_descr
*devices_s
2556 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
2563 num_devices_openmp
= 0;
2564 for (i
= 0; i
< num_devices
; i
++)
2565 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2566 devices_s
[num_devices_openmp
++] = devices
[i
];
2567 int num_devices_after_openmp
= num_devices_openmp
;
2568 for (i
= 0; i
< num_devices
; i
++)
2569 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2570 devices_s
[num_devices_after_openmp
++] = devices
[i
];
2572 devices
= devices_s
;
2574 for (i
= 0; i
< num_devices
; i
++)
2576 /* The 'devices' array can be moved (by the realloc call) until we have
2577 found all the plugins, so registering with the OpenACC runtime (which
2578 takes a copy of the pointer argument) must be delayed until now. */
2579 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2580 goacc_register (&devices
[i
]);
2583 if (atexit (gomp_target_fini
) != 0)
2584 gomp_fatal ("atexit failed");
2587 #else /* PLUGIN_SUPPORT */
2588 /* If dlfcn.h is unavailable we always fallback to host execution.
2589 GOMP_target* routines are just stubs for this case. */
2591 gomp_target_init (void)
2594 #endif /* PLUGIN_SUPPORT */