// [official-gcc.git] liboffloadmic/runtime/offload_host.cpp  (see PR c++/64359)
/*
    Copyright (c) 2014 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Forward declarations: the following 2 functions are declared as friend
// in offload_engine.h, and CLANG does not like 'static' to appear after
// the friend declaration, so declare them up front.
static void __offload_init_library_once(void);
static void __offload_fini_library(void);
36 #include "offload_host.h"
37 #ifdef MYO_SUPPORT
38 #include "offload_myo_host.h"
39 #endif
41 #include <malloc.h>
42 #ifndef TARGET_WINNT
43 #include <alloca.h>
44 #include <elf.h>
45 #endif // TARGET_WINNT
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/stat.h>
51 #include <sys/types.h>
52 #include <sys/stat.h>
54 #include <algorithm>
55 #include <bitset>
57 #if defined(HOST_WINNT)
58 #define PATH_SEPARATOR ";"
59 #else
60 #define PATH_SEPARATOR ":"
61 #endif
// Returns the offload number stored in timer_data, or 0 when no timer
// data has been allocated yet (tracing can run before timers exist).
#define GET_OFFLOAD_NUMBER(timer_data) \
    timer_data? timer_data->offload_number : 0
#ifdef TARGET_WINNT
// Small subset of ELF declarations for Windows which is needed to compile
// this file. ELF header is used to understand what binary type is contained
// in the target image - shared library or executable.

typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;

#define EI_NIDENT   16

// e_type values of interest: executable vs. shared object
#define ET_EXEC     2
#define ET_DYN      3

// 64-bit ELF file header (layout per the System V ABI / ELF specification)
typedef struct
{
    unsigned char e_ident[EI_NIDENT];
    Elf64_Half    e_type;
    Elf64_Half    e_machine;
    Elf64_Word    e_version;
    Elf64_Addr    e_entry;
    Elf64_Off     e_phoff;
    Elf64_Off     e_shoff;
    Elf64_Word    e_flags;
    Elf64_Half    e_ehsize;
    Elf64_Half    e_phentsize;
    Elf64_Half    e_phnum;
    Elf64_Half    e_shentsize;
    Elf64_Half    e_shnum;
    Elf64_Half    e_shstrndx;
} Elf64_Ehdr;
#endif // TARGET_WINNT
100 // Host console and file logging
101 const char *prefix;
102 int console_enabled = 0;
103 int offload_number = 0;
105 static const char *htrace_envname = "H_TRACE";
106 static const char *offload_report_envname = "OFFLOAD_REPORT";
107 static char *timer_envname = "H_TIME";
109 // Trace information
110 static const char* vardesc_direction_as_string[] = {
111 "NOCOPY",
112 "IN",
113 "OUT",
114 "INOUT"
116 static const char* vardesc_type_as_string[] = {
117 "unknown",
118 "data",
119 "data_ptr",
120 "func_ptr",
121 "void_ptr",
122 "string_ptr",
123 "dv",
124 "dv_data",
125 "dv_data_slice",
126 "dv_ptr",
127 "dv_ptr_data",
128 "dv_ptr_data_slice",
129 "cean_var",
130 "cean_var_ptr",
131 "c_data_ptr_array",
132 "c_func_ptr_array",
133 "c_void_ptr_array",
134 "c_string_ptr_array"
137 Engine* mic_engines = 0;
138 uint32_t mic_engines_total = 0;
139 pthread_key_t mic_thread_key;
140 MicEnvVar mic_env_vars;
141 uint64_t cpu_frequency = 0;
143 // MIC_STACKSIZE
144 uint32_t mic_stack_size = 12 * 1024 * 1024;
146 // MIC_BUFFERSIZE
147 uint64_t mic_buffer_size = 0;
149 // MIC_LD_LIBRARY_PATH
150 char* mic_library_path = 0;
152 // MIC_PROXY_IO
153 bool mic_proxy_io = true;
155 // MIC_PROXY_FS_ROOT
156 char* mic_proxy_fs_root = 0;
158 // Threshold for creating buffers with large pages. Buffer is created
159 // with large pages hint if its size exceeds the threshold value.
160 // By default large pages are disabled right now (by setting default
161 // value for threshold to MAX) due to HSD 4114629.
162 uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
163 static const char *mic_use_2mb_buffers_envname =
164 "MIC_USE_2MB_BUFFERS";
166 static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
167 static const char *mic_use_async_buffer_write_envname =
168 "MIC_USE_ASYNC_BUFFER_WRITE";
170 static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
171 static const char *mic_use_async_buffer_read_envname =
172 "MIC_USE_ASYNC_BUFFER_READ";
174 // device initialization type
175 OffloadInitType __offload_init_type = c_init_on_offload_all;
176 static const char *offload_init_envname = "OFFLOAD_INIT";
178 // active wait
179 static bool __offload_active_wait = true;
180 static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";
182 // OMP_DEFAULT_DEVICE
183 int __omp_device_num = 0;
184 static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";
186 // The list of pending target libraries
187 static bool __target_libs;
188 static TargetImageList __target_libs_list;
189 static mutex_t __target_libs_lock;
190 static mutex_t stack_alloc_lock;
192 // Target executable
193 TargetImage* __target_exe;
195 static char * offload_get_src_base(void * ptr, uint8_t type)
197 char *base;
198 if (VAR_TYPE_IS_PTR(type)) {
199 base = *static_cast<char**>(ptr);
201 else if (VAR_TYPE_IS_SCALAR(type)) {
202 base = static_cast<char*>(ptr);
204 else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) {
205 ArrDesc *dvp;
206 if (VAR_TYPE_IS_DV_DATA_SLICE(type)) {
207 const arr_desc *ap = static_cast<const arr_desc*>(ptr);
208 dvp = (type == c_dv_data_slice) ?
209 reinterpret_cast<ArrDesc*>(ap->base) :
210 *reinterpret_cast<ArrDesc**>(ap->base);
212 else {
213 dvp = (type == c_dv_data) ?
214 static_cast<ArrDesc*>(ptr) :
215 *static_cast<ArrDesc**>(ptr);
217 base = reinterpret_cast<char*>(dvp->Base);
219 else {
220 base = NULL;
222 return base;
225 void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
227 // special case for the 'process died' error
228 if (res == COI_PROCESS_DIED) {
229 m_device.fini_process(true);
231 else {
232 switch (msg) {
233 case c_buf_create:
234 if (res == COI_OUT_OF_MEMORY) {
235 msg = c_buf_create_out_of_mem;
237 /* fallthru */
239 case c_buf_create_from_mem:
240 case c_buf_get_address:
241 case c_pipeline_create:
242 case c_pipeline_run_func:
243 LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res);
244 break;
246 case c_buf_read:
247 case c_buf_write:
248 case c_buf_copy:
249 case c_buf_map:
250 case c_buf_unmap:
251 case c_buf_destroy:
252 case c_buf_set_state:
253 LIBOFFLOAD_ERROR(msg, res);
254 break;
256 default:
257 break;
261 exit(1);
264 _Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
266 switch (res) {
267 case COI_SUCCESS:
268 return OFFLOAD_SUCCESS;
270 case COI_PROCESS_DIED:
271 return OFFLOAD_PROCESS_DIED;
273 case COI_OUT_OF_MEMORY:
274 return OFFLOAD_OUT_OF_MEMORY;
276 default:
277 return OFFLOAD_ERROR;
281 bool OffloadDescriptor::alloc_ptr_data(
282 PtrData* &ptr_data,
283 void *base,
284 int64_t disp,
285 int64_t size,
286 int64_t alloc_disp,
287 int align
290 // total length of base
291 int64_t length = disp + size;
292 bool is_new;
294 OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
295 base, length);
297 // add new entry
298 ptr_data = m_device.insert_ptr_data(base, length, is_new);
299 if (is_new) {
301 OFFLOAD_TRACE(3, "Added new association\n");
303 if (length > 0) {
304 OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
305 COIRESULT res;
307 // align should be a power of 2
308 if (align > 0 && (align & (align - 1)) == 0) {
309 // offset within mic_buffer. Can do offset optimization
310 // only when source address alignment satisfies requested
311 // alignment on the target (cq172736).
312 if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
313 ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;
317 // buffer size and flags
318 uint64_t buffer_size = length + ptr_data->mic_offset;
319 uint32_t buffer_flags = 0;
321 // create buffer with large pages if data length exceeds
322 // large page threshold
323 if (length >= __offload_use_2mb_buffers) {
324 buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
327 // create CPU buffer
328 OFFLOAD_DEBUG_TRACE_1(3,
329 GET_OFFLOAD_NUMBER(get_timer_data()),
330 c_offload_create_buf_host,
331 "Creating buffer from source memory %p, "
332 "length %lld\n", base, length);
334 // result is not checked because we can continue without cpu
335 // buffer. In this case we will use COIBufferRead/Write instead
336 // of COIBufferCopy.
337 COI::BufferCreateFromMemory(length,
338 COI_BUFFER_NORMAL,
340 base,
342 &m_device.get_process(),
343 &ptr_data->cpu_buf);
345 OFFLOAD_DEBUG_TRACE_1(3,
346 GET_OFFLOAD_NUMBER(get_timer_data()),
347 c_offload_create_buf_mic,
348 "Creating buffer for sink: size %lld, offset %d, "
349 "flags =0x%x\n", buffer_size - alloc_disp,
350 ptr_data->mic_offset, buffer_flags);
352 // create MIC buffer
353 res = COI::BufferCreate(buffer_size - alloc_disp,
354 COI_BUFFER_NORMAL,
355 buffer_flags,
358 &m_device.get_process(),
359 &ptr_data->mic_buf);
360 if (res != COI_SUCCESS) {
361 if (m_status != 0) {
362 m_status->result = translate_coi_error(res);
364 else if (m_is_mandatory) {
365 report_coi_error(c_buf_create, res);
367 ptr_data->alloc_ptr_data_lock.unlock();
368 return false;
371 // make buffer valid on the device.
372 res = COI::BufferSetState(ptr_data->mic_buf,
373 m_device.get_process(),
374 COI_BUFFER_VALID,
375 COI_BUFFER_NO_MOVE,
376 0, 0, 0);
377 if (res != COI_SUCCESS) {
378 if (m_status != 0) {
379 m_status->result = translate_coi_error(res);
381 else if (m_is_mandatory) {
382 report_coi_error(c_buf_set_state, res);
384 ptr_data->alloc_ptr_data_lock.unlock();
385 return false;
388 res = COI::BufferSetState(ptr_data->mic_buf,
389 COI_PROCESS_SOURCE,
390 COI_BUFFER_INVALID,
391 COI_BUFFER_NO_MOVE,
392 0, 0, 0);
393 if (res != COI_SUCCESS) {
394 if (m_status != 0) {
395 m_status->result = translate_coi_error(res);
397 else if (m_is_mandatory) {
398 report_coi_error(c_buf_set_state, res);
400 ptr_data->alloc_ptr_data_lock.unlock();
401 return false;
405 ptr_data->alloc_disp = alloc_disp;
406 ptr_data->alloc_ptr_data_lock.unlock();
408 else {
409 mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);
411 OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
412 "is_static %d\n",
413 ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
414 ptr_data->is_static);
416 // This is not a new entry. Make sure that provided address range fits
417 // into existing one.
418 MemRange addr_range(base, length - ptr_data->alloc_disp);
419 if (!ptr_data->cpu_addr.contains(addr_range)) {
420 LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
421 exit(1);
424 // if the entry is associated with static data it may not have buffers
425 // created because they are created on demand.
426 if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
427 return false;
431 return true;
434 bool OffloadDescriptor::find_ptr_data(
435 PtrData* &ptr_data,
436 void *base,
437 int64_t disp,
438 int64_t size,
439 bool report_error
442 // total length of base
443 int64_t length = disp + size;
445 OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
446 "length %lld\n", base, length);
448 // find existing association in pointer table
449 ptr_data = m_device.find_ptr_data(base);
450 if (ptr_data == 0) {
451 if (report_error) {
452 LIBOFFLOAD_ERROR(c_no_ptr_data, base);
453 exit(1);
455 OFFLOAD_TRACE(3, "Association does not exist\n");
456 return true;
459 OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
460 ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
461 ptr_data->is_static);
463 // make sure that provided address range fits into existing one
464 MemRange addr_range(base, length);
465 if (!ptr_data->cpu_addr.contains(addr_range)) {
466 if (report_error) {
467 LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
468 exit(1);
470 OFFLOAD_TRACE(3, "Existing association partially overlaps with "
471 "data address range\n");
472 ptr_data = 0;
473 return true;
476 // if the entry is associated with static data it may not have buffers
477 // created because they are created on demand.
478 if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
479 return false;
482 return true;
485 bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
487 OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
489 if (ptr_data->cpu_buf == 0) {
490 OFFLOAD_TRACE(3, "Creating buffer from source memory %llx\n",
491 ptr_data->cpu_addr.start());
493 COIRESULT res = COI::BufferCreateFromMemory(
494 ptr_data->cpu_addr.length(),
495 COI_BUFFER_NORMAL,
497 const_cast<void*>(ptr_data->cpu_addr.start()),
498 1, &m_device.get_process(),
499 &ptr_data->cpu_buf);
501 if (res != COI_SUCCESS) {
502 if (m_status != 0) {
503 m_status->result = translate_coi_error(res);
504 return false;
506 report_coi_error(c_buf_create_from_mem, res);
510 if (ptr_data->mic_buf == 0) {
511 OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
512 ptr_data->mic_addr);
514 COIRESULT res = COI::BufferCreateFromMemory(
515 ptr_data->cpu_addr.length(),
516 COI_BUFFER_NORMAL,
517 COI_SINK_MEMORY,
518 reinterpret_cast<void*>(ptr_data->mic_addr),
519 1, &m_device.get_process(),
520 &ptr_data->mic_buf);
522 if (res != COI_SUCCESS) {
523 if (m_status != 0) {
524 m_status->result = translate_coi_error(res);
525 return false;
527 report_coi_error(c_buf_create_from_mem, res);
531 return true;
534 bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
536 if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) {
537 COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
538 &ptr_data->mic_addr);
539 if (res != COI_SUCCESS) {
540 if (m_status != 0) {
541 m_status->result = translate_coi_error(res);
543 else if (m_is_mandatory) {
544 report_coi_error(c_buf_get_address, res);
546 return false;
549 return true;
552 bool OffloadDescriptor::nullify_target_stack(
553 COIBUFFER targ_buf,
554 uint64_t size
557 char * ptr = (char*)malloc(size);
558 if (ptr == NULL)
559 LIBOFFLOAD_ERROR(c_malloc);
560 COIRESULT res;
562 memset(ptr, 0, size);
563 res = COI::BufferWrite(
564 targ_buf,
566 ptr,
567 size,
568 COI_COPY_UNSPECIFIED,
569 0, 0, 0);
570 free(ptr);
571 if (res != COI_SUCCESS) {
572 if (m_status != 0) {
573 m_status->result = translate_coi_error(res);
574 return false;
576 report_coi_error(c_buf_write, res);
578 return true;
581 bool OffloadDescriptor::offload_stack_memory_manager(
582 const void * stack_begin,
583 int routine_id,
584 int buf_size,
585 int align,
586 bool *is_new)
588 mutex_locker_t locker(stack_alloc_lock);
590 PersistData * new_el;
591 PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
592 PersistDataList::iterator it_end;
593 int erase = 0;
595 *is_new = false;
597 for (PersistDataList::iterator it = m_device.m_persist_list.begin();
598 it != m_device.m_persist_list.end(); it++) {
599 PersistData cur_el = *it;
601 if (stack_begin > it->stack_cpu_addr) {
602 // this stack data must be destroyed
603 m_destroy_stack.push_front(cur_el.stack_ptr_data);
604 it_end = it;
605 erase++;
607 else if (stack_begin == it->stack_cpu_addr) {
608 if (routine_id != it-> routine_id) {
609 // this stack data must be destroyed
610 m_destroy_stack.push_front(cur_el.stack_ptr_data);
611 it_end = it;
612 erase++;
613 break;
615 else {
616 // stack data is reused
617 m_stack_ptr_data = it->stack_ptr_data;
618 if (erase > 0) {
619 // all obsolete stack sections must be erased from the list
620 m_device.m_persist_list.erase(it_begin, ++it_end);
622 m_in_datalen +=
623 erase * sizeof(new_el->stack_ptr_data->mic_addr);
625 OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
626 m_stack_ptr_data->mic_addr);
627 return true;
630 else if (stack_begin < it->stack_cpu_addr) {
631 break;
635 if (erase > 0) {
636 // all obsolete stack sections must be erased from the list
637 m_device.m_persist_list.erase(it_begin, ++it_end);
638 m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
640 // new stack table is created
641 new_el = new PersistData(stack_begin, routine_id, buf_size);
642 // create MIC buffer
643 COIRESULT res;
644 uint32_t buffer_flags = 0;
646 // create buffer with large pages if data length exceeds
647 // large page threshold
648 if (buf_size >= __offload_use_2mb_buffers) {
649 buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
651 res = COI::BufferCreate(buf_size,
652 COI_BUFFER_NORMAL,
653 buffer_flags,
656 &m_device.get_process(),
657 &new_el->stack_ptr_data->mic_buf);
658 if (res != COI_SUCCESS) {
659 if (m_status != 0) {
660 m_status->result = translate_coi_error(res);
662 else if (m_is_mandatory) {
663 report_coi_error(c_buf_create, res);
665 return false;
667 // make buffer valid on the device.
668 res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
669 m_device.get_process(),
670 COI_BUFFER_VALID,
671 COI_BUFFER_NO_MOVE,
672 0, 0, 0);
673 if (res != COI_SUCCESS) {
674 if (m_status != 0) {
675 m_status->result = translate_coi_error(res);
677 else if (m_is_mandatory) {
678 report_coi_error(c_buf_set_state, res);
680 return false;
682 res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
683 COI_PROCESS_SOURCE,
684 COI_BUFFER_INVALID,
685 COI_BUFFER_NO_MOVE,
686 0, 0, 0);
687 if (res != COI_SUCCESS) {
688 if (m_status != 0) {
689 m_status->result = translate_coi_error(res);
691 else if (m_is_mandatory) {
692 report_coi_error(c_buf_set_state, res);
694 return false;
696 // persistence algorithm requires target stack initialy to be nullified
697 if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
698 return false;
701 m_stack_ptr_data = new_el->stack_ptr_data;
702 init_mic_address(m_stack_ptr_data);
703 OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
704 m_stack_ptr_data->mic_addr);
705 m_device.m_persist_list.push_front(*new_el);
706 init_mic_address(new_el->stack_ptr_data);
707 *is_new = true;
708 return true;
711 bool OffloadDescriptor::setup_descriptors(
712 VarDesc *vars,
713 VarDesc2 *vars2,
714 int vars_total,
715 int entry_id,
716 const void *stack_addr
719 COIRESULT res;
721 OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
723 // make a copy of variable descriptors
724 m_vars_total = vars_total;
725 if (vars_total > 0) {
726 m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
727 if (m_vars == NULL)
728 LIBOFFLOAD_ERROR(c_malloc);
729 memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
730 m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
731 if (m_vars_extra == NULL)
732 LIBOFFLOAD_ERROR(c_malloc);
735 // dependencies
736 m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total + 1));
737 if (m_in_deps == NULL)
738 LIBOFFLOAD_ERROR(c_malloc);
739 if (m_vars_total > 0) {
740 m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
741 if (m_out_deps == NULL)
742 LIBOFFLOAD_ERROR(c_malloc);
745 // copyin/copyout data length
746 m_in_datalen = 0;
747 m_out_datalen = 0;
749 // First pass over variable descriptors
750 // - Calculate size of the input and output non-pointer data
751 // - Allocate buffers for input and output pointers
752 for (int i = 0; i < m_vars_total; i++) {
753 void* alloc_base = NULL;
754 int64_t alloc_disp = 0;
755 int64_t alloc_size;
756 bool src_is_for_mic = (m_vars[i].direction.out ||
757 m_vars[i].into == NULL);
759 const char *var_sname = "";
760 if (vars2 != NULL && i < vars_total) {
761 if (vars2[i].sname != NULL) {
762 var_sname = vars2[i].sname;
765 OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n",
766 i, var_sname,
767 vardesc_direction_as_string[m_vars[i].direction.bits],
768 vardesc_type_as_string[m_vars[i].type.src]);
769 if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
770 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
771 vardesc_type_as_string[m_vars[i].type.dst]);
773 OFFLOAD_TRACE(2,
774 " type_src=%d, type_dstn=%d, direction=%d, "
775 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
776 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
777 m_vars[i].type.src,
778 m_vars[i].type.dst,
779 m_vars[i].direction.bits,
780 m_vars[i].alloc_if,
781 m_vars[i].free_if,
782 m_vars[i].align,
783 m_vars[i].mic_offset,
784 m_vars[i].flags.bits,
785 m_vars[i].offset,
786 m_vars[i].size,
787 m_vars[i].count,
788 m_vars[i].ptr,
789 m_vars[i].into);
791 if (m_vars[i].alloc != NULL) {
792 // array descriptor
793 const arr_desc *ap =
794 static_cast<const arr_desc*>(m_vars[i].alloc);
796 // debug dump
797 __arr_desc_dump(" ", "ALLOC", ap, 0);
799 __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
801 alloc_base = reinterpret_cast<void*>(ap->base);
804 m_vars_extra[i].cpu_disp = 0;
805 m_vars_extra[i].cpu_offset = 0;
806 m_vars_extra[i].src_data = 0;
807 m_vars_extra[i].read_rng_src = 0;
808 m_vars_extra[i].read_rng_dst = 0;
809 // flag is_arr_ptr_el is 1 only for var_descs generated
810 // for c_data_ptr_array type
811 if (i < vars_total) {
812 m_vars_extra[i].is_arr_ptr_el = 0;
815 switch (m_vars[i].type.src) {
816 case c_data_ptr_array:
818 const arr_desc *ap;
819 const VarDesc3 *vd3 =
820 static_cast<const VarDesc3*>(m_vars[i].ptr);
821 int flags = vd3->array_fields;
822 OFFLOAD_TRACE(2,
823 " pointer array flags = %04x\n", flags);
824 OFFLOAD_TRACE(2,
825 " pointer array type is %s\n",
826 vardesc_type_as_string[flags & 0x3f]);
827 ap = static_cast<const arr_desc*>(vd3->ptr_array);
828 __arr_desc_dump(" ", "ptr array", ap, 0);
829 if (m_vars[i].into) {
830 ap = static_cast<const arr_desc*>(m_vars[i].into);
831 __arr_desc_dump(
832 " ", "into array", ap, 0);
834 if ((flags & (1<<flag_align_is_array)) != 0) {
835 ap = static_cast<const arr_desc*>(vd3->align_array);
836 __arr_desc_dump(
837 " ", "align array", ap, 0);
839 if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
840 ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
841 __arr_desc_dump(
842 " ", "alloc_if array", ap, 0);
844 if ((flags & (1<<flag_free_if_is_array)) != 0) {
845 ap = static_cast<const arr_desc*>(vd3->free_if_array);
846 __arr_desc_dump(
847 " ", "free_if array", ap, 0);
849 if ((flags & (1<<flag_extent_start_is_array)) != 0) {
850 ap = static_cast<const arr_desc*>(vd3->extent_start);
851 __arr_desc_dump(
852 " ", "extent_start array", ap, 0);
853 } else if ((flags &
854 (1<<flag_extent_start_is_scalar)) != 0) {
855 OFFLOAD_TRACE(2,
856 " extent_start scalar = %d\n",
857 (int64_t)vd3->extent_start);
859 if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
860 ap = static_cast<const arr_desc*>
861 (vd3->extent_elements);
862 __arr_desc_dump(
863 " ", "extent_elements array", ap, 0);
864 } else if ((flags &
865 (1<<flag_extent_elements_is_scalar)) != 0) {
866 OFFLOAD_TRACE(2,
867 " extent_elements scalar = %d\n",
868 (int64_t)vd3->extent_elements);
870 if ((flags & (1<<flag_into_start_is_array)) != 0) {
871 ap = static_cast<const arr_desc*>(vd3->into_start);
872 __arr_desc_dump(
873 " ", "into_start array", ap, 0);
874 } else if ((flags &
875 (1<<flag_into_start_is_scalar)) != 0) {
876 OFFLOAD_TRACE(2,
877 " into_start scalar = %d\n",
878 (int64_t)vd3->into_start);
880 if ((flags & (1<<flag_into_elements_is_array)) != 0) {
881 ap = static_cast<const arr_desc*>(vd3->into_elements);
882 __arr_desc_dump(
883 " ", "into_elements array", ap, 0);
884 } else if ((flags &
885 (1<<flag_into_elements_is_scalar)) != 0) {
886 OFFLOAD_TRACE(2,
887 " into_elements scalar = %d\n",
888 (int64_t)vd3->into_elements);
890 if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
891 ap = static_cast<const arr_desc*>(vd3->alloc_start);
892 __arr_desc_dump(
893 " ", "alloc_start array", ap, 0);
894 } else if ((flags &
895 (1<<flag_alloc_start_is_scalar)) != 0) {
896 OFFLOAD_TRACE(2,
897 " alloc_start scalar = %d\n",
898 (int64_t)vd3->alloc_start);
900 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
901 ap = static_cast<const arr_desc*>(vd3->alloc_elements);
902 __arr_desc_dump(
903 " ", "alloc_elements array", ap, 0);
904 } else if ((flags &
905 (1<<flag_alloc_elements_is_scalar)) != 0) {
906 OFFLOAD_TRACE(2,
907 " alloc_elements scalar = %d\n",
908 (int64_t)vd3->alloc_elements);
911 if (!gen_var_descs_for_pointer_array(i)) {
912 return false;
914 break;
916 case c_data:
917 case c_void_ptr:
918 case c_cean_var:
919 // In all uses later
920 // VarDesc.size will have the length of the data to be
921 // transferred
922 // VarDesc.disp will have an offset from base
923 if (m_vars[i].type.src == c_cean_var) {
924 // array descriptor
925 const arr_desc *ap =
926 static_cast<const arr_desc*>(m_vars[i].ptr);
928 // debug dump
929 __arr_desc_dump("", "IN/OUT", ap, 0);
931 // offset and length are derived from the array descriptor
932 __arr_data_offset_and_length(ap, m_vars[i].disp,
933 m_vars[i].size);
934 if (!is_arr_desc_contiguous(ap)) {
935 m_vars[i].flags.is_noncont_src = 1;
936 m_vars_extra[i].read_rng_src =
937 init_read_ranges_arr_desc(ap);
939 // all necessary information about length and offset is
940 // transferred in var descriptor. There is no need to send
941 // array descriptor to the target side.
942 m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
944 else {
945 m_vars[i].size *= m_vars[i].count;
946 m_vars[i].disp = 0;
949 if (m_vars[i].direction.bits) {
950 // make sure that transfer size > 0
951 if (m_vars[i].size <= 0) {
952 LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size);
953 exit(1);
956 if (m_vars[i].flags.is_static) {
957 PtrData *ptr_data;
959 // find data associated with variable
960 if (!find_ptr_data(ptr_data,
961 m_vars[i].ptr,
962 m_vars[i].disp,
963 m_vars[i].size,
964 false)) {
965 return false;
968 if (ptr_data != 0) {
969 // offset to base from the beginning of the buffer
970 // memory
971 m_vars[i].offset =
972 (char*) m_vars[i].ptr -
973 (char*) ptr_data->cpu_addr.start();
975 else {
976 m_vars[i].flags.is_static = false;
977 if (m_vars[i].into == NULL) {
978 m_vars[i].flags.is_static_dstn = false;
981 m_vars_extra[i].src_data = ptr_data;
984 if (m_is_openmp) {
985 if (m_vars[i].flags.is_static) {
986 // Static data is transferred only by omp target
987 // update construct which passes zeros for
988 // alloc_if and free_if.
989 if (m_vars[i].alloc_if || m_vars[i].free_if) {
990 m_vars[i].direction.bits = c_parameter_nocopy;
993 else {
994 AutoData *auto_data;
995 if (m_vars[i].alloc_if) {
996 auto_data = m_device.insert_auto_data(
997 m_vars[i].ptr, m_vars[i].size);
998 auto_data->add_reference();
1000 else {
1001 // TODO: what should be done if var is not in
1002 // the table?
1003 auto_data = m_device.find_auto_data(
1004 m_vars[i].ptr);
1007 // For automatic variables data is transferred
1008 // only if alloc_if == 0 && free_if == 0
1009 // or reference count is 1
1010 if ((m_vars[i].alloc_if || m_vars[i].free_if) &&
1011 auto_data != 0 &&
1012 auto_data->get_reference() != 1) {
1013 m_vars[i].direction.bits = c_parameter_nocopy;
1016 // save data for later use
1017 m_vars_extra[i].auto_data = auto_data;
1021 if (m_vars[i].direction.in &&
1022 !m_vars[i].flags.is_static) {
1023 m_in_datalen += m_vars[i].size;
1025 // for non-static target destination defined as CEAN
1026 // expression we pass to target its size and dist
1027 if (m_vars[i].into == NULL &&
1028 m_vars[i].type.src == c_cean_var) {
1029 m_in_datalen += 2 * sizeof(uint64_t);
1031 m_need_runfunction = true;
1033 if (m_vars[i].direction.out &&
1034 !m_vars[i].flags.is_static) {
1035 m_out_datalen += m_vars[i].size;
1036 m_need_runfunction = true;
1039 break;
1041 case c_dv:
1042 if (m_vars[i].direction.bits ||
1043 m_vars[i].alloc_if ||
1044 m_vars[i].free_if) {
1045 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
1047 // debug dump
1048 __dv_desc_dump("IN/OUT", dvp);
1050 // send dope vector contents excluding base
1051 m_in_datalen += m_vars[i].size - sizeof(uint64_t);
1052 m_need_runfunction = true;
1054 break;
1056 case c_string_ptr:
1057 if ((m_vars[i].direction.bits ||
1058 m_vars[i].alloc_if ||
1059 m_vars[i].free_if) &&
1060 m_vars[i].size == 0) {
1061 m_vars[i].size = 1;
1062 m_vars[i].count =
1063 strlen(*static_cast<char**>(m_vars[i].ptr)) + 1;
1065 /* fallthru */
1067 case c_data_ptr:
1068 if (m_vars[i].flags.is_stack_buf &&
1069 !m_vars[i].direction.bits &&
1070 m_vars[i].alloc_if) {
1071 // this var_desc is for stack buffer
1072 bool is_new;
1074 if (!offload_stack_memory_manager(
1075 stack_addr, entry_id,
1076 m_vars[i].count, m_vars[i].align, &is_new)) {
1077 return false;
1079 if (is_new) {
1080 m_compute_buffers.push_back(
1081 m_stack_ptr_data->mic_buf);
1082 m_device.m_persist_list.front().cpu_stack_addr =
1083 static_cast<char*>(m_vars[i].ptr);
1085 else {
1086 m_vars[i].flags.sink_addr = 1;
1087 m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
1089 m_vars[i].size = m_destroy_stack.size();
1090 m_vars_extra[i].src_data = m_stack_ptr_data;
1091 // need to add reference for buffer
1092 m_need_runfunction = true;
1093 break;
1095 /* fallthru */
1097 case c_cean_var_ptr:
1098 case c_dv_ptr:
1099 if (m_vars[i].type.src == c_cean_var_ptr) {
1100 // array descriptor
1101 const arr_desc *ap =
1102 static_cast<const arr_desc*>(m_vars[i].ptr);
1104 // debug dump
1105 __arr_desc_dump("", "IN/OUT", ap, 1);
1107 // offset and length are derived from the array descriptor
1108 __arr_data_offset_and_length(ap, m_vars[i].disp,
1109 m_vars[i].size);
1111 if (!is_arr_desc_contiguous(ap)) {
1112 m_vars[i].flags.is_noncont_src = 1;
1113 m_vars_extra[i].read_rng_src =
1114 init_read_ranges_arr_desc(ap);
1116 // all necessary information about length and offset is
1117 // transferred in var descriptor. There is no need to send
1118 // array descriptor to the target side.
1119 m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
1121 else if (m_vars[i].type.src == c_dv_ptr) {
1122 // need to send DV to the device unless it is 'nocopy'
1123 if (m_vars[i].direction.bits ||
1124 m_vars[i].alloc_if ||
1125 m_vars[i].free_if) {
1126 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
1128 // debug dump
1129 __dv_desc_dump("IN/OUT", dvp);
1131 m_vars[i].direction.bits = c_parameter_in;
1134 // no displacement
1135 m_vars[i].disp = 0;
1137 else {
1138 // c_data_ptr or c_string_ptr
1139 m_vars[i].size *= m_vars[i].count;
1140 m_vars[i].disp = 0;
1143 if (m_vars[i].direction.bits ||
1144 m_vars[i].alloc_if ||
1145 m_vars[i].free_if) {
1146 PtrData *ptr_data;
1148 // check that buffer length >= 0
1149 if (m_vars[i].alloc_if &&
1150 m_vars[i].disp + m_vars[i].size < 0) {
1151 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
1152 exit(1);
1155 // base address
1156 void *base = *static_cast<void**>(m_vars[i].ptr);
1158 // allocate buffer if we have no INTO and don't need
1159 // allocation for the ptr at target
1160 if (src_is_for_mic) {
1161 if (m_vars[i].flags.is_stack_buf) {
1162 // for stack persistent objects ptr data is created
1163 // by var_desc with number 0.
1164 // Its ptr_data is stored at m_stack_ptr_data
1165 ptr_data = m_stack_ptr_data;
1166 m_vars[i].flags.sink_addr = 1;
1168 else if (m_vars[i].alloc_if) {
1169 // add new entry
1170 if (!alloc_ptr_data(
1171 ptr_data,
1172 base,
1173 (alloc_base != NULL) ?
1174 alloc_disp : m_vars[i].disp,
1175 (alloc_base != NULL) ?
1176 alloc_size : m_vars[i].size,
1177 alloc_disp,
1178 (alloc_base != NULL) ?
1179 0 : m_vars[i].align)) {
1180 return false;
1183 if (ptr_data->add_reference() == 0 &&
1184 ptr_data->mic_buf != 0) {
1185 // add buffer to the list of buffers that
1186 // are passed to dispatch call
1187 m_compute_buffers.push_back(
1188 ptr_data->mic_buf);
1190 else {
1191 // will send buffer address to device
1192 m_vars[i].flags.sink_addr = 1;
1195 if (!ptr_data->is_static) {
1196 // need to add reference for buffer
1197 m_need_runfunction = true;
1200 else {
1201 bool error_if_not_found = true;
1202 if (m_is_openmp) {
1203 // For omp target update variable is ignored
1204 // if it does not exist.
1205 if (!m_vars[i].alloc_if &&
1206 !m_vars[i].free_if) {
1207 error_if_not_found = false;
1211 // use existing association from pointer table
1212 if (!find_ptr_data(ptr_data,
1213 base,
1214 m_vars[i].disp,
1215 m_vars[i].size,
1216 error_if_not_found)) {
1217 return false;
1220 if (m_is_openmp) {
1221 // make var nocopy if it does not exist
1222 if (ptr_data == 0) {
1223 m_vars[i].direction.bits =
1224 c_parameter_nocopy;
1228 if (ptr_data != 0) {
1229 m_vars[i].flags.sink_addr = 1;
1233 if (ptr_data != 0) {
1234 if (m_is_openmp) {
1235 // data is transferred only if
1236 // alloc_if == 0 && free_if == 0
1237 // or reference count is 1
1238 if ((m_vars[i].alloc_if ||
1239 m_vars[i].free_if) &&
1240 ptr_data->get_reference() != 1) {
1241 m_vars[i].direction.bits =
1242 c_parameter_nocopy;
1246 if (ptr_data->alloc_disp != 0) {
1247 m_vars[i].flags.alloc_disp = 1;
1248 m_in_datalen += sizeof(alloc_disp);
1251 if (m_vars[i].flags.sink_addr) {
1252 // get buffers's address on the sink
1253 if (!init_mic_address(ptr_data)) {
1254 return false;
1257 m_in_datalen += sizeof(ptr_data->mic_addr);
1260 if (!ptr_data->is_static && m_vars[i].free_if) {
1261 // need to decrement buffer reference on target
1262 m_need_runfunction = true;
1265 // offset to base from the beginning of the buffer
1266 // memory
1267 m_vars[i].offset = (char*) base -
1268 (char*) ptr_data->cpu_addr.start();
1270 // copy other pointer properties to var descriptor
1271 m_vars[i].mic_offset = ptr_data->mic_offset;
1272 m_vars[i].flags.is_static = ptr_data->is_static;
1275 else {
1276 if (!find_ptr_data(ptr_data,
1277 base,
1278 m_vars[i].disp,
1279 m_vars[i].size,
1280 false)) {
1281 return false;
1283 if (ptr_data) {
1284 m_vars[i].offset =
1285 (char*) base -
1286 (char*) ptr_data->cpu_addr.start();
1290 // save pointer data
1291 m_vars_extra[i].src_data = ptr_data;
1293 break;
1295 case c_func_ptr:
1296 if (m_vars[i].direction.in) {
1297 m_in_datalen += __offload_funcs.max_name_length();
1299 if (m_vars[i].direction.out) {
1300 m_out_datalen += __offload_funcs.max_name_length();
1302 m_need_runfunction = true;
1303 break;
1305 case c_dv_data:
1306 case c_dv_ptr_data:
1307 case c_dv_data_slice:
1308 case c_dv_ptr_data_slice:
1309 ArrDesc *dvp;
1310 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1311 const arr_desc *ap;
1312 ap = static_cast<const arr_desc*>(m_vars[i].ptr);
1314 dvp = (m_vars[i].type.src == c_dv_data_slice) ?
1315 reinterpret_cast<ArrDesc*>(ap->base) :
1316 *reinterpret_cast<ArrDesc**>(ap->base);
1318 else {
1319 dvp = (m_vars[i].type.src == c_dv_data) ?
1320 static_cast<ArrDesc*>(m_vars[i].ptr) :
1321 *static_cast<ArrDesc**>(m_vars[i].ptr);
1324 // if allocatable dope vector isn't allocated don't
1325 // transfer its data
1326 if (!__dv_is_allocated(dvp)) {
1327 m_vars[i].direction.bits = c_parameter_nocopy;
1328 m_vars[i].alloc_if = 0;
1329 m_vars[i].free_if = 0;
1331 if (m_vars[i].direction.bits ||
1332 m_vars[i].alloc_if ||
1333 m_vars[i].free_if) {
1334 const arr_desc *ap;
1336 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1337 ap = static_cast<const arr_desc*>(m_vars[i].ptr);
1339 // debug dump
1340 __arr_desc_dump("", "IN/OUT", ap, 0);
1342 if (!__dv_is_contiguous(dvp)) {
1343 m_vars[i].flags.is_noncont_src = 1;
1344 m_vars_extra[i].read_rng_src =
1345 init_read_ranges_dv(dvp);
1348 // size and displacement
1349 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1350 // offset and length are derived from the
1351 // array descriptor
1352 __arr_data_offset_and_length(ap,
1353 m_vars[i].disp,
1354 m_vars[i].size);
1355 if (m_vars[i].direction.bits) {
1356 if (!is_arr_desc_contiguous(ap)) {
1357 if (m_vars[i].flags.is_noncont_src) {
1358 LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
1359 return false;
1361 m_vars[i].flags.is_noncont_src = 1;
1362 m_vars_extra[i].read_rng_src =
1363 init_read_ranges_arr_desc(ap);
1367 else {
1368 if (m_vars[i].flags.has_length) {
1369 m_vars[i].size =
1370 __dv_data_length(dvp, m_vars[i].count);
1372 else {
1373 m_vars[i].size = __dv_data_length(dvp);
1375 m_vars[i].disp = 0;
1378 // check that length >= 0
1379 if (m_vars[i].alloc_if &&
1380 (m_vars[i].disp + m_vars[i].size < 0)) {
1381 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
1382 exit(1);
1385 // base address
1386 void *base = reinterpret_cast<void*>(dvp->Base);
1387 PtrData *ptr_data;
1389 // allocate buffer if we have no INTO and don't need
1390 // allocation for the ptr at target
1391 if (src_is_for_mic) {
1392 if (m_vars[i].alloc_if) {
1393 // add new entry
1394 if (!alloc_ptr_data(
1395 ptr_data,
1396 base,
1397 (alloc_base != NULL) ?
1398 alloc_disp : m_vars[i].disp,
1399 (alloc_base != NULL) ?
1400 alloc_size : m_vars[i].size,
1401 alloc_disp,
1402 (alloc_base != NULL) ?
1403 0 : m_vars[i].align)) {
1404 return false;
1407 if (ptr_data->add_reference() == 0 &&
1408 ptr_data->mic_buf != 0) {
1409 // add buffer to the list of buffers
1410 // that are passed to dispatch call
1411 m_compute_buffers.push_back(
1412 ptr_data->mic_buf);
1414 else {
1415 // will send buffer address to device
1416 m_vars[i].flags.sink_addr = 1;
1419 if (!ptr_data->is_static) {
1420 // need to add reference for buffer
1421 m_need_runfunction = true;
1424 else {
1425 bool error_if_not_found = true;
1426 if (m_is_openmp) {
1427 // For omp target update variable is ignored
1428 // if it does not exist.
1429 if (!m_vars[i].alloc_if &&
1430 !m_vars[i].free_if) {
1431 error_if_not_found = false;
1435 // use existing association from pointer table
1436 if (!find_ptr_data(ptr_data,
1437 base,
1438 m_vars[i].disp,
1439 m_vars[i].size,
1440 error_if_not_found)) {
1441 return false;
1444 if (m_is_openmp) {
1445 // make var nocopy if it does not exist
1446 if (ptr_data == 0) {
1447 m_vars[i].direction.bits =
1448 c_parameter_nocopy;
1452 if (ptr_data != 0) {
1453 // need to update base in dope vector on device
1454 m_vars[i].flags.sink_addr = 1;
1458 if (ptr_data != 0) {
1459 if (m_is_openmp) {
1460 // data is transferred only if
1461 // alloc_if == 0 && free_if == 0
1462 // or reference count is 1
1463 if ((m_vars[i].alloc_if ||
1464 m_vars[i].free_if) &&
1465 ptr_data->get_reference() != 1) {
1466 m_vars[i].direction.bits =
1467 c_parameter_nocopy;
1471 if (ptr_data->alloc_disp != 0) {
1472 m_vars[i].flags.alloc_disp = 1;
1473 m_in_datalen += sizeof(alloc_disp);
1476 if (m_vars[i].flags.sink_addr) {
1477 // get buffers's address on the sink
1478 if (!init_mic_address(ptr_data)) {
1479 return false;
1482 m_in_datalen += sizeof(ptr_data->mic_addr);
1485 if (!ptr_data->is_static && m_vars[i].free_if) {
1486 // need to decrement buffer reference on target
1487 m_need_runfunction = true;
1490 // offset to base from the beginning of the buffer
1491 // memory
1492 m_vars[i].offset =
1493 (char*) base -
1494 (char*) ptr_data->cpu_addr.start();
1496 // copy other pointer properties to var descriptor
1497 m_vars[i].mic_offset = ptr_data->mic_offset;
1498 m_vars[i].flags.is_static = ptr_data->is_static;
1501 else { // !src_is_for_mic
1502 if (!find_ptr_data(ptr_data,
1503 base,
1504 m_vars[i].disp,
1505 m_vars[i].size,
1506 false)) {
1507 return false;
1509 m_vars[i].offset = !ptr_data ? 0 :
1510 (char*) base -
1511 (char*) ptr_data->cpu_addr.start();
1514 // save pointer data
1515 m_vars_extra[i].src_data = ptr_data;
1517 break;
1519 default:
1520 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
1521 LIBOFFLOAD_ABORT;
1523 if (m_vars[i].type.src == c_data_ptr_array) {
1524 continue;
1527 if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
1528 m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
1529 m_device.m_persist_list.front().cpu_stack_addr;
1531 // if source is used at CPU save its offset and disp
1532 if (m_vars[i].into == NULL || m_vars[i].direction.in) {
1533 m_vars_extra[i].cpu_offset = m_vars[i].offset;
1534 m_vars_extra[i].cpu_disp = m_vars[i].disp;
1537 // If "into" is define we need to do the similar work for it
1538 if (!m_vars[i].into) {
1539 continue;
1542 int64_t into_disp =0, into_offset = 0;
1544 switch (m_vars[i].type.dst) {
1545 case c_data_ptr_array:
1546 break;
1547 case c_data:
1548 case c_void_ptr:
1549 case c_cean_var: {
1550 int64_t size = m_vars[i].size;
1552 if (m_vars[i].type.dst == c_cean_var) {
1553 // array descriptor
1554 const arr_desc *ap =
1555 static_cast<const arr_desc*>(m_vars[i].into);
1557 // debug dump
1558 __arr_desc_dump(" ", "INTO", ap, 0);
1560 // offset and length are derived from the array descriptor
1561 __arr_data_offset_and_length(ap, into_disp, size);
1563 if (!is_arr_desc_contiguous(ap)) {
1564 m_vars[i].flags.is_noncont_dst = 1;
1565 m_vars_extra[i].read_rng_dst =
1566 init_read_ranges_arr_desc(ap);
1567 if (!cean_ranges_match(
1568 m_vars_extra[i].read_rng_src,
1569 m_vars_extra[i].read_rng_dst)) {
1570 LIBOFFLOAD_ERROR(c_ranges_dont_match);
1571 exit(1);
1574 m_vars[i].into = reinterpret_cast<void*>(ap->base);
1577 int64_t size_src = m_vars_extra[i].read_rng_src ?
1578 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
1579 m_vars[i].size;
1580 int64_t size_dst = m_vars_extra[i].read_rng_dst ?
1581 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
1582 size;
1583 // It's supposed that "into" size must be not less
1584 // than src size
1585 if (size_src > size_dst) {
1586 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
1587 size_src, size_dst);
1588 exit(1);
1591 if (m_vars[i].direction.bits) {
1592 if (m_vars[i].flags.is_static_dstn) {
1593 PtrData *ptr_data;
1595 // find data associated with variable
1596 if (!find_ptr_data(ptr_data, m_vars[i].into,
1597 into_disp, size, false)) {
1598 return false;
1600 if (ptr_data != 0) {
1601 // offset to base from the beginning of the buffer
1602 // memory
1603 into_offset =
1604 (char*) m_vars[i].into -
1605 (char*) ptr_data->cpu_addr.start();
1607 else {
1608 m_vars[i].flags.is_static_dstn = false;
1610 m_vars_extra[i].dst_data = ptr_data;
1614 if (m_vars[i].direction.in &&
1615 !m_vars[i].flags.is_static_dstn) {
1616 m_in_datalen += m_vars[i].size;
1618 // for non-static target destination defined as CEAN
1619 // expression we pass to target its size and dist
1620 if (m_vars[i].type.dst == c_cean_var) {
1621 m_in_datalen += 2 * sizeof(uint64_t);
1623 m_need_runfunction = true;
1625 break;
1628 case c_dv:
1629 if (m_vars[i].direction.bits ||
1630 m_vars[i].alloc_if ||
1631 m_vars[i].free_if) {
1632 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into);
1634 // debug dump
1635 __dv_desc_dump("INTO", dvp);
1637 // send dope vector contents excluding base
1638 m_in_datalen += m_vars[i].size - sizeof(uint64_t);
1639 m_need_runfunction = true;
1641 break;
1643 case c_string_ptr:
1644 case c_data_ptr:
1645 case c_cean_var_ptr:
1646 case c_dv_ptr: {
1647 int64_t size = m_vars[i].size;
1649 if (m_vars[i].type.dst == c_cean_var_ptr) {
1650 // array descriptor
1651 const arr_desc *ap =
1652 static_cast<const arr_desc*>(m_vars[i].into);
1654 // debug dump
1655 __arr_desc_dump(" ", "INTO", ap, 1);
1657 // offset and length are derived from the array descriptor
1658 __arr_data_offset_and_length(ap, into_disp, size);
1660 if (!is_arr_desc_contiguous(ap)) {
1661 m_vars[i].flags.is_noncont_src = 1;
1662 m_vars_extra[i].read_rng_dst =
1663 init_read_ranges_arr_desc(ap);
1664 if (!cean_ranges_match(
1665 m_vars_extra[i].read_rng_src,
1666 m_vars_extra[i].read_rng_dst)) {
1667 LIBOFFLOAD_ERROR(c_ranges_dont_match);
1670 m_vars[i].into = reinterpret_cast<char**>(ap->base);
1672 else if (m_vars[i].type.dst == c_dv_ptr) {
1673 // need to send DV to the device unless it is 'nocopy'
1674 if (m_vars[i].direction.bits ||
1675 m_vars[i].alloc_if ||
1676 m_vars[i].free_if) {
1677 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into);
1679 // debug dump
1680 __dv_desc_dump("INTO", dvp);
1682 m_vars[i].direction.bits = c_parameter_in;
1686 int64_t size_src = m_vars_extra[i].read_rng_src ?
1687 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
1688 m_vars[i].size;
1689 int64_t size_dst = m_vars_extra[i].read_rng_dst ?
1690 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
1691 size;
1692 // It's supposed that "into" size must be not less than
1693 // src size
1694 if (size_src > size_dst) {
1695 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
1696 size_src, size_dst);
1697 exit(1);
1700 if (m_vars[i].direction.bits) {
1701 PtrData *ptr_data;
1703 // base address
1704 void *base = *static_cast<void**>(m_vars[i].into);
1706 if (m_vars[i].direction.in) {
1707 // allocate buffer
1708 if (m_vars[i].flags.is_stack_buf) {
1709 // for stack persistent objects ptr data is created
1710 // by var_desc with number 0.
1711 // Its ptr_data is stored at m_stack_ptr_data
1712 ptr_data = m_stack_ptr_data;
1713 m_vars[i].flags.sink_addr = 1;
1715 else if (m_vars[i].alloc_if) {
1716 // add new entry
1717 if (!alloc_ptr_data(
1718 ptr_data,
1719 base,
1720 (alloc_base != NULL) ?
1721 alloc_disp : into_disp,
1722 (alloc_base != NULL) ?
1723 alloc_size : size,
1724 alloc_disp,
1725 (alloc_base != NULL) ?
1726 0 : m_vars[i].align)) {
1727 return false;
1730 if (ptr_data->add_reference() == 0 &&
1731 ptr_data->mic_buf != 0) {
1732 // add buffer to the list of buffers that
1733 // are passed to dispatch call
1734 m_compute_buffers.push_back(
1735 ptr_data->mic_buf);
1737 else {
1738 // will send buffer address to device
1739 m_vars[i].flags.sink_addr = 1;
1742 if (!ptr_data->is_static) {
1743 // need to add reference for buffer
1744 m_need_runfunction = true;
1747 else {
1748 // use existing association from pointer table
1749 if (!find_ptr_data(ptr_data, base, into_disp, size)) {
1750 return false;
1752 m_vars[i].flags.sink_addr = 1;
1755 if (ptr_data->alloc_disp != 0) {
1756 m_vars[i].flags.alloc_disp = 1;
1757 m_in_datalen += sizeof(alloc_disp);
1760 if (m_vars[i].flags.sink_addr) {
1761 // get buffers's address on the sink
1762 if (!init_mic_address(ptr_data)) {
1763 return false;
1766 m_in_datalen += sizeof(ptr_data->mic_addr);
1769 if (!ptr_data->is_static && m_vars[i].free_if) {
1770 // need to decrement buffer reference on target
1771 m_need_runfunction = true;
1774 // copy other pointer properties to var descriptor
1775 m_vars[i].mic_offset = ptr_data->mic_offset;
1776 m_vars[i].flags.is_static_dstn = ptr_data->is_static;
1778 else {
1779 if (!find_ptr_data(ptr_data,
1780 base,
1781 into_disp,
1782 m_vars[i].size,
1783 false)) {
1784 return false;
1787 if (ptr_data) {
1788 into_offset = ptr_data ?
1789 (char*) base -
1790 (char*) ptr_data->cpu_addr.start() :
1793 // save pointer data
1794 m_vars_extra[i].dst_data = ptr_data;
1796 break;
1799 case c_func_ptr:
1800 break;
1802 case c_dv_data:
1803 case c_dv_ptr_data:
1804 case c_dv_data_slice:
1805 case c_dv_ptr_data_slice:
1806 if (m_vars[i].direction.bits ||
1807 m_vars[i].alloc_if ||
1808 m_vars[i].free_if) {
1809 const arr_desc *ap;
1810 ArrDesc *dvp;
1811 PtrData *ptr_data;
1812 int64_t disp;
1813 int64_t size;
1815 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
1816 ap = static_cast<const arr_desc*>(m_vars[i].into);
1818 // debug dump
1819 __arr_desc_dump(" ", "INTO", ap, 0);
1821 dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
1822 reinterpret_cast<ArrDesc*>(ap->base) :
1823 *reinterpret_cast<ArrDesc**>(ap->base);
1825 else {
1826 dvp = (m_vars[i].type.dst == c_dv_data) ?
1827 static_cast<ArrDesc*>(m_vars[i].into) :
1828 *static_cast<ArrDesc**>(m_vars[i].into);
1830 if (!__dv_is_contiguous(dvp)) {
1831 m_vars[i].flags.is_noncont_dst = 1;
1832 m_vars_extra[i].read_rng_dst =
1833 init_read_ranges_dv(dvp);
1835 // size and displacement
1836 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
1837 // offset and length are derived from the array
1838 // descriptor
1839 __arr_data_offset_and_length(ap, into_disp, size);
1840 if (m_vars[i].direction.bits) {
1841 if (!is_arr_desc_contiguous(ap)) {
1842 if (m_vars[i].flags.is_noncont_dst) {
1843 LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
1844 return false;
1846 m_vars[i].flags.is_noncont_dst = 1;
1847 m_vars_extra[i].read_rng_dst =
1848 init_read_ranges_arr_desc(ap);
1849 if (!cean_ranges_match(
1850 m_vars_extra[i].read_rng_src,
1851 m_vars_extra[i].read_rng_dst)) {
1852 LIBOFFLOAD_ERROR(c_ranges_dont_match);
1857 else {
1858 if (m_vars[i].flags.has_length) {
1859 size = __dv_data_length(dvp, m_vars[i].count);
1861 else {
1862 size = __dv_data_length(dvp);
1864 disp = 0;
1867 int64_t size_src =
1868 m_vars_extra[i].read_rng_src ?
1869 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
1870 m_vars[i].size;
1871 int64_t size_dst =
1872 m_vars_extra[i].read_rng_dst ?
1873 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
1874 size;
1875 // It's supposed that "into" size must be not less
1876 // than src size
1877 if (size_src > size_dst) {
1878 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
1879 size_src, size_dst);
1880 exit(1);
1883 // base address
1884 void *base = reinterpret_cast<void*>(dvp->Base);
1886 // allocate buffer
1887 if (m_vars[i].direction.in) {
1888 if (m_vars[i].alloc_if) {
1889 // add new entry
1890 if (!alloc_ptr_data(
1891 ptr_data,
1892 base,
1893 (alloc_base != NULL) ?
1894 alloc_disp : into_disp,
1895 (alloc_base != NULL) ?
1896 alloc_size : size,
1897 alloc_disp,
1898 (alloc_base != NULL) ?
1899 0 : m_vars[i].align)) {
1900 return false;
1902 if (ptr_data->add_reference() == 0 &&
1903 ptr_data->mic_buf !=0) {
1904 // add buffer to the list of buffers
1905 // that are passed to dispatch call
1906 m_compute_buffers.push_back(
1907 ptr_data->mic_buf);
1909 else {
1910 // will send buffer address to device
1911 m_vars[i].flags.sink_addr = 1;
1914 if (!ptr_data->is_static) {
1915 // need to add reference for buffer
1916 m_need_runfunction = true;
1919 else {
1920 // use existing association from pointer table
1921 if (!find_ptr_data(ptr_data, base, into_disp, size)) {
1922 return false;
1925 // need to update base in dope vector on device
1926 m_vars[i].flags.sink_addr = 1;
1929 if (ptr_data->alloc_disp != 0) {
1930 m_vars[i].flags.alloc_disp = 1;
1931 m_in_datalen += sizeof(alloc_disp);
1934 if (m_vars[i].flags.sink_addr) {
1935 // get buffers's address on the sink
1936 if (!init_mic_address(ptr_data)) {
1937 return false;
1939 m_in_datalen += sizeof(ptr_data->mic_addr);
1942 if (!ptr_data->is_static && m_vars[i].free_if) {
1943 // need to decrement buffer reference on target
1944 m_need_runfunction = true;
1947 // offset to base from the beginning of the buffer
1948 // memory
1949 into_offset =
1950 (char*) base - (char*) ptr_data->cpu_addr.start();
1952 // copy other pointer properties to var descriptor
1953 m_vars[i].mic_offset = ptr_data->mic_offset;
1954 m_vars[i].flags.is_static_dstn = ptr_data->is_static;
1956 else { // src_is_for_mic
1957 if (!find_ptr_data(ptr_data,
1958 base,
1959 into_disp,
1960 size,
1961 false)) {
1962 return false;
1964 into_offset = !ptr_data ?
1966 (char*) base - (char*) ptr_data->cpu_addr.start();
1969 // save pointer data
1970 m_vars_extra[i].dst_data = ptr_data;
1972 break;
1974 default:
1975 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
1976 LIBOFFLOAD_ABORT;
1978 // if into is used at CPU save its offset and disp
1979 if (m_vars[i].direction.out) {
1980 m_vars_extra[i].cpu_offset = into_offset;
1981 m_vars_extra[i].cpu_disp = into_disp;
1983 else {
1984 if (m_vars[i].flags.is_stack_buf) {
1985 into_offset = static_cast<char*>(m_vars[i].into) -
1986 m_device.m_persist_list.front().cpu_stack_addr;
1988 m_vars[i].offset = into_offset;
1989 m_vars[i].disp = into_disp;
1993 return true;
1996 bool OffloadDescriptor::setup_misc_data(const char *name)
1998 OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);
2000 // we can skip run functon call together with wait if offloaded
2001 // region is empty and there is no user defined non-pointer IN/OUT data
2002 if (m_need_runfunction) {
2003 // variable descriptors are sent as input data
2004 m_in_datalen += m_vars_total * sizeof(VarDesc);
2006 // timer data is sent as a part of the output data
2007 m_out_datalen += OFFLOAD_TIMER_DATALEN();
2009 // max from input data and output data length
2010 uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
2011 m_out_datalen;
2013 // Misc data has the following layout
2014 // <Function Descriptor>
2015 // <Function Name>
2016 // <In/Out Data> (optional)
2018 // We can transfer copyin/copyout data in misc/return data which can
2019 // be passed to run function call if its size does not exceed
2020 // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
2021 // buffer for it.
2023 m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
2024 m_func_desc_size = (m_func_desc_size + 7) & ~7;
2026 int misc_data_offset = 0;
2027 int misc_data_size = 0;
2028 if (data_len > 0) {
2029 if (m_func_desc_size +
2030 m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
2031 m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
2032 // use misc/return data for copyin/copyout
2033 misc_data_offset = m_func_desc_size;
2034 misc_data_size = data_len;
2036 else {
2037 OffloadTimer timer_buf(get_timer_data(),
2038 c_offload_host_alloc_data_buffer);
2040 // send/receive data using buffer
2041 COIRESULT res = COI::BufferCreate(data_len,
2042 COI_BUFFER_NORMAL,
2043 0, 0,
2044 1, &m_device.get_process(),
2045 &m_inout_buf);
2046 if (res != COI_SUCCESS) {
2047 if (m_status != 0) {
2048 m_status->result = translate_coi_error(res);
2049 return false;
2051 report_coi_error(c_buf_create, res);
2054 m_compute_buffers.push_back(m_inout_buf);
2055 m_destroy_buffers.push_back(m_inout_buf);
2059 // initialize function descriptor
2060 m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
2061 misc_data_size);
2062 if (m_func_desc == NULL)
2063 LIBOFFLOAD_ERROR(c_malloc);
2064 m_func_desc->console_enabled = console_enabled;
2065 m_func_desc->timer_enabled =
2066 timer_enabled || (offload_report_level && offload_report_enabled);
2067 m_func_desc->offload_report_level = offload_report_level;
2068 m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
2069 m_func_desc->in_datalen = m_in_datalen;
2070 m_func_desc->out_datalen = m_out_datalen;
2071 m_func_desc->vars_num = m_vars_total;
2072 m_func_desc->data_offset = misc_data_offset;
2074 // append entry name
2075 strcpy(m_func_desc->data, name);
2078 return true;
2081 bool OffloadDescriptor::wait_dependencies(
2082 const void **waits,
2083 int num_waits
2086 OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
2087 bool ret = true;
2089 for (int i = 0; i < num_waits; i++) {
2091 OffloadDescriptor *task = m_device.find_signal(waits[i], true);
2092 if (task == 0) {
2093 LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
2094 waits[i]);
2095 LIBOFFLOAD_ABORT;
2098 if (!task->offload_finish()) {
2099 ret = false;
2102 task->cleanup();
2103 delete task;
2106 return ret;
2109 bool OffloadDescriptor::offload(
2110 const char *name,
2111 bool is_empty,
2112 VarDesc *vars,
2113 VarDesc2 *vars2,
2114 int vars_total,
2115 const void **waits,
2116 int num_waits,
2117 const void **signal,
2118 int entry_id,
2119 const void *stack_addr
2122 if (signal == 0) {
2123 OFFLOAD_DEBUG_TRACE_1(1,
2124 GET_OFFLOAD_NUMBER(get_timer_data()),
2125 c_offload_init_func,
2126 "Offload function %s, is_empty=%d, #varDescs=%d, "
2127 "#waits=%d, signal=none\n",
2128 name, is_empty, vars_total, num_waits);
2129 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2130 c_offload_sent_pointer_data,
2131 "#Wait : %d \n", num_waits);
2132 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2133 c_offload_signal,
2134 "none %d\n", 0);
2136 else {
2137 OFFLOAD_DEBUG_TRACE_1(1,
2138 GET_OFFLOAD_NUMBER(get_timer_data()),
2139 c_offload_init_func,
2140 "Offload function %s, is_empty=%d, #varDescs=%d, "
2141 "#waits=%d, signal=%p\n",
2142 name, is_empty, vars_total, num_waits,
2143 *signal);
2145 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2146 c_offload_signal,
2147 "%d\n", signal);
2149 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2150 c_offload_wait,
2151 "#Wait : %d %p\n", num_waits, waits);
2153 if (m_status != 0) {
2154 m_status->result = OFFLOAD_SUCCESS;
2155 m_status->device_number = m_device.get_logical_index();
2158 m_need_runfunction = !is_empty;
2160 // wait for dependencies to finish
2161 if (!wait_dependencies(waits, num_waits)) {
2162 cleanup();
2163 return false;
2166 // setup buffers
2167 if (!setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr)) {
2168 cleanup();
2169 return false;
2172 // initiate send for pointers. Want to do it as early as possible.
2173 if (!send_pointer_data(signal != 0)) {
2174 cleanup();
2175 return false;
2178 // setup misc data for run function
2179 if (!setup_misc_data(name)) {
2180 cleanup();
2181 return false;
2184 // gather copyin data into buffer
2185 if (!gather_copyin_data()) {
2186 cleanup();
2187 return false;
2190 // Start the computation
2191 if (!compute()) {
2192 cleanup();
2193 return false;
2196 // initiate receive for pointers
2197 if (!receive_pointer_data(signal != 0)) {
2198 cleanup();
2199 return false;
2202 // if there is a signal save descriptor for the later use.
2203 if (signal != 0) {
2204 m_device.add_signal(*signal, this);
2205 return true;
2208 // wait for the offload to finish.
2209 if (!offload_finish()) {
2210 cleanup();
2211 return false;
2214 cleanup();
2215 return true;
2218 bool OffloadDescriptor::offload_finish()
2220 COIRESULT res;
2222 // wait for compute dependencies to become signaled
2223 if (m_in_deps_total > 0) {
2224 OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
2226 if (__offload_active_wait) {
2227 // keep CPU busy
2228 do {
2229 res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
2231 while (res == COI_TIME_OUT_REACHED);
2233 else {
2234 res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
2237 if (res != COI_SUCCESS) {
2238 if (m_status != 0) {
2239 m_status->result = translate_coi_error(res);
2240 return false;
2242 report_coi_error(c_event_wait, res);
2246 // scatter copyout data received from target
2247 if (!scatter_copyout_data()) {
2248 return false;
2250 // wait for receive dependencies to become signaled
2251 if (m_out_deps_total > 0) {
2252 OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
2254 if (__offload_active_wait) {
2255 // keep CPU busy
2256 do {
2257 res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
2259 while (res == COI_TIME_OUT_REACHED);
2261 else {
2262 res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
2265 if (res != COI_SUCCESS) {
2266 if (m_status != 0) {
2267 m_status->result = translate_coi_error(res);
2268 return false;
2270 report_coi_error(c_event_wait, res);
2274 // destroy buffers
2276 OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
2278 for (BufferList::const_iterator it = m_destroy_buffers.begin();
2279 it != m_destroy_buffers.end(); it++) {
2280 res = COI::BufferDestroy(*it);
2281 if (res != COI_SUCCESS) {
2282 if (m_status != 0) {
2283 m_status->result = translate_coi_error(res);
2284 return false;
2286 report_coi_error(c_buf_destroy, res);
2291 return true;
2294 void OffloadDescriptor::cleanup()
2296 // release device in orsl
2297 ORSL::release(m_device.get_logical_index());
2299 OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);
2301 // report stuff
2302 Offload_Report_Epilog(get_timer_data());
2305 bool OffloadDescriptor::is_signaled()
2307 bool signaled = true;
2308 COIRESULT res;
2310 // check compute and receive dependencies
2311 if (m_in_deps_total > 0) {
2312 res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
2313 signaled = signaled && (res == COI_SUCCESS);
2315 if (m_out_deps_total > 0) {
2316 res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
2317 signaled = signaled && (res == COI_SUCCESS);
2320 return signaled;
2323 // Send pointer data if source or destination or both of them are
2324 // noncontiguous. There is guarantee that length of destination enough for
2325 // transfered data.
2326 bool OffloadDescriptor::send_noncontiguous_pointer_data(
2327 int i,
2328 PtrData* src_data,
2329 PtrData* dst_data,
2330 COIEVENT *event
2333 int64_t offset_src, offset_dst;
2334 int64_t length_src, length_dst;
2335 int64_t length_src_cur, length_dst_cur;
2336 int64_t send_size, data_sent = 0;
2337 COIRESULT res;
2338 bool dst_is_empty = true;
2339 bool src_is_empty = true;
2341 // Set length_src and length_dst
2342 length_src = (m_vars_extra[i].read_rng_src) ?
2343 m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
2344 length_dst = !m_vars[i].into ? length_src :
2345 (m_vars_extra[i].read_rng_dst) ?
2346 m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
2347 send_size = (length_src < length_dst) ? length_src : length_dst;
2349 // consequently get contiguous ranges,
2350 // define corresponded destination offset and send data
2351 do {
2352 if (src_is_empty) {
2353 if (m_vars_extra[i].read_rng_src) {
2354 if (!get_next_range(m_vars_extra[i].read_rng_src,
2355 &offset_src)) {
2356 // source ranges are over - nothing to send
2357 break;
2360 else if (data_sent == 0) {
2361 offset_src = m_vars_extra[i].cpu_disp;
2363 else {
2364 break;
2366 length_src_cur = length_src;
2368 else {
2369 // if source is contiguous or its contiguous range is greater
2370 // than destination one
2371 offset_src += send_size;
2373 length_src_cur -= send_size;
2374 src_is_empty = length_src_cur == 0;
2376 if (dst_is_empty) {
2377 if (m_vars[i].into) {
2378 if (m_vars_extra[i].read_rng_dst) {
2379 if (!get_next_range(m_vars_extra[i].read_rng_dst,
2380 &offset_dst)) {
2381 // destination ranges are over
2382 LIBOFFLOAD_ERROR(c_destination_is_over);
2383 return false;
2386 // into is contiguous.
2387 else {
2388 offset_dst = m_vars[i].disp;
2390 length_dst_cur = length_dst;
2392 // same as source
2393 else {
2394 offset_dst = offset_src;
2395 length_dst_cur = length_src;
2398 else {
2399 // if destination is contiguous or its contiguous range is greater
2400 // than source one
2401 offset_dst += send_size;
2403 length_dst_cur -= send_size;
2404 dst_is_empty = length_dst_cur == 0;
2406 if (src_data != 0 && src_data->cpu_buf != 0) {
2407 res = COI::BufferCopy(
2408 dst_data->mic_buf,
2409 src_data->cpu_buf,
2410 m_vars[i].mic_offset - dst_data->alloc_disp +
2411 m_vars[i].offset + offset_dst,
2412 m_vars_extra[i].cpu_offset + offset_src,
2413 send_size,
2414 COI_COPY_UNSPECIFIED,
2415 0, 0,
2416 event);
2417 if (res != COI_SUCCESS) {
2418 if (m_status != 0) {
2419 m_status->result = translate_coi_error(res);
2420 return false;
2422 report_coi_error(c_buf_copy, res);
2425 else {
2426 char *base = offload_get_src_base(m_vars[i].ptr,
2427 m_vars[i].type.src);
2429 res = COI::BufferWrite(
2430 dst_data->mic_buf,
2431 m_vars[i].mic_offset - dst_data->alloc_disp +
2432 m_vars[i].offset + offset_dst,
2433 base + offset_src,
2434 send_size,
2435 COI_COPY_UNSPECIFIED,
2436 0, 0,
2437 event);
2438 if (res != COI_SUCCESS) {
2439 if (m_status != 0) {
2440 m_status->result = translate_coi_error(res);
2441 return false;
2443 report_coi_error(c_buf_write, res);
2446 data_sent += length_src;
2448 while (true);
2449 return true;
// NOTE(review): this chunk is a lossy extraction — each line starts with what
// appears to be the original file's line number, and blank/brace-only lines
// are missing. Restore formatting from upstream before compiling.
//
// Initiates host->target transfers for all pointer-typed variables in
// m_vars[]. For each "in" variable it picks a transfer path:
//   - send_noncontiguous_pointer_data() when source or destination is
//     non-contiguous,
//   - COI::BufferCopy() when the host side is itself backed by a COI buffer,
//   - COI::BufferWrite() otherwise (raw host memory -> target buffer).
// Transfers become asynchronous (event recorded in m_in_deps) when is_async
// is set or the variable is large enough (__offload_use_async_buffer_write).
// Returns false on COI failure when m_status is set; otherwise COI errors
// abort via report_coi_error().
2452 bool OffloadDescriptor::send_pointer_data(bool is_async)
2454 OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
2456 uint64_t ptr_sent = 0;
2457 COIRESULT res;
2459 // Initiate send for pointer data
2460 for (int i = 0; i < m_vars_total; i++) {
2461 switch (m_vars[i].type.dst) {
2462 case c_data_ptr_array:
2463 break;
2464 case c_data:
2465 case c_void_ptr:
2466 case c_cean_var:
2467 if (m_vars[i].direction.in &&
2468 m_vars[i].flags.is_static_dstn) {
2469 COIEVENT *event =
2470 (is_async ||
2471 m_vars[i].size >= __offload_use_async_buffer_write) ?
2472 &m_in_deps[m_in_deps_total++] : 0;
2473 PtrData* dst_data = m_vars[i].into ?
2474 m_vars_extra[i].dst_data :
2475 m_vars_extra[i].src_data;
// NOTE(review): `A || B && C` parses as `A || (B && C)` — the is_static
// test binds only to VAR_TYPE_IS_SCALAR. The c_dv_data_slice case below
// parenthesizes the analogous expression explicitly; confirm this is the
// intended grouping here too.
2476 PtrData* src_data =
2477 VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
2478 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
2479 m_vars[i].flags.is_static ?
2480 m_vars_extra[i].src_data : 0;
2482 if (m_vars[i].flags.is_noncont_src ||
2483 m_vars[i].flags.is_noncont_dst) {
2484 if (!send_noncontiguous_pointer_data(
2485 i, src_data, dst_data, event)) {
2486 return false;
2489 else if (src_data != 0 && src_data->cpu_buf != 0) {
2490 res = COI::BufferCopy(
2491 dst_data->mic_buf,
2492 src_data->cpu_buf,
2493 m_vars[i].mic_offset - dst_data->alloc_disp +
2494 m_vars[i].offset + m_vars[i].disp,
2495 m_vars_extra[i].cpu_offset +
2496 m_vars_extra[i].cpu_disp,
2497 m_vars[i].size,
2498 COI_COPY_UNSPECIFIED,
2499 0, 0,
2500 event);
2501 if (res != COI_SUCCESS) {
2502 if (m_status != 0) {
2503 m_status->result = translate_coi_error(res);
2504 return false;
2506 report_coi_error(c_buf_copy, res);
2509 else {
2510 char *base = offload_get_src_base(m_vars[i].ptr,
2511 m_vars[i].type.src);
2512 res = COI::BufferWrite(
2513 dst_data->mic_buf,
2514 m_vars[i].mic_offset - dst_data->alloc_disp +
2515 m_vars[i].offset + m_vars[i].disp,
2516 base + m_vars_extra[i].cpu_disp,
2517 m_vars[i].size,
2518 COI_COPY_UNSPECIFIED,
2519 0, 0,
2520 event);
2521 if (res != COI_SUCCESS) {
2522 if (m_status != 0) {
2523 m_status->result = translate_coi_error(res);
2524 return false;
2526 report_coi_error(c_buf_write, res);
2529 ptr_sent += m_vars[i].size;
2531 break;
2533 case c_string_ptr:
2534 case c_data_ptr:
2535 case c_cean_var_ptr:
2536 case c_dv_ptr:
2537 if (m_vars[i].direction.in && m_vars[i].size > 0) {
2538 COIEVENT *event =
2539 (is_async ||
2540 m_vars[i].size >= __offload_use_async_buffer_write) ?
2541 &m_in_deps[m_in_deps_total++] : 0;
2542 PtrData* dst_data = m_vars[i].into ?
2543 m_vars_extra[i].dst_data :
2544 m_vars_extra[i].src_data;
// NOTE(review): same unparenthesized ||/&& expression as the c_data case
// above — see remark there.
2545 PtrData* src_data =
2546 VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
2547 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
2548 m_vars[i].flags.is_static ?
2549 m_vars_extra[i].src_data : 0;
// NOTE(review): unlike the c_data case (line 2484), the return value of
// send_noncontiguous_pointer_data() is ignored here and in the c_dv_data
// and c_dv_data_slice cases below, so a failed transfer is not reported
// to the caller — confirm whether that is intentional.
2551 if (m_vars[i].flags.is_noncont_src ||
2552 m_vars[i].flags.is_noncont_dst) {
2553 send_noncontiguous_pointer_data(
2554 i, src_data, dst_data, event);
2556 else if (src_data != 0 && src_data->cpu_buf != 0) {
2557 res = COI::BufferCopy(
2558 dst_data->mic_buf,
2559 src_data->cpu_buf,
2560 m_vars[i].mic_offset - dst_data->alloc_disp +
2561 m_vars[i].offset + m_vars[i].disp,
2562 m_vars_extra[i].cpu_offset +
2563 m_vars_extra[i].cpu_disp,
2564 m_vars[i].size,
2565 COI_COPY_UNSPECIFIED,
2566 0, 0,
2567 event);
2568 if (res != COI_SUCCESS) {
2569 if (m_status != 0) {
2570 m_status->result = translate_coi_error(res);
2571 return false;
2573 report_coi_error(c_buf_copy, res);
2576 else {
2577 char *base = offload_get_src_base(m_vars[i].ptr,
2578 m_vars[i].type.src);
2579 res = COI::BufferWrite(
2580 dst_data->mic_buf,
2581 m_vars[i].mic_offset - dst_data->alloc_disp +
2582 m_vars[i].offset + m_vars[i].disp,
2583 base + m_vars_extra[i].cpu_disp,
2584 m_vars[i].size,
2585 COI_COPY_UNSPECIFIED,
2586 0, 0,
2587 event);
2588 if (res != COI_SUCCESS) {
2589 if (m_status != 0) {
2590 m_status->result = translate_coi_error(res);
2591 return false;
2593 report_coi_error(c_buf_write, res);
2597 ptr_sent += m_vars[i].size;
2599 break;
2601 case c_dv_data:
2602 case c_dv_ptr_data:
2603 if (m_vars[i].direction.in &&
2604 m_vars[i].size > 0) {
2605 PtrData *ptr_data = m_vars[i].into ?
2606 m_vars_extra[i].dst_data :
2607 m_vars_extra[i].src_data;
2608 PtrData* src_data = m_vars_extra[i].src_data;
2610 COIEVENT *event =
2611 (is_async ||
2612 m_vars[i].size >= __offload_use_async_buffer_write) ?
2613 &m_in_deps[m_in_deps_total++] : 0;
2615 if (m_vars[i].flags.is_noncont_src ||
2616 m_vars[i].flags.is_noncont_dst) {
2617 send_noncontiguous_pointer_data(
2618 i, src_data, ptr_data, event);
2620 else if (src_data && src_data->cpu_buf != 0) {
2621 res = COI::BufferCopy(
2622 ptr_data->mic_buf,
2623 src_data->cpu_buf,
2624 m_vars[i].offset + ptr_data->mic_offset -
2625 ptr_data->alloc_disp +
2626 m_vars[i].disp,
2627 m_vars_extra[i].cpu_offset +
2628 m_vars_extra[i].cpu_disp,
2629 m_vars[i].size,
2630 COI_COPY_UNSPECIFIED,
2631 0, 0,
2632 event);
2633 if (res != COI_SUCCESS) {
2634 if (m_status != 0) {
2635 m_status->result = translate_coi_error(res);
2636 return false;
2638 report_coi_error(c_buf_copy, res);
2641 else {
2642 char *base = offload_get_src_base(m_vars[i].ptr,
2643 m_vars[i].type.src);
2644 res = COI::BufferWrite(
2645 ptr_data->mic_buf,
2646 ptr_data->mic_offset - ptr_data->alloc_disp +
2647 m_vars[i].offset + m_vars[i].disp,
2648 base + m_vars_extra[i].cpu_disp,
2649 m_vars[i].size,
2650 COI_COPY_UNSPECIFIED,
2651 0, 0,
2652 event);
2653 if (res != COI_SUCCESS) {
2654 if (m_status != 0) {
2655 m_status->result = translate_coi_error(res);
2656 return false;
2658 report_coi_error(c_buf_write, res);
2661 ptr_sent += m_vars[i].size;
2663 break;
2665 case c_dv_data_slice:
2666 case c_dv_ptr_data_slice:
2667 if (m_vars[i].direction.in &&
2668 m_vars[i].size > 0) {
2669 PtrData *dst_data = m_vars[i].into ?
2670 m_vars_extra[i].dst_data :
2671 m_vars_extra[i].src_data;
// Here the ||/&& expression IS parenthesized, unlike the earlier cases.
2672 PtrData* src_data =
2673 (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
2674 VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
2675 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
2676 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
2677 m_vars[i].flags.is_static) ?
2678 m_vars_extra[i].src_data : 0;
2679 COIEVENT *event =
2680 (is_async ||
2681 m_vars[i].size >= __offload_use_async_buffer_write) ?
2682 &m_in_deps[m_in_deps_total++] : 0;
2683 if (m_vars[i].flags.is_noncont_src ||
2684 m_vars[i].flags.is_noncont_dst) {
2685 send_noncontiguous_pointer_data(
2686 i, src_data, dst_data, event);
2688 else if (src_data && src_data->cpu_buf != 0) {
2689 res = COI::BufferCopy(
2690 dst_data->mic_buf,
2691 src_data->cpu_buf,
2692 m_vars[i].offset - dst_data->alloc_disp +
2693 dst_data->mic_offset +
2694 m_vars[i].disp,
2695 m_vars_extra[i].cpu_offset +
2696 m_vars_extra[i].cpu_disp,
2697 m_vars[i].size,
2698 COI_COPY_UNSPECIFIED,
2699 0, 0,
2700 event);
2701 if (res != COI_SUCCESS) {
2702 if (m_status != 0) {
2703 m_status->result = translate_coi_error(res);
2704 return false;
2706 report_coi_error(c_buf_copy, res);
2709 else {
2710 char *base = offload_get_src_base(m_vars[i].ptr,
2711 m_vars[i].type.src);
2712 res = COI::BufferWrite(
2713 dst_data->mic_buf,
2714 dst_data->mic_offset - dst_data->alloc_disp +
2715 m_vars[i].offset + m_vars[i].disp,
2716 base + m_vars_extra[i].cpu_disp,
2717 m_vars[i].size,
2718 COI_COPY_UNSPECIFIED,
2719 0, 0,
2720 event);
2721 if (res != COI_SUCCESS) {
2722 if (m_status != 0) {
2723 m_status->result = translate_coi_error(res);
2724 return false;
2726 report_coi_error(c_buf_write, res);
2730 ptr_sent += m_vars[i].size;
2732 break;
2734 default:
2735 break;
2738 // alloc field isn't used at target.
2739 // We can reuse it for offset of array pointers.
2740 if (m_vars_extra[i].is_arr_ptr_el) {
2741 m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
2745 if (m_status) {
2746 m_status->data_sent += ptr_sent;
2749 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
2750 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
2751 c_offload_sent_pointer_data,
2752 "Total pointer data sent to target: [%lld] bytes\n",
2753 ptr_sent);
2755 return true;
// NOTE(review): lossy extraction — leading integers look like original-file
// line numbers; blank/brace-only lines are missing. Restore from upstream.
//
// Marshals "copyin" data into the in/out buffer before launching the target
// function: writes the VarDesc array first, then per-variable payloads
// (scalars, dope vectors minus their base, obsolete stack addresses, function
// pointers) through the m_in marshaller. Only runs when a target function
// will be invoked and there is input data (m_need_runfunction && m_in_datalen).
// Returns false on COI failure when m_status is set; otherwise aborts via
// report_coi_error().
2758 bool OffloadDescriptor::gather_copyin_data()
2760 OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);
2762 if (m_need_runfunction && m_in_datalen > 0) {
2763 COIMAPINSTANCE map_inst;
2764 char *data;
// Either map the COI in/out buffer, or marshal directly into the trailing
// data area of the function descriptor.
2766 // init marshaller
2767 if (m_inout_buf != 0) {
2768 OffloadTimer timer_map(get_timer_data(),
2769 c_offload_host_map_in_data_buffer);
2771 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen,
2772 COI_MAP_WRITE_ENTIRE_BUFFER,
2773 0, 0, 0, &map_inst,
2774 reinterpret_cast<void**>(&data));
2775 if (res != COI_SUCCESS) {
2776 if (m_status != 0) {
2777 m_status->result = translate_coi_error(res);
2778 return false;
2780 report_coi_error(c_buf_map, res);
2783 else {
2784 data = (char*) m_func_desc + m_func_desc->data_offset;
2787 // send variable descriptors
2788 memcpy(data, m_vars, m_vars_total * sizeof(VarDesc));
2789 data += m_vars_total * sizeof(VarDesc);
2791 // init marshaller
2792 m_in.init_buffer(data, m_in_datalen);
2794 // Gather copy data into buffer
2795 for (int i = 0; i < m_vars_total; i++) {
2796 bool src_is_for_mic = (m_vars[i].direction.out ||
2797 m_vars[i].into == NULL);
2798 PtrData* ptr_data = src_is_for_mic ?
2799 m_vars_extra[i].src_data :
2800 m_vars_extra[i].dst_data;
2801 if (m_vars[i].flags.alloc_disp) {
2802 m_in.send_data(&ptr_data->alloc_disp,
2803 sizeof(ptr_data->alloc_disp));
2806 // send sink address to the target
2807 if (m_vars[i].flags.sink_addr) {
2808 m_in.send_data(&ptr_data->mic_addr,
2809 sizeof(ptr_data->mic_addr));
2812 switch (m_vars[i].type.dst) {
2813 case c_data_ptr_array:
2814 break;
2815 case c_data:
2816 case c_void_ptr:
2817 case c_cean_var:
2818 if (m_vars[i].direction.in &&
2819 !m_vars[i].flags.is_static_dstn) {
2821 char *ptr = offload_get_src_base(m_vars[i].ptr,
2822 m_vars[i].type.src);
2823 if (m_vars[i].type.dst == c_cean_var) {
2824 // offset and length are derived from the array
2825 // descriptor
2826 int64_t size = m_vars[i].size;
2827 int64_t disp = m_vars[i].disp;
2828 m_in.send_data(reinterpret_cast<char*>(&size),
2829 sizeof(int64_t));
2830 m_in.send_data(reinterpret_cast<char*>(&disp),
2831 sizeof(int64_t));
2834 m_in.send_data(ptr + m_vars_extra[i].cpu_disp,
2835 m_vars[i].size);
2837 break;
2839 case c_dv:
2840 if (m_vars[i].direction.bits ||
2841 m_vars[i].alloc_if ||
2842 m_vars[i].free_if) {
2843 // send dope vector excluding base
2844 char *ptr = static_cast<char*>(m_vars[i].ptr);
2845 m_in.send_data(ptr + sizeof(uint64_t),
2846 m_vars[i].size - sizeof(uint64_t));
2848 break;
2850 case c_data_ptr:
2851 // send to target addresses of obsolete
2852 // stacks to be released
2853 if (m_vars[i].flags.is_stack_buf &&
2854 !m_vars[i].direction.bits &&
2855 m_vars[i].alloc_if &&
2856 m_vars[i].size != 0) {
2857 for (PtrDataList::iterator it =
2858 m_destroy_stack.begin();
2859 it != m_destroy_stack.end(); it++) {
2860 PtrData * ptr_data = *it;
2861 m_in.send_data(&(ptr_data->mic_addr),
2862 sizeof(ptr_data->mic_addr));
2865 break;
2866 case c_func_ptr:
2867 if (m_vars[i].direction.in) {
2868 m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
2870 break;
2872 default:
2873 break;
2877 if (m_status) {
2878 m_status->data_sent += m_in.get_tfr_size();
// NOTE(review): the buffer was mapped when m_inout_buf != 0 (line 2767),
// but unmap is guarded by m_func_desc->data_offset == 0 — confirm the two
// conditions are equivalent in this descriptor's invariants.
2881 if (m_func_desc->data_offset == 0) {
2882 OffloadTimer timer_unmap(get_timer_data(),
2883 c_offload_host_unmap_in_data_buffer);
2884 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
2885 if (res != COI_SUCCESS) {
2886 if (m_status != 0) {
2887 m_status->result = translate_coi_error(res);
2888 return false;
2890 report_coi_error(c_buf_unmap, res);
2895 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
2896 OFFLOAD_DEBUG_TRACE_1(1,
2897 GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
2898 "Total copyin data sent to target: [%lld] bytes\n",
2899 m_in.get_tfr_size());
2901 return true;
// NOTE(review): lossy extraction — leading integers look like original-file
// line numbers; blank/brace-only lines are missing. Restore from upstream.
//
// Dispatches the offloaded function on the target device. The misc blob is
// the function descriptor (optionally extended by the copyin payload when it
// lives inline at data_offset); ret points at the area the target writes its
// output into. The dispatch waits on all previously recorded m_in_deps
// events; on success the completion event replaces them (m_in_deps_total=1)
// so subsequent reads depend on the compute task. Returns false on COI
// failure when m_status is set; otherwise aborts via report_coi_error().
2904 bool OffloadDescriptor::compute()
2906 OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);
2908 if (m_need_runfunction) {
2909 OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
2910 c_offload_compute, "Compute task on MIC\n");
2912 void* misc = m_func_desc;
2913 int misc_len = m_func_desc_size;
2914 void* ret = 0;
2915 int ret_len = 0;
2917 if (m_func_desc->data_offset != 0) {
2918 misc_len += m_in_datalen;
2920 if (m_out_datalen > 0) {
2921 ret = (char*) m_func_desc + m_func_desc->data_offset;
2922 ret_len = m_out_datalen;
2926 // dispatch task
2927 COIRESULT res;
2928 COIEVENT event;
2929 res = m_device.compute(m_compute_buffers,
2930 misc, misc_len,
2931 ret, ret_len,
2932 m_in_deps_total,
2933 m_in_deps_total > 0 ? m_in_deps : 0,
2934 &event);
2935 if (res != COI_SUCCESS) {
2936 if (m_status != 0) {
2937 m_status->result = translate_coi_error(res);
2938 return false;
2940 report_coi_error(c_pipeline_run_func, res);
// All earlier input events are superseded by the compute completion event.
2943 m_in_deps_total = 1;
2944 m_in_deps[0] = event;
2947 return true;
2950 // recieve pointer data if source or destination or both of them are
2951 // noncontiguous. There is guarantee that length of destination enough for
2952 // transfered data.
// NOTE(review): lossy extraction — leading integers look like original-file
// line numbers; blank/brace-only lines are missing. Restore from upstream.
//
// Receives target->host data for variable i when source and/or destination
// is non-contiguous: walks the contiguous sub-ranges of both sides in
// lockstep, transferring min(range) sized pieces per iteration, either into
// a host COI buffer (BufferCopy) or raw host memory at `base` (BufferRead).
// Each transfer waits on m_in_deps (the compute task). Returns false if the
// destination ranges run out before the source (c_destination_is_over) or on
// a reported COI failure with m_status set.
// NOTE(review): "recieve" is misspelled but is the declared method name —
// keep it unless the declaration and all callers are renamed together.
2953 bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
2954 int i,
2955 char* base,
2956 COIBUFFER dst_buf,
2957 COIEVENT *event
2960 int64_t offset_src, offset_dst;
2961 int64_t length_src, length_dst;
2962 int64_t length_src_cur, length_dst_cur;
2963 int64_t recieve_size, data_recieved = 0;
2964 COIRESULT res;
2965 bool dst_is_empty = true;
2966 bool src_is_empty = true;
2968 // Set length_src and length_dst
2969 length_src = (m_vars_extra[i].read_rng_src) ?
2970 m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
2971 length_dst = !m_vars[i].into ? length_src :
2972 (m_vars_extra[i].read_rng_dst) ?
2973 m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
// Per-iteration transfer unit: the smaller contiguous range of the two sides.
2974 recieve_size = (length_src < length_dst) ? length_src : length_dst;
2976 // consequently get contiguous ranges,
2977 // define corresponded destination offset and recieve data
2978 do {
2979 // get sorce offset
2980 if (src_is_empty) {
2981 if (m_vars_extra[i].read_rng_src) {
2982 if (!get_next_range(m_vars_extra[i].read_rng_src,
2983 &offset_src)) {
2984 // source ranges are over - nothing to send
2985 break;
2988 else if (data_recieved == 0) {
2989 offset_src = 0;
2991 else {
2992 break;
2994 length_src_cur = length_src;
2996 else {
2997 // if source is contiguous or its contiguous range is greater
2998 // than destination one
2999 offset_src += recieve_size;
3001 length_src_cur -= recieve_size;
3002 src_is_empty = length_src_cur == 0;
3004 // get destination offset
3005 if (dst_is_empty) {
3006 if (m_vars[i].into) {
3007 if (m_vars_extra[i].read_rng_dst) {
3008 if (!get_next_range(m_vars_extra[i].read_rng_dst,
3009 &offset_dst)) {
3010 // destination ranges are over
3011 LIBOFFLOAD_ERROR(c_destination_is_over);
3012 return false;
3015 // destination is contiguous.
3016 else {
3017 offset_dst = m_vars_extra[i].cpu_disp;
3019 length_dst_cur = length_dst;
3021 // same as source
3022 else {
3023 offset_dst = offset_src;
3024 length_dst_cur = length_src;
3027 else {
3028 // if destination is contiguous or its contiguous range is greater
3029 // than source one
3030 offset_dst += recieve_size;
3032 length_dst_cur -= recieve_size;
3033 dst_is_empty = length_dst_cur == 0;
// Destination backed by a COI buffer: device-to-host-buffer copy.
3035 if (dst_buf != 0) {
3036 res = COI::BufferCopy(
3037 dst_buf,
3038 m_vars_extra[i].src_data->mic_buf,
3039 m_vars_extra[i].cpu_offset + offset_dst,
3040 m_vars[i].offset + offset_src +
3041 m_vars[i].mic_offset -
3042 m_vars_extra[i].src_data->alloc_disp,
3043 recieve_size,
3044 COI_COPY_UNSPECIFIED,
3045 m_in_deps_total,
3046 m_in_deps_total > 0 ? m_in_deps : 0,
3047 event);
3048 if (res != COI_SUCCESS) {
3049 if (m_status != 0) {
3050 m_status->result = translate_coi_error(res);
3051 return false;
3053 report_coi_error(c_buf_copy, res);
// Otherwise read straight into raw host memory at base + offset_dst.
3056 else {
3057 res = COI::BufferRead(
3058 m_vars_extra[i].src_data->mic_buf,
3059 m_vars[i].offset + offset_src +
3060 m_vars[i].mic_offset -
3061 m_vars_extra[i].src_data->alloc_disp,
3062 base + offset_dst,
3063 recieve_size,
3064 COI_COPY_UNSPECIFIED,
3065 m_in_deps_total,
3066 m_in_deps_total > 0 ? m_in_deps : 0,
3067 event);
3068 if (res != COI_SUCCESS) {
3069 if (m_status != 0) {
3070 m_status->result = translate_coi_error(res);
3071 return false;
3073 report_coi_error(c_buf_read, res);
3076 data_recieved += recieve_size;
3078 while (true);
3079 return true;
// NOTE(review): lossy extraction — leading integers look like original-file
// line numbers; blank/brace-only lines are missing. Restore from upstream.
//
// Initiates target->host transfers for all "out" pointer variables, then
// performs post-transfer bookkeeping: queues obsolete stack buffers for
// destruction, drops auto-data references (OpenMP automatics), and
// dereferences / destroys buffer associations for variables with free_if.
// Transfer path per variable mirrors send_pointer_data: the misspelled
// recieve_noncontiguous_pointer_data() for strided data, BufferCopy when the
// host side has a COI buffer, BufferRead into raw host memory otherwise.
// Reads wait on m_in_deps (the compute task's completion event).
3082 bool OffloadDescriptor::receive_pointer_data(bool is_async)
3084 OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);
3086 uint64_t ptr_received = 0;
3087 COIRESULT res;
3089 for (int i = 0; i < m_vars_total; i++) {
3090 switch (m_vars[i].type.src) {
3091 case c_data_ptr_array:
3092 break;
3093 case c_data:
3094 case c_void_ptr:
3095 case c_cean_var:
3096 if (m_vars[i].direction.out &&
3097 m_vars[i].flags.is_static) {
3098 COIEVENT *event =
3099 (is_async ||
3100 m_in_deps_total > 0 ||
3101 m_vars[i].size >= __offload_use_async_buffer_read) ?
3102 &m_out_deps[m_out_deps_total++] : 0;
3103 PtrData *ptr_data = NULL;
3104 COIBUFFER dst_buf = NULL; // buffer at host
3105 char *base;
3107 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
3108 ptr_data = m_vars[i].into ?
3109 m_vars_extra[i].dst_data :
3110 m_vars_extra[i].src_data;
3112 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
3113 if (m_vars[i].flags.is_static_dstn) {
3114 ptr_data = m_vars[i].into ?
3115 m_vars_extra[i].dst_data :
3116 m_vars_extra[i].src_data;
3119 dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
3120 if (dst_buf == NULL) {
3121 base = offload_get_src_base(
3122 m_vars[i].into ?
3123 static_cast<char*>(m_vars[i].into) :
3124 static_cast<char*>(m_vars[i].ptr),
3125 m_vars[i].type.dst);
// NOTE(review): recieve_noncontiguous_pointer_data() can return false
// (destination ranges exhausted or COI error with m_status set), but its
// result is ignored here and in the pointer case below — confirm intent.
3128 if (m_vars[i].flags.is_noncont_src ||
3129 m_vars[i].flags.is_noncont_dst) {
3130 recieve_noncontiguous_pointer_data(
3131 i, base, dst_buf, event);
3133 else if (dst_buf != 0) {
3134 res = COI::BufferCopy(
3135 dst_buf,
3136 m_vars_extra[i].src_data->mic_buf,
3137 m_vars_extra[i].cpu_offset +
3138 m_vars_extra[i].cpu_disp,
3139 m_vars[i].offset + m_vars[i].disp,
3140 m_vars[i].size,
3141 COI_COPY_UNSPECIFIED,
3142 m_in_deps_total,
3143 m_in_deps_total > 0 ? m_in_deps : 0,
3144 event);
3145 if (res != COI_SUCCESS) {
3146 if (m_status != 0) {
3147 m_status->result = translate_coi_error(res);
3148 return false;
3150 report_coi_error(c_buf_copy, res);
3153 else {
3154 res = COI::BufferRead(
3155 m_vars_extra[i].src_data->mic_buf,
3156 m_vars[i].offset + m_vars[i].disp,
3157 base + m_vars_extra[i].cpu_offset +
3158 m_vars_extra[i].cpu_disp,
3159 m_vars[i].size,
3160 COI_COPY_UNSPECIFIED,
3161 m_in_deps_total,
3162 m_in_deps_total > 0 ? m_in_deps : 0,
3163 event);
3164 if (res != COI_SUCCESS) {
3165 if (m_status != 0) {
3166 m_status->result = translate_coi_error(res);
3167 return false;
3169 report_coi_error(c_buf_read, res);
3172 ptr_received += m_vars[i].size;
3174 break;
3176 case c_string_ptr:
3177 case c_data_ptr:
3178 case c_cean_var_ptr:
3179 case c_dv_data:
3180 case c_dv_ptr_data:
3181 case c_dv_data_slice:
3182 case c_dv_ptr_data_slice:
3183 case c_dv_ptr: {
3184 COIBUFFER dst_buf = NULL; // buffer on host
3185 if (m_vars[i].direction.out && m_vars[i].size > 0) {
3186 COIEVENT *event =
3187 (is_async ||
3188 m_in_deps_total > 0 ||
3189 m_vars[i].size >= __offload_use_async_buffer_read) ?
3190 &m_out_deps[m_out_deps_total++] : 0;
3192 uint64_t dst_offset = 0;
3193 char *base = static_cast<char*>(m_vars[i].ptr);
// Resolve destination (COI buffer vs. raw memory) per destination type.
3195 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
3196 PtrData *ptr_data = m_vars[i].into ?
3197 m_vars_extra[i].dst_data :
3198 m_vars_extra[i].src_data;
3199 dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
3200 if (dst_buf == NULL) {
3201 base = m_vars[i].into ?
3202 *static_cast<char**>(m_vars[i].into) :
3203 *static_cast<char**>(m_vars[i].ptr);
3205 dst_offset = m_vars_extra[i].cpu_offset +
3206 m_vars_extra[i].cpu_disp;
3208 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
3209 if (m_vars[i].flags.is_static_dstn) {
3210 dst_buf = m_vars[i].into ?
3211 m_vars_extra[i].dst_data->cpu_buf :
3212 m_vars_extra[i].src_data->cpu_buf;
3214 if (dst_buf == NULL) {
3215 base = offload_get_src_base(
3216 m_vars[i].into ?
3217 static_cast<char*>(m_vars[i].into) :
3218 static_cast<char*>(m_vars[i].ptr),
3219 m_vars[i].type.dst);
3221 dst_offset = m_vars_extra[i].cpu_offset +
3222 m_vars_extra[i].cpu_disp;
3224 else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
3225 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
3226 PtrData *ptr_data = m_vars[i].into != 0 ?
3227 m_vars_extra[i].dst_data :
3228 m_vars_extra[i].src_data;
3229 dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0;
3230 if (dst_buf == NULL) {
3231 base = offload_get_src_base(
3232 m_vars[i].into ?
3233 static_cast<char*>(m_vars[i].into) :
3234 static_cast<char*>(m_vars[i].ptr),
3235 m_vars[i].type.dst);
3238 dst_offset = m_vars_extra[i].cpu_offset +
3239 m_vars_extra[i].cpu_disp;
3242 if (m_vars[i].flags.is_noncont_src ||
3243 m_vars[i].flags.is_noncont_dst) {
3244 recieve_noncontiguous_pointer_data(
3245 i, base, dst_buf, event);
3247 else if (dst_buf != 0) {
3248 res = COI::BufferCopy(
3249 dst_buf,
3250 m_vars_extra[i].src_data->mic_buf,
3251 dst_offset,
3252 m_vars[i].offset + m_vars[i].disp +
3253 m_vars[i].mic_offset -
3254 m_vars_extra[i].src_data->alloc_disp,
3255 m_vars[i].size,
3256 COI_COPY_UNSPECIFIED,
3257 m_in_deps_total,
3258 m_in_deps_total > 0 ? m_in_deps : 0,
3259 event);
3260 if (res != COI_SUCCESS) {
3261 if (m_status != 0) {
3262 m_status->result = translate_coi_error(res);
3263 return false;
3265 report_coi_error(c_buf_copy, res);
3268 else {
3269 res = COI::BufferRead(
3270 m_vars_extra[i].src_data->mic_buf,
3271 m_vars[i].offset + m_vars[i].disp +
3272 m_vars[i].mic_offset -
3273 m_vars_extra[i].src_data->alloc_disp,
3274 base + dst_offset,
3275 m_vars[i].size,
3276 COI_COPY_UNSPECIFIED,
3277 m_in_deps_total,
3278 m_in_deps_total > 0 ? m_in_deps : 0,
3279 event);
3280 if (res != COI_SUCCESS) {
3281 if (m_status != 0) {
3282 m_status->result = translate_coi_error(res);
3283 return false;
3285 report_coi_error(c_buf_read, res);
3288 ptr_received += m_vars[i].size;
3290 break;
3293 default:
3294 break;
3297 // destroy buffers for obsolete stacks
3298 if (m_destroy_stack.size() != 0) {
3299 for (PtrDataList::iterator it = m_destroy_stack.begin();
3300 it != m_destroy_stack.end(); it++) {
3301 PtrData *ptr_data = *it;
3302 m_destroy_buffers.push_back(ptr_data->mic_buf);
3303 OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n",
3304 ptr_data->mic_addr);
3306 m_destroy_stack.clear();
3308 if (m_vars[i].free_if) {
3309 // remove association for automatic variables
3310 if (m_is_openmp && !m_vars[i].flags.is_static &&
3311 (m_vars[i].type.src == c_data ||
3312 m_vars[i].type.src == c_void_ptr ||
3313 m_vars[i].type.src == c_cean_var)) {
3314 AutoData *auto_data = m_vars_extra[i].auto_data;
3315 if (auto_data != 0 && auto_data->remove_reference() == 0) {
3316 m_device.remove_auto_data(auto_data->cpu_addr.start());
3320 // destroy buffers
3321 if (m_vars[i].direction.out || m_vars[i].into == NULL) {
3322 if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
3323 !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
3324 !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
3325 continue;
3328 PtrData *ptr_data = m_vars_extra[i].src_data;
3329 if (ptr_data->remove_reference() == 0) {
3330 // destroy buffers
3331 if (ptr_data->cpu_buf != 0) {
3332 m_destroy_buffers.push_back(ptr_data->cpu_buf);
3334 if (ptr_data->mic_buf != 0) {
3335 m_destroy_buffers.push_back(ptr_data->mic_buf);
3337 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
3338 ptr_data->cpu_addr.start());
3340 // remove association from map
3341 m_device.remove_ptr_data(ptr_data->cpu_addr.start());
3344 else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
3345 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
3346 VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
3347 PtrData *ptr_data = m_vars_extra[i].dst_data;
3348 if (ptr_data->remove_reference() == 0) {
3349 // destroy buffers
3350 if (ptr_data->cpu_buf != 0) {
3351 m_destroy_buffers.push_back(ptr_data->cpu_buf);
3353 if (ptr_data->mic_buf != 0) {
3354 m_destroy_buffers.push_back(ptr_data->mic_buf);
3356 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
3357 ptr_data->cpu_addr.start());
3359 // remove association from map
3360 m_device.remove_ptr_data(ptr_data->cpu_addr.start());
3366 if (m_status) {
3367 m_status->data_received += ptr_received;
3370 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
3371 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
3372 c_offload_received_pointer_data,
3373 "Total pointer data received from target: [%lld] bytes\n",
3374 ptr_received);
3376 return true;
// NOTE(review): lossy extraction — leading integers look like original-file
// line numbers; blank/brace-only lines are missing. Restore from upstream.
//
// Unmarshals "copyout" results produced by the target function: maps the
// in/out buffer (or uses the inline descriptor data area), consumes the
// target timing data, then distributes per-variable outputs (scalars,
// function pointers) through the m_out marshaller. Only runs when a target
// function was invoked and output data exists. Returns false on COI failure
// when m_status is set; otherwise aborts via report_coi_error().
3379 bool OffloadDescriptor::scatter_copyout_data()
3381 OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs);
3383 if (m_need_runfunction && m_out_datalen > 0) {
3385 // total size that need to be transferred from target to host
3386 COIMAPINSTANCE map_inst;
3387 COIRESULT res;
3388 char *data;
3390 // output data buffer
3391 if (m_func_desc->data_offset == 0) {
3392 OffloadTimer timer_map(get_timer_data(),
3393 c_offload_host_map_out_data_buffer);
// NOTE(review): this inner `res` shadows the outer COIRESULT res declared
// at line 3387 (which is otherwise unused in this view) — harmless but
// worth cleaning up.
3395 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_out_datalen,
3396 COI_MAP_READ_ONLY, 0, 0, 0,
3397 &map_inst,
3398 reinterpret_cast<void**>(&data));
3399 if (res != COI_SUCCESS) {
3400 if (m_status != 0) {
3401 m_status->result = translate_coi_error(res);
3402 return false;
3404 report_coi_error(c_buf_map, res);
3407 else {
3408 data = (char*) m_func_desc + m_func_desc->data_offset;
3411 // get timing data
3412 OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), data);
3413 data += OFFLOAD_TIMER_DATALEN();
3415 // initialize output marshaller
3416 m_out.init_buffer(data, m_out_datalen);
3418 for (int i = 0; i < m_vars_total; i++) {
3419 switch (m_vars[i].type.src) {
3420 case c_data_ptr_array:
3421 break;
3422 case c_data:
3423 case c_void_ptr:
3424 case c_cean_var:
3425 if (m_vars[i].direction.out &&
3426 !m_vars[i].flags.is_static) {
3428 if (m_vars[i].into) {
3429 char *ptr = offload_get_src_base(
3430 static_cast<char*>(m_vars[i].into),
3431 m_vars[i].type.dst);
3432 m_out.receive_data(ptr + m_vars_extra[i].cpu_disp,
3433 m_vars[i].size);
3435 else {
3436 m_out.receive_data(
3437 static_cast<char*>(m_vars[i].ptr) +
3438 m_vars_extra[i].cpu_disp,
3439 m_vars[i].size);
3442 break;
3444 case c_func_ptr:
3445 if (m_vars[i].direction.out) {
3446 m_out.receive_func_ptr((const void**) m_vars[i].ptr);
3448 break;
3450 default:
3451 break;
3455 if (m_status) {
3456 m_status->data_received += m_out.get_tfr_size();
3459 if (m_func_desc->data_offset == 0) {
3460 OffloadTimer timer_unmap(get_timer_data(),
3461 c_offload_host_unmap_out_data_buffer);
3463 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
3464 if (res != COI_SUCCESS) {
3465 if (m_status != 0) {
3466 m_status->result = translate_coi_error(res);
3467 return false;
3469 report_coi_error(c_buf_unmap, res);
3474 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size());
3475 OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n",
3476 m_out.get_tfr_size());
3478 return true;
3481 void get_arr_desc_numbers(
3482 const arr_desc *ap,
3483 int64_t el_size,
3484 int64_t &offset,
3485 int64_t &size,
3486 int &el_number,
3487 CeanReadRanges* &ptr_ranges
3490 if (is_arr_desc_contiguous(ap)) {
3491 ptr_ranges = NULL;
3492 __arr_data_offset_and_length(ap, offset, size);
3493 el_number = size / el_size;
3495 else {
3496 ptr_ranges = init_read_ranges_arr_desc(ap);
3497 el_number = (ptr_ranges->range_size / el_size) *
3498 ptr_ranges->range_max_number;
3499 size = ptr_ranges->range_size;
3503 arr_desc * make_arr_desc(
3504 void* ptr_val,
3505 int64_t extent_start_val,
3506 int64_t extent_elements_val,
3507 int64_t size
3510 arr_desc *res;
3511 res = (arr_desc *)malloc(sizeof(arr_desc));
3512 if (res == NULL)
3513 LIBOFFLOAD_ERROR(c_malloc);
3514 res->base = reinterpret_cast<int64_t>(ptr_val);
3515 res->rank = 1;
3516 res->dim[0].size = size;
3517 res->dim[0].lindex = 0;
3518 res->dim[0].lower = extent_start_val;
3519 res->dim[0].upper = extent_elements_val + extent_start_val - 1;
3520 res->dim[0].stride = 1;
3521 return res;
3524 bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
3526 int pointers_number;
3527 int tmp_val;
3528 int new_index = m_vars_total;
3529 const arr_desc *ap;
3530 const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
3531 int flags = vd3->array_fields;
3532 bool src_is_for_mic = (m_vars[i].direction.out ||
3533 m_vars[i].into == NULL);
3535 ReadArrElements<void *> ptr;
3536 ReadArrElements<void *> into;
3537 ReadArrElements<int64_t> ext_start;
3538 ReadArrElements<int64_t> ext_elements;
3539 ReadArrElements<int64_t> align;
3540 ReadArrElements<int64_t> alloc_if;
3541 ReadArrElements<int64_t> free_if;
3542 ReadArrElements<int64_t> into_start;
3543 ReadArrElements<int64_t> into_elem;
3544 ReadArrElements<int64_t> alloc_start;
3545 ReadArrElements<int64_t> alloc_elem;
3548 ap = static_cast<const arr_desc*>(vd3->ptr_array);
3550 // "pointers_number" for total number of transfered pointers.
3551 // For each of them we create new var_desc and put it at the bottom
3552 // of the var_desc's array
3553 get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
3554 pointers_number, ptr.ranges);
3555 ptr.base = reinterpret_cast<char*>(ap->base);
3557 // 2. prepare memory for new var_descs
3558 m_vars_total += pointers_number;
3559 m_vars = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc));
3560 if (m_vars == NULL)
3561 LIBOFFLOAD_ERROR(c_malloc);
3562 m_vars_extra =
3563 (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra));
3564 if (m_vars_extra == NULL)
3565 LIBOFFLOAD_ERROR(c_malloc);
3566 m_in_deps =
3567 (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1));
3568 if (m_in_deps == NULL)
3569 LIBOFFLOAD_ERROR(c_malloc);
3570 m_out_deps =
3571 (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total);
3572 if (m_out_deps == NULL)
3573 LIBOFFLOAD_ERROR(c_malloc);
3575 // 3. Prepare for reading new var_desc's fields
3576 // EXTENT START
3577 if ((flags & (1<<flag_extent_start_is_array)) != 0) {
3578 ap = static_cast<const arr_desc*>(vd3->extent_start);
3579 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
3580 ext_start.size, tmp_val, ext_start.ranges);
3581 ext_start.base = reinterpret_cast<char*>(ap->base);
3582 ext_start.el_size = ap->dim[ap->rank - 1].size;
3584 if (tmp_val < pointers_number) {
3585 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
3586 return false;
3589 else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) {
3590 ext_start.val = (int64_t)vd3->extent_start;
3592 else {
3593 ext_start.val = 0;
3596 // EXTENT ELEMENTS NUMBER
3597 if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
3598 ap = static_cast<const arr_desc*>(vd3->extent_elements);
3599 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
3600 ext_elements.offset, ext_elements.size,
3601 tmp_val, ext_elements.ranges);
3602 ext_elements.base = reinterpret_cast<char*>(ap->base);
3603 ext_elements.el_size = ap->dim[ap->rank - 1].size;
3605 if (tmp_val < pointers_number) {
3606 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
3607 return false;
3610 else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) {
3611 ext_elements.val = (int64_t)vd3->extent_elements;
3613 else {
3614 ext_elements.val = m_vars[i].count;
3617 // ALLOC_IF
3618 if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
3619 ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
3620 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
3621 alloc_if.size, tmp_val, alloc_if.ranges);
3622 alloc_if.base = reinterpret_cast<char*>(ap->base);
3623 alloc_if.el_size = ap->dim[ap->rank - 1].size;
3625 if (tmp_val < pointers_number) {
3626 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
3627 return false;
3630 else {
3631 alloc_if.val = m_vars[i].count;
3634 // FREE_IF
3635 if ((flags & (1<<flag_free_if_is_array)) != 0) {
3636 ap = static_cast<const arr_desc*>(vd3->free_if_array);
3637 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
3638 free_if.size, tmp_val, free_if.ranges);
3639 free_if.base = reinterpret_cast<char*>(ap->base);
3640 free_if.el_size = ap->dim[ap->rank - 1].size;
3642 if (tmp_val < pointers_number) {
3643 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
3644 return false;
3647 else {
3648 free_if.val = m_vars[i].count;
3651 // ALIGN
3653 if ((flags & (1<<flag_align_is_array)) != 0) {
3654 ap = static_cast<const arr_desc*>(vd3->align_array);
3655 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
3656 align.size, tmp_val, align.ranges);
3657 align.base = reinterpret_cast<char*>(ap->base);
3658 align.el_size = ap->dim[ap->rank - 1].size;
3660 if (tmp_val < pointers_number) {
3661 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
3662 return false;
3665 else {
3666 align.val = m_vars[i].align;
3669 // 3.1 INTO
3671 if (m_vars[i].into) {
3672 ap = static_cast<const arr_desc*>(m_vars[i].into);
3673 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
3674 into.size, tmp_val, into.ranges);
3675 into.base = reinterpret_cast<char*>(ap->base);
3677 if (tmp_val < pointers_number) {
3678 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
3679 return false;
3683 // 3.2 INTO_START
3685 if ((flags & (1<<flag_into_start_is_array)) != 0) {
3686 ap = static_cast<const arr_desc*>(vd3->into_start);
3687 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
3688 into_start.size, tmp_val, into_start.ranges);
3689 into_start.base = reinterpret_cast<char*>(ap->base);
3690 into_start.el_size = ap->dim[ap->rank - 1].size;
3692 if (tmp_val < pointers_number) {
3693 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
3694 return false;
3697 else if ((flags & (1<<flag_into_start_is_scalar)) != 0) {
3698 into_start.val = (int64_t)vd3->into_start;
3700 else {
3701 into_start.val = 0;
3704 // 3.3 INTO_ELEMENTS
3706 if ((flags & (1<<flag_into_elements_is_array)) != 0) {
3707 ap = static_cast<const arr_desc*>(vd3->into_elements);
3708 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
3709 into_elem.size, tmp_val, into_elem.ranges);
3710 into_elem.base = reinterpret_cast<char*>(ap->base);
3711 into_elem.el_size = ap->dim[ap->rank - 1].size;
3713 if (tmp_val < pointers_number) {
3714 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
3715 return false;
3718 else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) {
3719 into_elem.val = (int64_t)vd3->into_elements;
3721 else {
3722 into_elem.val = m_vars[i].count;
3725 // alloc_start
3727 if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
3728 ap = static_cast<const arr_desc*>(vd3->alloc_start);
3729 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
3730 alloc_start.offset, alloc_start.size, tmp_val,
3731 alloc_start.ranges);
3732 alloc_start.base = reinterpret_cast<char*>(ap->base);
3733 alloc_start.el_size = ap->dim[ap->rank - 1].size;
3735 if (tmp_val < pointers_number) {
3736 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
3737 return false;
3740 else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) {
3741 alloc_start.val = (int64_t)vd3->alloc_start;
3743 else {
3744 alloc_start.val = 0;
3747 // alloc_elem
3749 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
3750 ap = static_cast<const arr_desc*>(vd3->alloc_elements);
3751 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
3752 alloc_elem.size, tmp_val, alloc_elem.ranges);
3753 alloc_elem.base = reinterpret_cast<char*>(ap->base);
3754 alloc_elem.el_size = ap->dim[ap->rank - 1].size;
3755 if (tmp_val < pointers_number) {
3756 LIBOFFLOAD_ERROR(c_pointer_array_mismatch,
3757 "alloc_extent elements");
3758 return false;
3761 else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) {
3762 alloc_elem.val = (int64_t)vd3->alloc_elements;
3764 else {
3765 alloc_elem.val = 0;
3768 for (int k = 0; k < pointers_number; k++) {
3769 int type = flags & 0x3f;
3770 int type_src, type_dst;
3771 // Get new values
3772 // type_src, type_dst
3773 type_src = type_dst = (type == c_data_ptr_array) ?
3774 c_data_ptr : (type == c_func_ptr_array) ?
3775 c_func_ptr : (type == c_void_ptr_array) ?
3776 c_void_ptr : (type == c_string_ptr_array) ?
3777 c_string_ptr : 0;
3779 // Get ptr val
3780 if (!ptr.read_next(true)) {
3781 break;
3783 else {
3784 ptr.val = (void*)(ptr.base + ptr.offset);
3787 // !!! If we got error at phase of reading - it's an internal
3788 // !!! error, as we must detect mismatch before
3790 // Get into val
3791 if (m_vars[i].into) {
3792 if (!into.read_next(true)) {
3793 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
3794 LIBOFFLOAD_ABORT;
3796 else {
3797 into.val = (void*)(into.base + into.offset);
3801 // Get other components of the clause
3802 if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) {
3803 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
3804 LIBOFFLOAD_ABORT;
3806 if (!ext_elements.read_next(
3807 flags & (1<<flag_extent_elements_is_array))) {
3808 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
3809 LIBOFFLOAD_ABORT;
3811 if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) {
3812 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
3813 LIBOFFLOAD_ABORT;
3815 if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) {
3816 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
3817 LIBOFFLOAD_ABORT;
3819 if (!align.read_next(flags & (1<<flag_align_is_array))) {
3820 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
3821 LIBOFFLOAD_ABORT;
3823 if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) {
3824 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
3825 LIBOFFLOAD_ABORT;
3827 if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) {
3828 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
3829 LIBOFFLOAD_ABORT;
3831 if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) {
3832 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
3833 LIBOFFLOAD_ABORT;
3835 if (!alloc_elem.read_next(
3836 flags & (1<<flag_alloc_elements_is_array))) {
3837 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements");
3838 LIBOFFLOAD_ABORT;
3841 m_vars[new_index + k].direction.bits = m_vars[i].direction.bits;
3842 m_vars[new_index + k].alloc_if = alloc_if.val;
3843 m_vars[new_index + k].free_if = free_if.val;
3844 m_vars[new_index + k].align = align.val;
3845 m_vars[new_index + k].mic_offset = 0;
3846 m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
3847 m_vars[new_index + k].offset = 0;
3848 m_vars[new_index + k].size = m_vars[i].size;
3850 if (ext_start.val == 0) {
3851 m_vars[new_index + k].count = ext_elements.val;
3852 m_vars[new_index + k].ptr = ptr.val;
3853 if (type_src == c_string_ptr) {
3854 m_vars[new_index + k].size = 0;
3857 else {
3858 m_vars[new_index + k].count = 0;
3859 m_vars[new_index + k].ptr =
3860 static_cast<void*>(make_arr_desc(
3861 ptr.val,
3862 ext_start.val,
3863 ext_elements.val,
3864 m_vars[i].size));
3866 type_src = type_src == c_data_ptr ? c_cean_var_ptr :
3867 c_string_ptr ? c_cean_var_ptr :
3868 type_src;
3869 if (!m_vars[i].into) {
3870 type_dst = type_src;
3874 if (m_vars[i].into && into_elem.val != 0) {
3875 m_vars[new_index + k].into =
3876 static_cast<void*>(make_arr_desc(
3877 into.val,
3878 into_start.val,
3879 into_elem.val,
3880 m_vars[i].size));
3881 type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr :
3882 (type == c_string_ptr_array) ? c_cean_var_ptr :
3883 type_src;
3885 else {
3886 m_vars[new_index + k].into = NULL;
3889 if (alloc_elem.val != 0) {
3890 m_vars[new_index + k].alloc =
3891 static_cast<void*>(make_arr_desc(
3892 ptr.val,
3893 alloc_start.val,
3894 alloc_elem.val,
3895 m_vars[i].size));
3897 else {
3898 m_vars[new_index + k].alloc = NULL;
3901 m_vars[new_index + k].type.src = type_src;
3902 m_vars[new_index + k].type.dst = type_dst;
3904 m_vars_extra[new_index + k].is_arr_ptr_el = 1;
3905 m_vars_extra[new_index + k].ptr_arr_offset =
3906 src_is_for_mic ? ptr.offset : into.offset;
3908 // count and alloc fields are useless at target. They can be reused
3909 // for pointer arrays.
3910 m_vars[i].count = pointers_number;
3911 m_vars[i].ptr_arr_offset = new_index;
3912 return true;
3915 static void __offload_fini_library(void)
3917 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
3918 if (mic_engines_total > 0) {
3919 delete[] mic_engines;
3921 if (mic_proxy_fs_root != 0) {
3922 free(mic_proxy_fs_root);
3923 mic_proxy_fs_root = 0;
3926 if (mic_library_path != 0) {
3927 free(mic_library_path);
3928 mic_library_path = 0;
3931 // destroy thread key
3932 thread_key_delete(mic_thread_key);
3935 // unload COI library
3936 if (COI::is_available) {
3937 COI::fini();
3940 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
// One-time initializer for the host-side offload runtime, invoked
// through __offload_run_once().  Reads environment controls, brings up
// COI, discovers usable MIC devices, allocates per-device Engine state
// and creates the per-thread data key.  Any early return leaves
// mic_engines_total == 0, which callers interpret as "offload
// unavailable".  Statement order matters: COI must be initialized
// before device enumeration, and engines before the thread key.
// NOTE(review): blob extract -- blank and brace-only lines are absent
// from the text below.
3943 static void __offload_init_library_once(void)
3945 COIRESULT res;
3946 uint32_t num_devices;
3947 std::bitset<MIC_ENGINES_MAX> devices;
3949 prefix = report_get_message_str(c_report_host);
3951 // initialize trace
3952 const char *env_var = getenv(htrace_envname);
3953 if (env_var != 0 && *env_var != '\0') {
3954 int64_t new_val;
3955 if (__offload_parse_int_string(env_var, new_val)) {
// Only the low 4 bits of the parsed value are used as the trace level.
3956 console_enabled = new_val & 0x0f;
3960 env_var = getenv(offload_report_envname);
3961 if (env_var != 0 && *env_var != '\0') {
3962 int64_t env_val;
3963 if (__offload_parse_int_string(env_var, env_val)) {
3964 if (env_val == OFFLOAD_REPORT_1 ||
3965 env_val == OFFLOAD_REPORT_2 ||
3966 env_val == OFFLOAD_REPORT_3) {
3967 offload_report_level = env_val;
3969 else {
3970 LIBOFFLOAD_ERROR(c_invalid_env_report_value,
3971 offload_report_envname);
3974 else {
3975 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
3976 offload_report_envname);
// The timer env var is consulted only when no report level was set.
3979 else if (!offload_report_level) {
3980 env_var = getenv(timer_envname);
3981 if (env_var != 0 && *env_var != '\0') {
3982 timer_enabled = atoi(env_var);
3986 // initialize COI
3987 if (!COI::init()) {
3988 return;
3991 // get number of devices installed in the system
3992 res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
3993 if (res != COI_SUCCESS) {
3994 return;
// Clamp to the compile-time engine table size.
3997 if (num_devices > MIC_ENGINES_MAX) {
3998 num_devices = MIC_ENGINES_MAX;
4001 // fill in the list of devices that can be used for offloading
4002 env_var = getenv("OFFLOAD_DEVICES");
4003 if (env_var != 0) {
4004 if (strcasecmp(env_var, "none") != 0) {
4005 // value is composed of comma separated physical device indexes
4006 char *buf = strdup(env_var);
4007 char *str, *ptr;
4008 for (str = strtok_r(buf, ",", &ptr); str != 0;
4009 str = strtok_r(0, ",", &ptr)) {
4010 // convert string to an int
4011 int64_t num;
4012 if (!__offload_parse_int_string(str, num)) {
4013 LIBOFFLOAD_ERROR(c_mic_init5);
4015 // fallback to using all installed devices
4016 devices.reset();
4017 for (int i = 0; i < num_devices; i++) {
4018 devices.set(i);
4020 break;
// Out-of-range indexes are reported but otherwise ignored.
4022 if (num < 0 || num >= num_devices) {
4023 LIBOFFLOAD_ERROR(c_mic_init6, num);
4024 continue;
4026 devices.set(num);
4028 free(buf);
4031 else {
4032 // use all available devices
4033 for (int i = 0; i < num_devices; i++) {
4034 COIENGINE engine;
4035 res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
4036 if (res == COI_SUCCESS) {
4037 devices.set(i);
4042 mic_engines_total = devices.count();
4044 // no need to continue if there are no devices to offload to
4045 if (mic_engines_total <= 0) {
4046 return;
4049 // initialize indexes for available devices
4050 mic_engines = new Engine[mic_engines_total];
// Map dense logical engine indexes onto sparse physical device indexes.
4051 for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
4052 if (devices[p_idx]) {
4053 mic_engines[l_idx].set_indexes(l_idx, p_idx);
4054 l_idx++;
4058 // library search path for device binaries
4059 env_var = getenv("MIC_LD_LIBRARY_PATH");
4060 if (env_var != 0) {
4061 mic_library_path = strdup(env_var);
4064 // memory size reserved for COI buffers
4065 env_var = getenv("MIC_BUFFERSIZE");
4066 if (env_var != 0) {
4067 uint64_t new_size;
4068 if (__offload_parse_size_string(env_var, new_size)) {
4069 mic_buffer_size = new_size;
4071 else {
4072 LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
4076 // determine stacksize for the pipeline on the device
4077 env_var = getenv("MIC_STACKSIZE");
4078 if (env_var != 0 && *env_var != '\0') {
4079 uint64_t new_size;
// Stack size must be at least 16KB and 4KB-aligned.
4080 if (__offload_parse_size_string(env_var, new_size) &&
4081 (new_size >= 16384) && ((new_size & 4095) == 0)) {
4082 mic_stack_size = new_size;
4084 else {
4085 LIBOFFLOAD_ERROR(c_mic_init3);
4089 // proxy I/O
4090 env_var = getenv("MIC_PROXY_IO");
4091 if (env_var != 0 && *env_var != '\0') {
4092 int64_t new_val;
4093 if (__offload_parse_int_string(env_var, new_val)) {
4094 mic_proxy_io = new_val;
4096 else {
4097 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
4100 env_var = getenv("MIC_PROXY_FS_ROOT");
4101 if (env_var != 0 && *env_var != '\0') {
4102 mic_proxy_fs_root = strdup(env_var);
4105 // Prepare environment for the target process using the following
4106 // rules
4107 // - If MIC_ENV_PREFIX is set then any environment variable on the
4108 // host which has that prefix are copied to the device without
4109 // the prefix.
4110 // All other host environment variables are ignored.
4111 // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host
4112 // environment is duplicated.
4113 env_var = getenv("MIC_ENV_PREFIX");
4114 if (env_var != 0 && *env_var != '\0') {
4115 mic_env_vars.set_prefix(env_var);
4117 int len = strlen(env_var);
4118 for (int i = 0; environ[i] != 0; i++) {
// Skip MIC_LD_LIBRARY_PATH (handled separately above) and entries
// whose entire name equals the prefix.
4119 if (strncmp(environ[i], env_var, len) == 0 &&
4120 strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
4121 environ[i][len] != '=') {
4122 mic_env_vars.analyze_env_var(environ[i]);
4127 // create key for thread data
4128 if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
4129 LIBOFFLOAD_ERROR(c_mic_init4, errno);
4130 return;
4133 // cpu frequency
4134 cpu_frequency = COI::PerfGetCycleFrequency();
4136 env_var = getenv(mic_use_2mb_buffers_envname);
4137 if (env_var != 0 && *env_var != '\0') {
4138 uint64_t new_size;
4139 if (__offload_parse_size_string(env_var, new_size)) {
4140 __offload_use_2mb_buffers = new_size;
4142 else {
4143 LIBOFFLOAD_ERROR(c_invalid_env_var_value,
4144 mic_use_2mb_buffers_envname);
4148 env_var = getenv(mic_use_async_buffer_write_envname);
4149 if (env_var != 0 && *env_var != '\0') {
4150 uint64_t new_size;
4151 if (__offload_parse_size_string(env_var, new_size)) {
4152 __offload_use_async_buffer_write = new_size;
4156 env_var = getenv(mic_use_async_buffer_read_envname);
4157 if (env_var != 0 && *env_var != '\0') {
4158 uint64_t new_size;
4159 if (__offload_parse_size_string(env_var, new_size)) {
4160 __offload_use_async_buffer_read = new_size;
4164 // mic initialization type
4165 env_var = getenv(offload_init_envname);
4166 if (env_var != 0 && *env_var != '\0') {
4167 if (strcmp(env_var, "on_offload") == 0) {
4168 __offload_init_type = c_init_on_offload;
4170 else if (strcmp(env_var, "on_offload_all") == 0) {
4171 __offload_init_type = c_init_on_offload_all;
4173 #ifndef TARGET_WINNT
4174 else if (strcmp(env_var, "on_start") == 0) {
4175 __offload_init_type = c_init_on_start;
4177 #endif // TARGET_WINNT
4178 else {
4179 LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
4183 // active wait
4184 env_var = getenv(offload_active_wait_envname);
4185 if (env_var != 0 && *env_var != '\0') {
4186 int64_t new_val;
4187 if (__offload_parse_int_string(env_var, new_val)) {
4188 __offload_active_wait = new_val;
4190 else {
4191 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
4192 offload_active_wait_envname);
4196 // omp device num
4197 env_var = getenv(omp_device_num_envname);
4198 if (env_var != 0 && *env_var != '\0') {
4199 int64_t new_val;
4200 if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
4201 __omp_device_num = new_val;
4203 else {
4204 LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
4205 omp_device_num_envname);
4209 // init ORSL
4210 ORSL::init();
4213 extern int __offload_init_library(void)
4215 // do one time intialization
4216 static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
4217 __offload_run_once(&ctrl, __offload_init_library_once);
4219 // offload is available if COI is available and the number of devices > 0
4220 bool is_available = COI::is_available && (mic_engines_total > 0);
4222 // register pending libraries if there are any
4223 if (is_available && __target_libs) {
4224 mutex_locker_t locker(__target_libs_lock);
4226 for (TargetImageList::iterator it = __target_libs_list.begin();
4227 it != __target_libs_list.end(); it++) {
4228 // Register library in COI
4229 COI::ProcessRegisterLibraries(1, &it->data, &it->size,
4230 &it->origin, &it->offset);
4232 // add lib to all engines
4233 for (int i = 0; i < mic_engines_total; i++) {
4234 mic_engines[i].add_lib(*it);
4238 __target_libs = false;
4239 __target_libs_list.clear();
4242 return is_available;
4245 extern "C" void __offload_register_image(const void *target_image)
4247 const struct Image *image = static_cast<const struct Image*>(target_image);
4249 // decode image
4250 const char *name = image->data;
4251 const void *data = image->data + strlen(image->data) + 1;
4252 uint64_t size = image->size;
4253 const char *origin = 0;
4254 uint64_t offset = 0;
4256 // our actions depend on the image type
4257 const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
4258 switch (hdr->e_type) {
4259 case ET_EXEC:
4260 // Each offload application is supposed to have only one target
4261 // image representing target executable.
4262 // No thread synchronization is required here as the initialization
4263 // code is always executed in a single thread.
4264 if (__target_exe != 0) {
4265 LIBOFFLOAD_ERROR(c_multiple_target_exes);
4266 exit(1);
4268 __target_exe = new TargetImage(name, data, size, origin, offset);
4270 // Registration code for execs is always called from the context
4271 // of main and thus we can safely call any function here,
4272 // including LoadLibrary API on windows. This is the place where
4273 // we do the offload library initialization.
4274 if (__offload_init_library()) {
4275 // initialize engine if init_type is on_start
4276 if (__offload_init_type == c_init_on_start) {
4277 for (int i = 0; i < mic_engines_total; i++) {
4278 mic_engines[i].init();
4282 break;
4284 case ET_DYN:
4285 // Registration code for libraries is called from the DllMain
4286 // context (on windows) and thus we cannot do anything usefull
4287 // here. So we just add it to the list of pending libraries for
4288 // the later use.
4289 __target_libs_lock.lock();
4290 __target_libs = true;
4291 __target_libs_list.push_back(TargetImage(name, data, size,
4292 origin, offset));
4293 __target_libs_lock.unlock();
4294 break;
4296 default:
4297 // something is definitely wrong, issue an error and exit
4298 LIBOFFLOAD_ERROR(c_unknown_binary_type);
4299 exit(1);
4303 extern "C" void __offload_unregister_image(const void *target_image)
4305 // Target image is packed as follows:
4306 // 8 bytes - size of the target binary
4307 // null-terminated string - binary name
4308 // <size> bytes - binary contents
4309 const struct Image {
4310 int64_t size;
4311 char data[];
4312 } *image = static_cast<const struct Image*>(target_image);
4314 // decode image
4315 const char *name = image->data;
4316 const void *data = image->data + strlen(image->data) + 1;
4318 // our actions depend on the image type
4319 const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
4320 if (hdr->e_type == ET_EXEC) {
4321 // We are executing exec's desctructors.
4322 // It is time to do a library cleanup.
4323 if (timer_enabled) {
4324 Offload_Timer_Print();
4327 #ifdef MYO_SUPPORT
4328 __offload_myoFini();
4329 #endif // MYO_SUPPORT
4331 __offload_fini_library();
4335 // Runtime trace interface for user programs
4337 void __offload_console_trace(int level)
4339 console_enabled = level;
4342 // User-visible offload API
4344 int _Offload_number_of_devices(void)
4346 __offload_init_library();
4347 return mic_engines_total;
// Host-side stub: a real device number exists only in target code, so
// the host always answers -1.
int _Offload_get_device_number(void)
{
    return -1;
}
// Host-side stub: physical device numbers are defined only on the
// target, so the host always answers -1.
int _Offload_get_physical_device_number(void)
{
    return -1;
}
4360 int _Offload_signaled(int index, void *signal)
4362 __offload_init_library();
4364 // check index value
4365 if (index < 0 || mic_engines_total <= 0) {
4366 LIBOFFLOAD_ERROR(c_offload_signaled1, index);
4367 LIBOFFLOAD_ABORT;
4370 // find associated async task
4371 OffloadDescriptor *task =
4372 mic_engines[index % mic_engines_total].find_signal(signal, false);
4373 if (task == 0) {
4374 LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
4375 LIBOFFLOAD_ABORT;
4378 return task->is_signaled();
4381 void _Offload_report(int val)
4383 if (val == OFFLOAD_REPORT_ON ||
4384 val == OFFLOAD_REPORT_OFF) {
4385 offload_report_enabled = val;
4389 // IDB support
// State exported for the offload debugger (IDB) to read/write.
// NOTE(review): nothing in this extract modifies these -- the
// per-field semantics below are inferred from the names; confirm
// against the debugger interface definition.
4390 int __dbg_is_attached = 0;
4391 int __dbg_target_id = -1;
4392 pid_t __dbg_target_so_pid = -1;
4393 char __dbg_target_exe_name[MAX_TARGET_NAME] = {0};
// Version pair lets the debugger check interface compatibility.
4394 const int __dbg_api_major_version = 1;
4395 const int __dbg_api_minor_version = 0;
// Empty body on purpose -- presumably a debugger breakpoint hook
// (NOTE(review): confirm against the IDB interface).
void __dbg_target_so_loaded()
{
}
4400 void __dbg_target_so_unloaded()