poly_int: emit_group_load/store
[official-gcc.git] / liboffloadmic / runtime / offload_target.cpp
blob8273faac13b852d50a27eb2c4075e657601086d0
1 /*
2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "offload_target.h"
32 #include <stdlib.h>
33 #include <unistd.h>
34 #ifdef SEP_SUPPORT
35 #include <fcntl.h>
36 #include <sys/ioctl.h>
37 #endif // SEP_SUPPORT
38 #include <omp.h>
39 #include <map>
// Signature of an offloaded entry point as called by the target runtime.
// The single void* argument is a temporary fix for parameters on the stack;
// OffloadDescriptor::offload() passes the address of its descriptor there.
typedef void (*offload_func_with_parms)(void *);

// Target console and file logging state.
const char *prefix;
int console_enabled = 0;
int offload_report_level = 0;
// Trace information

// Printable names for a variable descriptor's copy direction, indexed by
// m_vars[i].direction.bits when descriptors are traced.
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};
// Printable names for variable descriptor types, indexed by the resolved
// type codes kept in m_vars_extra[i].type_src / type_dst when descriptors
// are traced. The order must stay in step with the type enumeration
// (declared outside this file).
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_extended_type",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array",
    "c_data_ptr_ptr",
    "c_func_ptr_ptr",
    "c_void_ptr_ptr",
    "c_string_ptr_ptr",
    "c_cean_var_ptr_ptr",
};
85 int mic_index = -1;
86 int mic_engines_total = -1;
87 uint64_t mic_frequency = 0;
88 int offload_number = 0;
89 static std::map<void*, RefInfo*> ref_data;
90 static mutex_t add_ref_lock;
92 #ifdef SEP_SUPPORT
93 static const char* sep_monitor_env = "SEP_MONITOR";
94 static bool sep_monitor = false;
95 static const char* sep_device_env = "SEP_DEVICE";
96 static const char* sep_device = "/dev/sep3.8/c";
97 static int sep_counter = 0;
99 #define SEP_API_IOC_MAGIC 99
100 #define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31)
101 #define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32)
103 static void add_ref_count(void * buf, bool created)
105 mutex_locker_t locker(add_ref_lock);
106 RefInfo * info = ref_data[buf];
108 if (info) {
109 info->count++;
111 else {
112 info = new RefInfo((int)created,(long)1);
114 info->is_added |= created;
115 ref_data[buf] = info;
118 static void BufReleaseRef(void * buf)
120 mutex_locker_t locker(add_ref_lock);
121 RefInfo * info = ref_data[buf];
123 if (info) {
124 --info->count;
125 if (info->count == 0 && info->is_added) {
126 OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
127 ((RefInfo *) ref_data[buf])->count);
128 BufferReleaseRef(buf);
129 info->is_added = 0;
134 static int VTPauseSampling(void)
136 int ret = -1;
137 int handle = open(sep_device, O_RDWR);
138 if (handle > 0) {
139 ret = ioctl(handle, SEP_IOCTL_PAUSE);
140 close(handle);
142 return ret;
145 static int VTResumeSampling(void)
147 int ret = -1;
148 int handle = open(sep_device, O_RDWR);
149 if (handle > 0) {
150 ret = ioctl(handle, SEP_IOCTL_RESUME);
151 close(handle);
153 return ret;
155 #endif // SEP_SUPPORT
157 void OffloadDescriptor::offload(
158 uint32_t buffer_count,
159 void** buffers,
160 void* misc_data,
161 uint16_t misc_data_len,
162 void* return_data,
163 uint16_t return_data_len
166 FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
167 const char *name = func->data;
168 OffloadDescriptor ofld;
169 char *in_data = 0;
170 char *out_data = 0;
171 char *timer_data = 0;
173 console_enabled = func->console_enabled;
174 timer_enabled = func->timer_enabled;
175 offload_report_level = func->offload_report_level;
176 offload_number = func->offload_number;
177 ofld.set_offload_number(func->offload_number);
179 #ifdef SEP_SUPPORT
180 if (sep_monitor) {
181 if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
182 OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
183 VTResumeSampling();
186 #endif // SEP_SUPPORT
188 OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
189 c_offload_start_target_func,
190 "Offload \"%s\" started\n", name);
192 // initialize timer data
193 OFFLOAD_TIMER_INIT();
195 OFFLOAD_TIMER_START(c_offload_target_total_time);
197 OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
199 // get input/output buffer addresses
200 if (func->in_datalen > 0 || func->out_datalen > 0) {
201 if (func->data_offset != 0) {
202 in_data = (char*) misc_data + func->data_offset;
203 out_data = (char*) return_data;
205 else {
206 char *inout_buf = (char*) buffers[--buffer_count];
207 in_data = inout_buf;
208 out_data = inout_buf;
212 // assign variable descriptors
213 ofld.m_vars_total = func->vars_num;
214 if (ofld.m_vars_total > 0) {
215 uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
217 ofld.m_vars = (VarDesc*) malloc(var_data_len);
218 if (ofld.m_vars == NULL)
219 LIBOFFLOAD_ERROR(c_malloc);
220 memcpy(ofld.m_vars, in_data, var_data_len);
222 ofld.m_vars_extra =
223 (VarExtra*) malloc(ofld.m_vars_total * sizeof(VarExtra));
224 if (ofld.m_vars == NULL)
225 LIBOFFLOAD_ERROR(c_malloc);
227 in_data += var_data_len;
228 func->in_datalen -= var_data_len;
231 // timer data
232 if (func->timer_enabled) {
233 uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
235 timer_data = out_data;
236 out_data += timer_data_len;
237 func->out_datalen -= timer_data_len;
240 // init Marshallers
241 ofld.m_in.init_buffer(in_data, func->in_datalen);
242 ofld.m_out.init_buffer(out_data, func->out_datalen);
244 // copy buffers to offload descriptor
245 std::copy(buffers, buffers + buffer_count,
246 std::back_inserter(ofld.m_buffers));
248 OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
250 // find offload entry address
251 OFFLOAD_TIMER_START(c_offload_target_func_lookup);
253 offload_func_with_parms entry = (offload_func_with_parms)
254 __offload_entries.find_addr(name);
256 if (entry == NULL) {
257 #if OFFLOAD_DEBUG > 0
258 if (console_enabled > 2) {
259 __offload_entries.dump();
261 #endif
262 LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
263 exit(1);
266 OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
268 OFFLOAD_TIMER_START(c_offload_target_func_time);
270 // execute offload entry
271 entry(&ofld);
273 OFFLOAD_TIMER_STOP(c_offload_target_func_time);
275 OFFLOAD_TIMER_STOP(c_offload_target_total_time);
277 // copy timer data to the buffer
278 OFFLOAD_TIMER_TARGET_DATA(timer_data);
280 OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
282 #ifdef SEP_SUPPORT
283 if (sep_monitor) {
284 if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
285 OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
286 VTPauseSampling();
289 #endif // SEP_SUPPORT
292 void OffloadDescriptor::merge_var_descs(
293 VarDesc *vars,
294 VarDesc2 *vars2,
295 int vars_total
298 // number of variable descriptors received from host and generated
299 // locally should match
300 if (m_vars_total < vars_total) {
301 LIBOFFLOAD_ERROR(c_merge_var_descs1);
302 exit(1);
305 for (int i = 0; i < m_vars_total; i++) {
306 // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src
308 if (i < vars_total) {
309 // variable type must match
310 if (m_vars[i].type.bits != vars[i].type.bits) {
311 OFFLOAD_TRACE(2,
312 "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n",
313 i, m_vars[i].type.bits, i, vars[i].type.bits);
314 LIBOFFLOAD_ERROR(c_merge_var_descs2);
315 exit(1);
318 if (m_vars[i].type.src == c_extended_type) {
319 VarDescExtendedType *etype =
320 reinterpret_cast<VarDescExtendedType*>(vars[i].ptr);
321 m_vars_extra[i].type_src = etype->extended_type;
322 m_vars[i].ptr = etype->ptr;
324 else {
325 m_vars_extra[i].type_src = m_vars[i].type.src;
326 if (!(m_vars[i].flags.use_device_ptr &&
327 m_vars[i].type.src == c_dv)) {
328 m_vars[i].ptr = vars[i].ptr;
331 // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst
332 if (m_vars[i].type.dst == c_extended_type && i < vars_total) {
333 VarDescExtendedType *etype =
334 reinterpret_cast<VarDescExtendedType*>(vars[i].into);
335 m_vars_extra[i].type_dst = etype->extended_type;
336 m_vars[i].into = etype->ptr;
338 else {
339 m_vars_extra[i].type_dst = m_vars[i].type.dst;
340 m_vars[i].into = vars[i].into;
343 const char *var_sname = "";
344 if (vars2 != NULL) {
345 if (vars2[i].sname != NULL) {
346 var_sname = vars2[i].sname;
349 OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
350 " VarDesc %d, var=%s, %s, %s\n",
351 i, var_sname,
352 vardesc_direction_as_string[m_vars[i].direction.bits],
353 vardesc_type_as_string[m_vars_extra[i].type_src]);
354 if (vars2 != NULL && vars2[i].dname != NULL) {
355 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
356 vardesc_type_as_string[m_vars_extra[i].type_dst]);
359 else {
360 m_vars_extra[i].type_src = m_vars[i].type.src;
361 m_vars_extra[i].type_dst = m_vars[i].type.dst;
364 OFFLOAD_TRACE(2,
365 " type_src=%d, type_dstn=%d, direction=%d, "
366 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
367 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
368 m_vars_extra[i].type_src,
369 m_vars_extra[i].type_dst,
370 m_vars[i].direction.bits,
371 m_vars[i].alloc_if,
372 m_vars[i].free_if,
373 m_vars[i].align,
374 m_vars[i].mic_offset,
375 m_vars[i].flags.bits,
376 m_vars[i].offset,
377 m_vars[i].size,
378 m_vars[i].count,
379 m_vars[i].ptr,
380 m_vars[i].into);
// Unpack the input stream (m_in) into the target-side variables described
// by m_vars / m_vars_extra. For each descriptor the function first fixes up
// the target pointer (taking a buffer from m_buffers or an address from the
// stream, and recording a buffer reference), then copies in the value data
// selected by the destination type. On return the "scatter inputs" timer is
// stopped and the "compute" timer is started.
// The order of receive_data() calls and m_buffers.pop_front() calls defines
// how the stream is consumed; do not reorder them.
384 void OffloadDescriptor::scatter_copyin_data()
386 OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
388 OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
389 m_in.get_buffer_start(),
390 m_in.get_buffer_size());
391 OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
392 m_in.get_buffer_size());
394 // receive data
395 for (int i = 0; i < m_vars_total; i++) {
// The source side (ptr / type_src / is_static) is used when the variable
// is an output or has no separate "into" destination; otherwise the
// destination side (into / type_dst / is_static_dstn) is used.
396 bool src_is_for_mic = (m_vars[i].direction.out ||
397 m_vars[i].into == NULL);
398 void** ptr_addr = src_is_for_mic ?
399 static_cast<void**>(m_vars[i].ptr) :
400 static_cast<void**>(m_vars[i].into);
401 int type = src_is_for_mic ? m_vars_extra[i].type_src :
402 m_vars_extra[i].type_dst;
403 bool is_static = src_is_for_mic ?
404 m_vars[i].flags.is_static :
405 m_vars[i].flags.is_static_dstn;
406 void *ptr = NULL;
// With alloc_disp set, one int64 is read from the stream; the value is not
// used afterwards in this function (presumably read only to keep the stream
// position in step with the sender - TODO confirm).
408 if (m_vars[i].flags.alloc_disp) {
409 int64_t offset = 0;
410 m_in.receive_data(&offset, sizeof(offset));
// For dope-vector data types the pointer to patch is the descriptor's Base
// field; the "_ptr_" variants reach the descriptor through one more
// indirection.
412 if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
413 VAR_TYPE_IS_DV_DATA(type)) {
414 ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
415 reinterpret_cast<ArrDesc*>(ptr_addr) :
416 *reinterpret_cast<ArrDesc**>(ptr_addr);
417 ptr_addr = reinterpret_cast<void**>(&dvp->Base);
419 // Set pointer values
420 switch (type) {
// Pointer array: descriptors ptr_arr_offset .. ptr_arr_offset + count - 1
// describe the elements; each gets its ptr (or into) set to the array base
// plus its own ptr_arr_offset.
421 case c_data_ptr_array:
423 int j = m_vars[i].ptr_arr_offset;
424 int max_el = j + m_vars[i].count;
425 char *dst_arr_ptr = (src_is_for_mic)?
426 *(reinterpret_cast<char**>(m_vars[i].ptr)) :
427 reinterpret_cast<char*>(m_vars[i].into);
429 // if is_pointer is 1 it means that pointer array itself
430 // is defined either via pointer or as class member.
431 // i.e. arr_ptr[0:5] or this->ARR[0:5]
432 if (m_vars[i].flags.is_pointer) {
433 int64_t offset = 0;
434 m_in.receive_data(&offset, sizeof(offset));
435 dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
437 for (; j < max_el; j++) {
438 if (src_is_for_mic) {
439 m_vars[j].ptr =
440 dst_arr_ptr + m_vars[j].ptr_arr_offset;
442 else {
443 m_vars[j].into =
444 dst_arr_ptr + m_vars[j].ptr_arr_offset;
448 break;
449 case c_data:
450 case c_void_ptr:
451 case c_void_ptr_ptr:
452 case c_cean_var:
453 case c_dv:
454 break;
456 case c_string_ptr:
457 case c_data_ptr:
458 case c_string_ptr_ptr:
459 case c_data_ptr_ptr:
460 case c_cean_var_ptr:
461 case c_cean_var_ptr_ptr:
462 case c_dv_ptr:
463 // Don't need ptr_addr value for variables from stack buffer.
464 // Stack buffer address is set at var_desc with #0.
465 if (i != 0 && m_vars[i].flags.is_stack_buf) {
466 break;
// Pointer-to-pointer types carry an extra offset in the stream that is
// applied to the pointee before the pointer is patched.
468 if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
469 TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
470 int64_t offset;
472 m_in.receive_data(&offset, sizeof(offset));
473 ptr_addr = reinterpret_cast<void**>(
474 reinterpret_cast<char*>(*ptr_addr) + offset);
// Allocation requested: the buffer address comes from the stream when
// sink_addr is set, otherwise from the front of m_buffers.
478 if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
479 void *buf = NULL;
480 if (m_vars[i].flags.sink_addr) {
481 m_in.receive_data(&buf, sizeof(buf));
483 else {
484 buf = m_buffers.front();
485 m_buffers.pop_front();
487 if (buf) {
488 if (!is_static) {
489 if (!m_vars[i].flags.sink_addr) {
490 // increment buffer reference
491 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
492 BufferAddRef(buf);
493 OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
494 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
496 add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
// NOTE(review): ref_data is read here without holding add_ref_lock, while
// add_ref_count()/BufReleaseRef() lock it.
497 OFFLOAD_TRACE(1, " AddRef count = %d\n",
498 ((RefInfo *) ref_data[buf])->count);
500 ptr = static_cast<char*>(buf) +
501 m_vars[i].mic_offset +
502 (m_vars[i].flags.is_stack_buf ?
503 0 : m_vars[i].offset);
506 *ptr_addr = ptr;
// No allocation, but the host supplied the target address directly.
508 else if (m_vars[i].flags.sink_addr) {
509 void *buf;
510 m_in.receive_data(&buf, sizeof(buf));
// This declaration shadows the outer "ptr" declared at the top of the loop.
511 void *ptr = static_cast<char*>(buf) +
512 m_vars[i].mic_offset +
513 (m_vars[i].flags.is_stack_buf ?
514 0 : m_vars[i].offset);
515 *ptr_addr = ptr;
517 break;
519 case c_func_ptr:
520 case c_func_ptr_ptr:
521 break;
// Dope-vector data: same buffer/address handling as the pointer cases above,
// without the preallocated and stack-buffer special cases.
523 case c_dv_data:
524 case c_dv_ptr_data:
525 case c_dv_data_slice:
526 case c_dv_ptr_data_slice:
527 if (m_vars[i].alloc_if) {
528 void *buf;
529 if (m_vars[i].flags.sink_addr) {
530 m_in.receive_data(&buf, sizeof(buf));
532 else {
533 buf = m_buffers.front();
534 m_buffers.pop_front();
536 if (buf) {
537 if (!is_static) {
538 if (!m_vars[i].flags.sink_addr) {
539 // increment buffer reference
540 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
541 BufferAddRef(buf);
542 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
544 add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
546 ptr = static_cast<char*>(buf) +
547 m_vars[i].mic_offset + m_vars[i].offset;
549 *ptr_addr = ptr;
551 else if (m_vars[i].flags.sink_addr) {
552 void *buf;
553 m_in.receive_data(&buf, sizeof(buf));
554 ptr = static_cast<char*>(buf) +
555 m_vars[i].mic_offset + m_vars[i].offset;
556 *ptr_addr = ptr;
558 break;
560 default:
561 LIBOFFLOAD_ERROR(c_unknown_var_type, type);
562 abort();
564 // Release obsolete buffers for stack of persistent objects.
565 // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
566 // stack buffer pointer.
// Here m_vars[0].size is used as the number of buffer addresses that follow
// in the stream.
567 if (i == 0 &&
568 m_vars[i].flags.is_stack_buf &&
569 !m_vars[i].direction.bits &&
570 m_vars[i].alloc_if &&
571 m_vars[i].size != 0) {
572 for (int j=0; j < m_vars[i].size; j++) {
573 void *buf;
574 m_in.receive_data(&buf, sizeof(buf));
575 OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
576 BufferReleaseRef(buf);
// NOTE(review): the map entry is erased without taking add_ref_lock, and the
// RefInfo it points to (allocated with new in add_ref_count) is not deleted
// anywhere visible in this file.
577 ref_data.erase(buf);
580 // Do copyin
// Value data is selected by the destination type, not by "type" above.
581 switch (m_vars_extra[i].type_dst) {
582 case c_data_ptr_array:
583 break;
584 case c_data:
585 case c_void_ptr:
586 case c_void_ptr_ptr:
587 case c_cean_var:
588 if (m_vars[i].direction.in &&
589 !m_vars[i].flags.is_static_dstn) {
590 int64_t size;
591 int64_t disp;
592 char* ptr = m_vars[i].into ?
593 static_cast<char*>(m_vars[i].into) :
594 static_cast<char*>(m_vars[i].ptr);
// For c_cean_var the size and displacement travel in the stream; for the
// other types the descriptor's size is used with no displacement.
595 if (m_vars_extra[i].type_dst == c_cean_var) {
596 m_in.receive_data((&size), sizeof(int64_t));
597 m_in.receive_data((&disp), sizeof(int64_t));
599 else {
600 size = m_vars[i].size;
601 disp = 0;
603 m_in.receive_data(ptr + disp, size);
605 break;
607 case c_dv:
608 if (m_vars[i].direction.bits ||
609 m_vars[i].alloc_if ||
610 m_vars[i].free_if) {
611 char* ptr = m_vars[i].into ?
612 static_cast<char*>(m_vars[i].into) :
613 static_cast<char*>(m_vars[i].ptr);
// The first sizeof(uint64_t) bytes of the descriptor are skipped and left as
// they are on the target.
614 m_in.receive_data(ptr + sizeof(uint64_t),
615 m_vars[i].size - sizeof(uint64_t));
617 break;
619 case c_string_ptr:
620 case c_data_ptr:
621 case c_string_ptr_ptr:
622 case c_data_ptr_ptr:
623 case c_cean_var_ptr:
624 case c_cean_var_ptr_ptr:
625 case c_dv_ptr:
626 case c_dv_data:
627 case c_dv_ptr_data:
628 case c_dv_data_slice:
629 case c_dv_ptr_data_slice:
630 break;
632 case c_func_ptr:
633 case c_func_ptr_ptr:
634 if (m_vars[i].direction.in) {
635 m_in.receive_func_ptr((const void**) m_vars[i].ptr);
637 break;
639 default:
640 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
641 abort();
645 OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
646 m_in.get_tfr_size());
648 OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
// The compute timer started here is stopped at the top of
// gather_copyout_data().
650 OFFLOAD_TIMER_START(c_offload_target_compute);
653 void OffloadDescriptor::gather_copyout_data()
655 OFFLOAD_TIMER_STOP(c_offload_target_compute);
657 OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
659 for (int i = 0; i < m_vars_total; i++) {
660 bool src_is_for_mic = (m_vars[i].direction.out ||
661 m_vars[i].into == NULL);
662 if (m_vars[i].flags.is_stack_buf) {
663 continue;
665 switch (m_vars_extra[i].type_src) {
666 case c_data_ptr_array:
667 break;
668 case c_data:
669 case c_void_ptr:
670 case c_void_ptr_ptr:
671 case c_cean_var:
672 if (m_vars[i].direction.out &&
673 !m_vars[i].flags.is_static) {
674 m_out.send_data(
675 static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
676 m_vars[i].size);
678 break;
680 case c_dv:
681 break;
683 case c_string_ptr:
684 case c_data_ptr:
685 case c_string_ptr_ptr:
686 case c_data_ptr_ptr:
687 case c_cean_var_ptr:
688 case c_cean_var_ptr_ptr:
689 case c_dv_ptr:
690 if (m_vars[i].free_if &&
691 src_is_for_mic &&
692 !m_vars[i].flags.preallocated &&
693 !m_vars[i].flags.is_static) {
694 void *buf = *static_cast<char**>(m_vars[i].ptr) -
695 m_vars[i].mic_offset -
696 (m_vars[i].flags.is_stack_buf?
697 0 : m_vars[i].offset);
698 if (buf == NULL) {
699 break;
701 // decrement buffer reference count
702 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
703 BufReleaseRef(buf);
704 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
706 if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
707 m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
709 break;
711 case c_func_ptr:
712 case c_func_ptr_ptr:
713 if (m_vars[i].direction.out) {
714 m_out.send_func_ptr(*((void**) m_vars[i].ptr));
716 break;
718 case c_dv_data:
719 case c_dv_ptr_data:
720 case c_dv_data_slice:
721 case c_dv_ptr_data_slice:
722 if (src_is_for_mic &&
723 m_vars[i].free_if &&
724 !m_vars[i].flags.is_static) {
725 ArrDesc *dvp = (m_vars_extra[i].type_src == c_dv_data ||
726 m_vars_extra[i].type_src == c_dv_data_slice) ?
727 static_cast<ArrDesc*>(m_vars[i].ptr) :
728 *static_cast<ArrDesc**>(m_vars[i].ptr);
730 void *buf = reinterpret_cast<char*>(dvp->Base) -
731 m_vars[i].mic_offset -
732 m_vars[i].offset;
734 if (buf == NULL) {
735 break;
738 // decrement buffer reference count
739 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
740 BufReleaseRef(buf);
741 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
743 break;
745 default:
746 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
747 abort();
750 if (m_vars[i].into) {
751 switch (m_vars_extra[i].type_dst) {
752 case c_data_ptr_array:
753 break;
754 case c_data:
755 case c_void_ptr:
756 case c_void_ptr_ptr:
757 case c_cean_var:
758 case c_dv:
759 break;
761 case c_string_ptr:
762 case c_data_ptr:
763 case c_string_ptr_ptr:
764 case c_data_ptr_ptr:
765 case c_cean_var_ptr:
766 case c_cean_var_ptr_ptr:
767 case c_dv_ptr:
768 if (m_vars[i].direction.in &&
769 m_vars[i].free_if &&
770 !m_vars[i].flags.is_static_dstn) {
771 void *buf = *static_cast<char**>(m_vars[i].into) -
772 m_vars[i].mic_offset -
773 (m_vars[i].flags.is_stack_buf?
774 0 : m_vars[i].offset);
776 if (buf == NULL) {
777 break;
779 // decrement buffer reference count
780 OFFLOAD_TIMER_START(
781 c_offload_target_release_buffer_refs);
782 BufReleaseRef(buf);
783 OFFLOAD_TIMER_STOP(
784 c_offload_target_release_buffer_refs);
786 break;
788 case c_func_ptr:
789 case c_func_ptr_ptr:
790 break;
792 case c_dv_data:
793 case c_dv_ptr_data:
794 case c_dv_data_slice:
795 case c_dv_ptr_data_slice:
796 if (m_vars[i].free_if &&
797 m_vars[i].direction.in &&
798 !m_vars[i].flags.is_static_dstn) {
799 ArrDesc *dvp =
800 (m_vars_extra[i].type_dst == c_dv_data_slice ||
801 m_vars_extra[i].type_dst == c_dv_data) ?
802 static_cast<ArrDesc*>(m_vars[i].into) :
803 *static_cast<ArrDesc**>(m_vars[i].into);
804 void *buf = reinterpret_cast<char*>(dvp->Base) -
805 m_vars[i].mic_offset -
806 m_vars[i].offset;
808 if (buf == NULL) {
809 break;
811 // decrement buffer reference count
812 OFFLOAD_TIMER_START(
813 c_offload_target_release_buffer_refs);
814 BufReleaseRef(buf);
815 OFFLOAD_TIMER_STOP(
816 c_offload_target_release_buffer_refs);
818 break;
820 default:
821 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
822 abort();
827 OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
828 m_out.get_buffer_start(),
829 m_out.get_buffer_size());
831 OFFLOAD_DEBUG_DUMP_BYTES(2,
832 m_out.get_buffer_start(),
833 m_out.get_buffer_size());
835 OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
836 "Total copyout data sent to host: [%lld] bytes\n",
837 m_out.get_tfr_size());
839 OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
842 void __offload_target_init(void)
844 #ifdef SEP_SUPPORT
845 const char* env_var = getenv(sep_monitor_env);
846 if (env_var != 0 && *env_var != '\0') {
847 sep_monitor = atoi(env_var);
849 env_var = getenv(sep_device_env);
850 if (env_var != 0 && *env_var != '\0') {
851 sep_device = env_var;
853 #endif // SEP_SUPPORT
855 prefix = report_get_message_str(c_report_mic);
857 // init frequency
858 mic_frequency = COIPerfGetCycleFrequency();
861 // User-visible offload API
863 int _Offload_number_of_devices(void)
865 return mic_engines_total;
868 int _Offload_get_device_number(void)
870 return mic_index;
873 int _Offload_get_physical_device_number(void)
875 uint32_t index;
876 EngineGetIndex(&index);
877 return index;