2 Copyright (c) 2014 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "offload_target.h"
36 #include <sys/ioctl.h>
41 // typedef offload_func_with_parms.
42 // Pointer to function that represents an offloaded entry point.
43 // The parameters are a temporary fix for parameters on the stack.
// Shape: takes a single untyped pointer and returns nothing.
// OffloadDescriptor::offload() casts the address returned by
// __offload_entries.find_addr() to this type.
44 typedef void (*offload_func_with_parms
)(void *);
46 // Target console and file logging
// Both settings below are overwritten at the start of every
// OffloadDescriptor::offload() call with the values carried by the
// incoming FunctionDescriptor.
48 int console_enabled
= 0;
49 int offload_report_level
= 0;
// Name table indexed with VarDesc direction.bits when merge_var_descs()
// traces a descriptor. The initializer entries are not visible in this view.
52 static const char* vardesc_direction_as_string
[] = {
// Name table indexed with VarDesc type.src / type.dst when merge_var_descs()
// traces a descriptor. The initializer entries are not visible in this view.
58 static const char* vardesc_type_as_string
[] = {
// Value handed back by _Offload_number_of_devices(); starts out as -1.
77 int mic_engines_total
= -1;
// Assigned from COIPerfGetCycleFrequency() in __offload_target_init().
78 uint64_t mic_frequency
= 0;
// Overwritten with func->offload_number on each offload() call.
79 int offload_number
= 0;
// Per-buffer bookkeeping keyed by buffer address. add_ref_count() and
// BufReleaseRef() both take add_ref_lock before touching this map.
80 static std::map
<void*, RefInfo
*> ref_data
;
81 static mutex_t add_ref_lock
;
// SEP sampling support. A matching "#endif // SEP_SUPPORT" appears further
// down the file, so these declarations presumably sit inside an
// "#ifdef SEP_SUPPORT" whose opening line is not visible here - TODO confirm.
//
// Name of the environment variable read in __offload_target_init(); its
// value is run through atoi() and stored in sep_monitor.
84 static const char* sep_monitor_env
= "SEP_MONITOR";
85 static bool sep_monitor
= false;
// Name of the environment variable that, when set and non-empty, replaces
// the default device path held in sep_device.
86 static const char* sep_device_env
= "SEP_DEVICE";
// Path opened (O_RDWR) by VTPauseSampling() / VTResumeSampling().
87 static const char* sep_device
= "/dev/sep3.8/c";
// Incremented on entry to offload() and decremented on exit, both with
// __sync atomic builtins; the 0 <-> 1 transitions are the ones traced as
// "VTResumeSampling" / "VTPauseSampling".
88 static int sep_counter
= 0;
// ioctl request codes issued on the sep_device handle.
90 #define SEP_API_IOC_MAGIC 99
91 #define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31)
92 #define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32)
// Records one more reference to the buffer at address `buf` in ref_data.
//   buf     - buffer address used as the map key
//   created - OR-ed into the entry's is_added flag; also passed (as int) to
//             the RefInfo constructor when a new entry is allocated
// The whole function runs with add_ref_lock held.
// NOTE(review): the lines between the lookup and the `new RefInfo` are not
// visible here; presumably they test `info` and bump the count of an
// existing entry - confirm against the full file.
94 static void add_ref_count(void * buf
, bool created
)
96 mutex_locker_t
locker(add_ref_lock
);
// std::map::operator[] inserts a null RefInfo* when `buf` is not yet known.
97 RefInfo
* info
= ref_data
[buf
];
103 info
= new RefInfo((int)created
,(long)1);
105 info
->is_added
|= created
;
// Store the (possibly freshly allocated) entry back under the same key.
106 ref_data
[buf
] = info
;
// Drops one reference to the buffer at address `buf`.
// With add_ref_lock held, looks the buffer up in ref_data and calls
// BufferReleaseRef(buf) once the entry's count is 0 and its is_added flag
// is set.
// NOTE(review): the lines that test `info` and change `count` are not
// visible in this view; only the release condition is.
109 static void BufReleaseRef(void * buf
)
111 mutex_locker_t
locker(add_ref_lock
);
// operator[] inserts a null entry for a buffer that was never registered.
112 RefInfo
* info
= ref_data
[buf
];
116 if (info
->count
== 0 && info
->is_added
) {
117 BufferReleaseRef(buf
);
// Opens the SEP device (sep_device, read/write) and issues SEP_IOCTL_PAUSE
// on it; the ioctl result is stored in `ret`.
// NOTE(review): the declaration of `ret`, any check of `handle`, the
// close() of the handle and the return statement are not visible in this
// view - confirm the descriptor is closed on every path.
123 static int VTPauseSampling(void)
126 int handle
= open(sep_device
, O_RDWR
);
128 ret
= ioctl(handle
, SEP_IOCTL_PAUSE
);
// Counterpart of VTPauseSampling(): opens the SEP device (sep_device,
// read/write) and issues SEP_IOCTL_RESUME on it; the ioctl result is stored
// in `ret`.
// NOTE(review): the declaration of `ret`, any check of `handle`, the
// close() of the handle and the return statement are not visible in this
// view - confirm the descriptor is closed on every path.
134 static int VTResumeSampling(void)
137 int handle
= open(sep_device
, O_RDWR
);
139 ret
= ioctl(handle
, SEP_IOCTL_RESUME
);
144 #endif // SEP_SUPPORT
// Runs one offloaded function on the target.
//
// Visible steps, in order:
//   1. treat misc_data as a FunctionDescriptor and copy its console, timer,
//      report-level and offload-number settings into the file-level globals;
//   2. locate the input/output data areas (inside misc_data/return_data when
//      data_offset is non-zero, otherwise in the last entry of `buffers`);
//   3. copy the variable descriptors out of the front of the input data;
//   4. reserve room for timer data at the front of the output data when
//      timers are enabled;
//   5. initialise ofld.m_in / ofld.m_out and copy the remaining buffers;
//   6. look the entry point up by name in __offload_entries;
//   7. stop the timers and emit the timer data.
//
// Parameters visible here: buffer_count, misc_data_len, return_data_len.
// `buffers`, `misc_data` and `return_data` are used in the body, but their
// declarations are on lines not visible in this view, as are the braces and
// the call of the entry point itself.
146 void OffloadDescriptor::offload(
147 uint32_t buffer_count
,
150 uint16_t misc_data_len
,
152 uint16_t return_data_len
155 FunctionDescriptor
*func
= (FunctionDescriptor
*) misc_data
;
// The entry-point name is read from the descriptor's `data` member.
156 const char *name
= func
->data
;
157 OffloadDescriptor ofld
;
160 char *timer_data
= 0;
// NOTE(review): the assignments below write process-wide globals without
// any visible locking, while sep_counter further down is updated atomically
// - confirm whether offloads can run concurrently.
162 console_enabled
= func
->console_enabled
;
163 timer_enabled
= func
->timer_enabled
;
164 offload_report_level
= func
->offload_report_level
;
165 offload_number
= func
->offload_number
;
166 ofld
.set_offload_number(func
->offload_number
);
// Only the caller that moves sep_counter from 0 to 1 takes this branch.
170 if (__sync_fetch_and_add(&sep_counter
, 1) == 0) {
171 OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
175 #endif // SEP_SUPPORT
177 OFFLOAD_DEBUG_TRACE_1(2, ofld
.get_offload_number(),
178 c_offload_start_target_func
,
179 "Offload \"%s\" started\n", name
);
181 // initialize timer data
182 OFFLOAD_TIMER_INIT();
184 OFFLOAD_TIMER_START(c_offload_target_total_time
);
186 OFFLOAD_TIMER_START(c_offload_target_descriptor_setup
);
188 // get input/output buffer addresses
189 if (func
->in_datalen
> 0 || func
->out_datalen
> 0) {
// Non-zero data_offset: input data lives inside misc_data at that offset
// and output data goes to return_data.
190 if (func
->data_offset
!= 0) {
191 in_data
= (char*) misc_data
+ func
->data_offset
;
192 out_data
= (char*) return_data
;
// Otherwise the last buffer is taken off the list (buffer_count is
// decremented) and used for the data.
195 char *inout_buf
= (char*) buffers
[--buffer_count
];
197 out_data
= inout_buf
;
201 // assign variable descriptors
202 ofld
.m_vars_total
= func
->vars_num
;
203 if (ofld
.m_vars_total
> 0) {
204 uint64_t var_data_len
= ofld
.m_vars_total
* sizeof(VarDesc
);
206 ofld
.m_vars
= (VarDesc
*) malloc(var_data_len
);
// NOTE(review): no bail-out is visible between the error report and the
// memcpy below; unless LIBOFFLOAD_ERROR does not return, a failed malloc is
// followed by a copy into a null pointer - confirm.
207 if (ofld
.m_vars
== NULL
)
208 LIBOFFLOAD_ERROR(c_malloc
);
209 memcpy(ofld
.m_vars
, in_data
, var_data_len
);
// The descriptors have been consumed: advance past them and shrink the
// remaining input length accordingly.
211 in_data
+= var_data_len
;
212 func
->in_datalen
-= var_data_len
;
// Timer data, when enabled, occupies the front of the output area.
216 if (func
->timer_enabled
) {
217 uint64_t timer_data_len
= OFFLOAD_TIMER_DATALEN();
219 timer_data
= out_data
;
220 out_data
+= timer_data_len
;
221 func
->out_datalen
-= timer_data_len
;
225 ofld
.m_in
.init_buffer(in_data
, func
->in_datalen
);
226 ofld
.m_out
.init_buffer(out_data
, func
->out_datalen
);
228 // copy buffers to offload descriptor
229 std::copy(buffers
, buffers
+ buffer_count
,
230 std::back_inserter(ofld
.m_buffers
));
232 OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup
);
234 // find offload entry address
235 OFFLOAD_TIMER_START(c_offload_target_func_lookup
);
237 offload_func_with_parms entry
= (offload_func_with_parms
)
238 __offload_entries
.find_addr(name
);
// The condition guarding the dump and the error report below is on a line
// not visible here; presumably it is the "entry not found" case.
241 #if OFFLOAD_DEBUG > 0
242 if (console_enabled
> 2) {
243 __offload_entries
.dump();
246 LIBOFFLOAD_ERROR(c_offload_descriptor_offload
, name
);
250 OFFLOAD_TIMER_STOP(c_offload_target_func_lookup
);
252 OFFLOAD_TIMER_START(c_offload_target_func_time
);
254 // execute offload entry
257 OFFLOAD_TIMER_STOP(c_offload_target_func_time
);
259 OFFLOAD_TIMER_STOP(c_offload_target_total_time
);
261 // copy timer data to the buffer
262 OFFLOAD_TIMER_TARGET_DATA(timer_data
);
264 OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name
);
// Only the caller that brings sep_counter back to 0 takes this branch.
268 if (__sync_sub_and_fetch(&sep_counter
, 1) == 0) {
269 OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
273 #endif // SEP_SUPPORT
// Merges locally generated variable descriptors (`vars`, `vars2`,
// `vars_total`) into the descriptors received from the host (m_vars).
// For every index below vars_total the type bits must agree, after which
// the local `ptr` and `into` addresses are copied into m_vars[i]. Each
// descriptor is then traced.
// The parameter list is on lines not visible in this view.
276 void OffloadDescriptor::merge_var_descs(
282 // number of variable descriptors received from host and generated
283 // locally should match
// Only "fewer received than generated" is reported; a larger m_vars_total
// is tolerated and handled by the `i < vars_total` test in the loop.
284 if (m_vars_total
< vars_total
) {
285 LIBOFFLOAD_ERROR(c_merge_var_descs1
);
289 for (int i
= 0; i
< m_vars_total
; i
++) {
290 if (i
< vars_total
) {
291 // variable type must match
292 if (m_vars
[i
].type
.bits
!= vars
[i
].type
.bits
) {
293 LIBOFFLOAD_ERROR(c_merge_var_descs2
);
// Addresses are only known on this side, so take them from the local
// descriptor.
297 m_vars
[i
].ptr
= vars
[i
].ptr
;
298 m_vars
[i
].into
= vars
[i
].into
;
// Name used in the trace; stays empty when no source name is available.
300 const char *var_sname
= "";
// NOTE(review): the `into` trace further down tests `vars2 != NULL` before
// indexing; the equivalent test for this access would be on a line not
// visible here - confirm vars2 cannot be null at this point.
302 if (vars2
[i
].sname
!= NULL
) {
303 var_sname
= vars2
[i
].sname
;
306 OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var
,
307 " VarDesc %d, var=%s, %s, %s\n",
309 vardesc_direction_as_string
[m_vars
[i
].direction
.bits
],
310 vardesc_type_as_string
[m_vars
[i
].type
.src
]);
311 if (vars2
!= NULL
&& vars2
[i
].dname
!= NULL
) {
312 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2
[i
].dname
,
313 vardesc_type_as_string
[m_vars
[i
].type
.dst
]);
// Raw dump of the descriptor fields; the macro call this format string
// belongs to, and most of its arguments, are on lines not visible here.
317 " type_src=%d, type_dstn=%d, direction=%d, "
318 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
319 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
322 m_vars
[i
].direction
.bits
,
326 m_vars
[i
].mic_offset
,
327 m_vars
[i
].flags
.bits
,
// Consumes the input stream (m_in) received from the host, one variable
// descriptor at a time. This first part of the per-variable loop works out
// which target-side pointer belongs to the variable and, for buffer-backed
// types, computes the target address from a buffer base plus offsets.
// Many original lines (declarations of `offset`, `buf`, `ptr`, the switch
// header, stores through ptr_addr, braces) are not visible in this view.
336 void OffloadDescriptor::scatter_copyin_data()
338 OFFLOAD_TIMER_START(c_offload_target_scatter_inputs
);
340 OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
341 m_in
.get_buffer_start(),
342 m_in
.get_buffer_size());
343 OFFLOAD_DEBUG_DUMP_BYTES(2, m_in
.get_buffer_start(),
344 m_in
.get_buffer_size());
347 for (int i
= 0; i
< m_vars_total
; i
++) {
// The "source" side (ptr / type.src / is_static) describes the target
// object when the variable is copied out or has no separate `into`
// destination; otherwise the "destination" side (`into`) does.
348 bool src_is_for_mic
= (m_vars
[i
].direction
.out
||
349 m_vars
[i
].into
== NULL
);
350 void** ptr_addr
= src_is_for_mic
?
351 static_cast<void**>(m_vars
[i
].ptr
) :
352 static_cast<void**>(m_vars
[i
].into
);
353 int type
= src_is_for_mic
? m_vars
[i
].type
.src
:
355 bool is_static
= src_is_for_mic
?
356 m_vars
[i
].flags
.is_static
:
357 m_vars
[i
].flags
.is_static_dstn
;
// With alloc_disp set, an offset value is read from the input stream and
// stored negated in the descriptor.
360 if (m_vars
[i
].flags
.alloc_disp
) {
362 m_in
.receive_data(&offset
, sizeof(offset
));
363 m_vars
[i
].offset
= -offset
;
// For the DV data types the pointer to work with is the Base member of an
// ArrDesc: the descriptor itself for c_dv_data / c_dv_data_slice, or the
// one ptr_addr points to for the remaining DV types.
365 if (VAR_TYPE_IS_DV_DATA_SLICE(type
) ||
366 VAR_TYPE_IS_DV_DATA(type
)) {
367 ArrDesc
*dvp
= (type
== c_dv_data_slice
|| type
== c_dv_data
)?
368 reinterpret_cast<ArrDesc
*>(ptr_addr
) :
369 *reinterpret_cast<ArrDesc
**>(ptr_addr
);
370 ptr_addr
= reinterpret_cast<void**>(&dvp
->Base
);
373 // Set pointer values
375 case c_data_ptr_array
:
// Pointer array: walk `count` descriptors starting at index ptr_arr_offset.
// Inside the loop the descriptor index is j (not i), and each element's own
// ptr_arr_offset is added to the array base address.
377 int j
= m_vars
[i
].ptr_arr_offset
;
378 int max_el
= j
+ m_vars
[i
].count
;
379 char *dst_arr_ptr
= (src_is_for_mic
)?
380 *(reinterpret_cast<char**>(m_vars
[i
].ptr
)) :
381 reinterpret_cast<char*>(m_vars
[i
].into
);
383 for (; j
< max_el
; j
++) {
384 if (src_is_for_mic
) {
386 dst_arr_ptr
+ m_vars
[j
].ptr_arr_offset
;
390 dst_arr_ptr
+ m_vars
[j
].ptr_arr_offset
;
// Allocation requested: the buffer address comes from the input stream when
// sink_addr is set, otherwise from the front of m_buffers (which is popped).
405 if (m_vars
[i
].alloc_if
) {
407 if (m_vars
[i
].flags
.sink_addr
) {
408 m_in
.receive_data(&buf
, sizeof(buf
));
411 buf
= m_buffers
.front();
412 m_buffers
.pop_front();
416 if (!m_vars
[i
].flags
.sink_addr
) {
417 // increment buffer reference
418 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs
);
420 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs
);
// `created` is true exactly when sink_addr is clear.
422 add_ref_count(buf
, 0 == m_vars
[i
].flags
.sink_addr
);
// Target address = buffer base + mic_offset (+ offset, except for stack
// buffers).
424 ptr
= static_cast<char*>(buf
) +
425 m_vars
[i
].mic_offset
+
426 (m_vars
[i
].flags
.is_stack_buf
?
427 0 : m_vars
[i
].offset
);
// No allocation but a sink address: the buffer base is still sent in the
// input stream and the same address arithmetic applies.
431 else if (m_vars
[i
].flags
.sink_addr
) {
433 m_in
.receive_data(&buf
, sizeof(buf
));
434 void *ptr
= static_cast<char*>(buf
) +
435 m_vars
[i
].mic_offset
+
436 (m_vars
[i
].flags
.is_stack_buf
?
437 0 : m_vars
[i
].offset
);
// DV slice variants: same buffer handling as above, but `offset` is always
// added (no stack-buffer special case is visible).
447 case c_dv_data_slice
:
448 case c_dv_ptr_data_slice
:
449 if (m_vars
[i
].alloc_if
) {
451 if (m_vars
[i
].flags
.sink_addr
) {
452 m_in
.receive_data(&buf
, sizeof(buf
));
455 buf
= m_buffers
.front();
456 m_buffers
.pop_front();
460 if (!m_vars
[i
].flags
.sink_addr
) {
461 // increment buffer reference
462 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs
);
464 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs
);
466 add_ref_count(buf
, 0 == m_vars
[i
].flags
.sink_addr
);
468 ptr
= static_cast<char*>(buf
) +
469 m_vars
[i
].mic_offset
+ m_vars
[i
].offset
;
473 else if (m_vars
[i
].flags
.sink_addr
) {
475 m_in
.receive_data(&buf
, sizeof(buf
));
476 ptr
= static_cast<char*>(buf
) +
477 m_vars
[i
].mic_offset
+ m_vars
[i
].offset
;
// Reports the selected `type` value as unknown; the label this belongs to
// is not visible in this view.
483 LIBOFFLOAD_ERROR(c_unknown_var_type
, type
);
486 // Release obsolete buffers for stack of persistent objects
// The guard must COMPARE `type` with c_data_ptr. It used to read
// `type = c_data_ptr && ...`, which parses as `type = (c_data_ptr && ...)`:
// that overwrote `type` with the boolean result and let variables of any
// type enter this branch.
487 if (type
== c_data_ptr
&&
488 m_vars
[i
].flags
.is_stack_buf
&&
489 !m_vars
[i
].direction
.bits
&&
490 m_vars
[i
].alloc_if
&&
491 m_vars
[i
].size
!= 0) {
// Here m_vars[i].size is used as the number of buffer addresses to read
// from the input stream; each address read is handed to BufferReleaseRef.
492 for (int j
=0; j
< m_vars
[i
].size
; j
++) {
494 m_in
.receive_data(&buf
, sizeof(buf
));
495 BufferReleaseRef(buf
);
// Second stage of the per-variable loop in scatter_copyin_data(): dispatch
// on the destination type and pull the variable's payload out of the input
// stream (m_in). Several case labels, declarations (`size`, `disp`) and
// braces are on lines not visible in this view.
500 switch (m_vars
[i
].type
.dst
) {
501 case c_data_ptr_array
:
// Incoming, non-static data: receive into `into` when present, else `ptr`.
506 if (m_vars
[i
].direction
.in
&&
507 !m_vars
[i
].flags
.is_static_dstn
) {
510 char* ptr
= m_vars
[i
].into
?
511 static_cast<char*>(m_vars
[i
].into
) :
512 static_cast<char*>(m_vars
[i
].ptr
);
// For c_cean_var the size and displacement travel in the stream as two
// int64_t values; otherwise the size comes from the descriptor.
513 if (m_vars
[i
].type
.dst
== c_cean_var
) {
514 m_in
.receive_data((&size
), sizeof(int64_t));
515 m_in
.receive_data((&disp
), sizeof(int64_t));
518 size
= m_vars
[i
].size
;
521 m_in
.receive_data(ptr
+ disp
, size
);
// The last operand of this condition is on a line not visible here.
526 if (m_vars
[i
].direction
.bits
||
527 m_vars
[i
].alloc_if
||
529 char* ptr
= m_vars
[i
].into
?
530 static_cast<char*>(m_vars
[i
].into
) :
531 static_cast<char*>(m_vars
[i
].ptr
);
// The first sizeof(uint64_t) bytes of the destination are left untouched;
// only the remainder of the object is received.
532 m_in
.receive_data(ptr
+ sizeof(uint64_t),
533 m_vars
[i
].size
- sizeof(uint64_t));
543 case c_dv_data_slice
:
544 case c_dv_ptr_data_slice
:
// The case label for this branch is not visible; it receives a function
// pointer into the location `ptr` refers to.
548 if (m_vars
[i
].direction
.in
) {
549 m_in
.receive_func_ptr((const void**) m_vars
[i
].ptr
);
554 LIBOFFLOAD_ERROR(c_unknown_var_type
, m_vars
[i
].type
.dst
);
559 OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
560 m_in
.get_tfr_size());
562 OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs
);
// The compute timer started here is stopped at the top of
// gather_copyout_data().
564 OFFLOAD_TIMER_START(c_offload_target_compute
);
// Runs after the offloaded code: stops the compute timer, then walks the
// variable descriptors. This first part dispatches on the SOURCE type. The
// visible code sends function pointers to the host through m_out and
// recovers the buffer base address (target address minus mic_offset and,
// except for stack buffers, minus offset) of buffers that are to be freed.
// Several case labels, the data-send call, the buffer-release calls and
// braces are on lines not visible in this view.
567 void OffloadDescriptor::gather_copyout_data()
569 OFFLOAD_TIMER_STOP(c_offload_target_compute
);
571 OFFLOAD_TIMER_START(c_offload_target_gather_outputs
);
573 for (int i
= 0; i
< m_vars_total
; i
++) {
// True when the source side describes the target object: the variable is
// copied out, or it has no separate `into` destination.
574 bool src_is_for_mic
= (m_vars
[i
].direction
.out
||
575 m_vars
[i
].into
== NULL
);
577 switch (m_vars
[i
].type
.src
) {
578 case c_data_ptr_array
:
// Outgoing, non-static data: the argument visible below is the start
// address (ptr + disp); the call it belongs to is on a line not visible
// here.
583 if (m_vars
[i
].direction
.out
&&
584 !m_vars
[i
].flags
.is_static
) {
586 static_cast<char*>(m_vars
[i
].ptr
) + m_vars
[i
].disp
,
// The middle operand of this condition is on a line not visible here.
598 if (m_vars
[i
].free_if
&&
600 !m_vars
[i
].flags
.is_static
) {
// Undo the address arithmetic done at copy-in time to get back to the
// buffer base.
601 void *buf
= *static_cast<char**>(m_vars
[i
].ptr
) -
602 m_vars
[i
].mic_offset
-
603 (m_vars
[i
].flags
.is_stack_buf
?
604 0 : m_vars
[i
].offset
);
608 // decrement buffer reference count
609 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs
);
611 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs
);
616 if (m_vars
[i
].direction
.out
) {
617 m_out
.send_func_ptr(*((void**) m_vars
[i
].ptr
));
623 case c_dv_data_slice
:
624 case c_dv_ptr_data_slice
:
// The middle operand of this condition is on a line not visible here.
625 if (src_is_for_mic
&&
627 !m_vars
[i
].flags
.is_static
) {
// The ArrDesc is the object `ptr` refers to for c_dv_data /
// c_dv_data_slice, and the one it points to for the other DV types.
628 ArrDesc
*dvp
= (m_vars
[i
].type
.src
== c_dv_data
||
629 m_vars
[i
].type
.src
== c_dv_data_slice
) ?
630 static_cast<ArrDesc
*>(m_vars
[i
].ptr
) :
631 *static_cast<ArrDesc
**>(m_vars
[i
].ptr
);
// The final operand of this subtraction is on a line not visible here.
633 void *buf
= reinterpret_cast<char*>(dvp
->Base
) -
634 m_vars
[i
].mic_offset
-
641 // decrement buffer reference count
642 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs
);
644 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs
);
// Report the type that was actually dispatched on. This diagnostic follows
// the cases of `switch (m_vars[i].type.src)` and precedes the start of the
// `.type.dst` switch, so the unrecognised value is the source type; it used
// to print the destination type instead.
649 LIBOFFLOAD_ERROR(c_unknown_var_type
, m_vars
[i
].type
.src
);
// Second stage of the per-variable loop in gather_copyout_data(): when the
// variable has an `into` destination, dispatch on the DESTINATION type and
// recover the base address of destination buffers that are to be released.
// After the loop the output buffer is traced and the gather timer stopped.
// Several case labels, the release calls, the macro names owning the timer
// arguments and braces are on lines not visible in this view.
653 if (m_vars
[i
].into
) {
654 switch (m_vars
[i
].type
.dst
) {
655 case c_data_ptr_array
:
// The middle operand of this condition is on a line not visible here.
667 if (m_vars
[i
].direction
.in
&&
669 !m_vars
[i
].flags
.is_static_dstn
) {
// Buffer base = address stored at `into` minus mic_offset and, except for
// stack buffers, minus offset.
670 void *buf
= *static_cast<char**>(m_vars
[i
].into
) -
671 m_vars
[i
].mic_offset
-
672 (m_vars
[i
].flags
.is_stack_buf
?
673 0 : m_vars
[i
].offset
);
678 // decrement buffer reference count
680 c_offload_target_release_buffer_refs
);
683 c_offload_target_release_buffer_refs
);
692 case c_dv_data_slice
:
693 case c_dv_ptr_data_slice
:
694 if (m_vars
[i
].free_if
&&
695 m_vars
[i
].direction
.in
&&
696 !m_vars
[i
].flags
.is_static_dstn
) {
// Selects the ArrDesc: the object `into` refers to for c_dv_data /
// c_dv_data_slice, otherwise the one it points to. The declaration of `dvp`
// that this initialiser belongs to is on a line not visible here.
698 (m_vars
[i
].type
.dst
== c_dv_data_slice
||
699 m_vars
[i
].type
.dst
== c_dv_data
) ?
700 static_cast<ArrDesc
*>(m_vars
[i
].into
) :
701 *static_cast<ArrDesc
**>(m_vars
[i
].into
);
// The final operand of this subtraction is on a line not visible here.
702 void *buf
= reinterpret_cast<char*>(dvp
->Base
) -
703 m_vars
[i
].mic_offset
-
709 // decrement buffer reference count
711 c_offload_target_release_buffer_refs
);
714 c_offload_target_release_buffer_refs
);
719 LIBOFFLOAD_ERROR(c_unknown_var_type
, m_vars
[i
].type
.dst
);
// Trace and dump what was accumulated in the output buffer.
725 OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
726 m_out
.get_buffer_start(),
727 m_out
.get_buffer_size());
729 OFFLOAD_DEBUG_DUMP_BYTES(2,
730 m_out
.get_buffer_start(),
731 m_out
.get_buffer_size());
733 OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data
,
734 "Total copyout data sent to host: [%lld] bytes\n",
735 m_out
.get_tfr_size());
737 OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs
);
// One-time target-side initialisation.
// Visible steps: read the SEP_MONITOR and SEP_DEVICE environment variables
// (SEP section), fetch the message prefix for reports, and record the
// counter frequency reported by COIPerfGetCycleFrequency().
// Braces, the opening "#ifdef" of the SEP section and possibly other
// statements are on lines not visible in this view.
740 void __offload_target_init(void)
743 const char* env_var
= getenv(sep_monitor_env
);
// Unset or empty variables leave the defaults in place.
744 if (env_var
!= 0 && *env_var
!= '\0') {
// atoi() result is converted to bool: any non-zero number enables it.
745 sep_monitor
= atoi(env_var
);
747 env_var
= getenv(sep_device_env
);
748 if (env_var
!= 0 && *env_var
!= '\0') {
// Keeps the pointer returned by getenv() rather than a copy of the string.
749 sep_device
= env_var
;
751 #endif // SEP_SUPPORT
753 prefix
= report_get_message_str(c_report_mic
);
756 mic_frequency
= COIPerfGetCycleFrequency();
759 // User-visible offload API
// Returns the current value of the file-level mic_engines_total, which is
// initialised to -1 in this file.
761 int _Offload_number_of_devices(void)
763 return mic_engines_total
;
// Takes no arguments and returns an int. The body is on lines not visible
// in this view, so the returned value is not documented here.
766 int _Offload_get_device_number(void)
// Takes no arguments and returns an int. The visible part of the body
// passes the address of a local `index` to EngineGetIndex(); the
// declaration of `index` and the return statement are not visible in this
// view (presumably `index` is what gets returned - TODO confirm).
771 int _Offload_get_physical_device_number(void)
774 EngineGetIndex(&index
);