2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 \brief The parts of the runtime library used only on the host
35 #ifndef OFFLOAD_HOST_H_INCLUDED
36 #define OFFLOAD_HOST_H_INCLUDED
40 #endif // TARGET_WINNT
41 #include "offload_common.h"
42 #include "offload_util.h"
43 #include "offload_engine.h"
44 #include "offload_env.h"
45 #include "offload_orsl.h"
46 #include "coi/coi_client.h"
49 DLL_LOCAL
extern Engine
* mic_engines
;
50 DLL_LOCAL
extern uint32_t mic_engines_total
;
52 // DMA channel count used by COI and set via
53 // OFFLOAD_DMA_CHANNEL_COUNT environment variable
54 DLL_LOCAL
extern uint32_t mic_dma_channel_count
;
56 //! The target image is packed as follows.
57 /*! 1. 8 bytes containing the size of the target binary */
58 /*! 2. a null-terminated string which is the binary name */
59 /*! 3. <size> number of bytes that are the contents of the image */
60 /*! The address of symbol __offload_target_image
61 is the address of this structure. */
63 int64_t size
; //!< Size in bytes of the target binary name and contents
64 char data
[]; //!< The name and contents of the target image
67 // The offload descriptor.
68 class OffloadDescriptor
71 enum OmpAsyncLastEventType
{
72 c_last_not
, // not last event
73 c_last_write
, // the last event that is write
74 c_last_read
, // the last event that is read
75 c_last_runfunc
// the last event that is runfunction
80 _Offload_status
*status
,
83 OffloadHostTimerData
* timer_data
85 m_device(mic_engines
[index
== -1 ? 0 : index
% mic_engines_total
]),
86 m_is_mandatory(is_mandatory
),
87 m_is_openmp(is_openmp
),
91 m_num_in_dependencies(0),
92 m_p_in_dependencies(0),
95 m_in_deps_allocated(0),
98 m_out_deps_allocated(0),
102 m_timer_data(timer_data
),
103 m_out_with_preallocated(false),
104 m_preallocated_alloc(false),
105 m_traceback_called(false),
109 m_omp_async_last_event_type(c_last_not
)
111 m_wait_all_devices
= index
== -1;
116 if (m_in_deps
!= 0) {
119 if (m_out_deps
!= 0) {
122 if (m_func_desc
!= 0) {
131 bool offload(const char *name
, bool is_empty
,
132 VarDesc
*vars
, VarDesc2
*vars2
, int vars_total
,
133 const void **waits
, int num_waits
, const void **signal
,
134 int entry_id
, const void *stack_addr
,
135 OffloadFlags offload_flags
);
137 bool offload_finish(bool is_traceback
);
141 OffloadHostTimerData
* get_timer_data() const {
145 void set_stream(_Offload_stream stream
) {
149 _Offload_stream
get_stream() {
153 Engine
& get_device() {
161 void set_signal(const void* signal
) {
163 m_signal
= const_cast<void*>(signal
);
168 uint32_t m_event_count
;
172 bool offload_wrap(const char *name
, bool is_empty
,
173 VarDesc
*vars
, VarDesc2
*vars2
, int vars_total
,
174 const void **waits
, int num_waits
, const void **signal
,
175 int entry_id
, const void *stack_addr
,
176 OffloadFlags offload_flags
);
177 bool wait_dependencies(const void **waits
, int num_waits
,
178 _Offload_stream stream
);
179 bool setup_descriptors(VarDesc
*vars
, VarDesc2
*vars2
, int vars_total
,
180 int entry_id
, const void *stack_addr
);
181 bool setup_misc_data(const char *name
);
182 bool send_pointer_data(bool is_async
, void* info
);
183 bool send_noncontiguous_pointer_data(
189 uint32_t in_deps_amount
,
192 bool receive_noncontiguous_pointer_data(
196 uint64_t &received_data
,
197 uint32_t in_deps_amount
,
201 bool gather_copyin_data();
203 bool compute(void *);
205 bool receive_pointer_data(bool is_async
, bool first_run
, void * info
);
206 bool scatter_copyout_data();
208 bool find_ptr_data(PtrData
* &ptr_data
, void *base
, int64_t disp
,
209 int64_t length
, bool is_targptr
,
210 bool error_does_not_exist
= true);
212 void find_device_ptr( int64_t* &device_ptr
,
215 bool alloc_ptr_data(PtrData
* &ptr_data
, void *base
, int64_t disp
,
216 int64_t length
, int64_t alloc_disp
, int align
,
217 bool is_targptr
, bool is_prealloc
, bool pin
);
218 bool create_preallocated_buffer(PtrData
* ptr_data
, void *base
);
219 bool init_static_ptr_data(PtrData
*ptr_data
);
220 bool init_mic_address(PtrData
*ptr_data
);
221 bool offload_stack_memory_manager(
222 const void * stack_begin
,
226 bool thread_specific_function_locals
,
228 char *get_this_threads_cpu_stack_addr(
229 const void * stack_begin
,
231 bool thread_specific_function_locals
);
232 PtrData
*get_this_threads_mic_stack_addr(
233 const void * stack_begin
,
235 bool thread_specific_function_locals
);
236 bool nullify_target_stack(COIBUFFER targ_buf
, uint64_t size
);
238 bool gen_var_descs_for_pointer_array(int i
);
240 void get_stream_in_dependencies(uint32_t &in_deps_amount
,
243 void report_coi_error(error_types msg
, COIRESULT res
);
244 _Offload_result
translate_coi_error(COIRESULT res
) const;
246 void setup_omp_async_info();
248 void setup_use_device_ptr(int i
);
250 void register_event_call_back(void (*)(
254 const COIEVENT
*event
,
257 void register_omp_event_call_back(const COIEVENT
*event
, const void *info
);
260 typedef std::list
<COIBUFFER
> BufferList
;
262 // extra data associated with each variable descriptor
271 CeanReadRanges
*read_rng_src
;
272 NonContigDesc
*noncont_desc
;
274 CeanReadRanges
*read_rng_dst
;
275 int64_t ptr_arr_offset
;
277 OmpAsyncLastEventType omp_last_event_type
;
278 int64_t pointer_offset
;
283 template<typename T
> class ReadArrElements
{
294 bool read_next(bool flag
)
299 if (!get_next_range(ranges
, &offset
)) {
304 // all contiguous elements are over
305 else if (count
!= 0) {
314 val
= (T
)get_el_value(base
, offset
, el_size
);
315 length_cur
-= el_size
;
317 is_empty
= length_cur
== 0;
322 CeanReadRanges
* ranges
;
333 // ptr_data for persistent auto objects
334 PtrData
* m_stack_ptr_data
;
335 PtrDataList m_destroy_stack
;
340 // true for offload_wait target(mic) stream(0)
341 bool m_wait_all_devices
;
343 // if true offload is mandatory
346 // if true offload has openmp origin
347 const bool m_is_openmp
;
349 // The Marshaller for the inputs of the offloaded region.
352 // The Marshaller for the outputs of the offloaded region.
355 // List of buffers that are passed to dispatch call
356 BufferList m_compute_buffers
;
358 // List of buffers that need to be destroyed at the end of offload
359 BufferList m_destroy_buffers
;
361 // Variable descriptors
363 VarExtra
* m_vars_extra
;
366 // Pointer to a user-specified status variable
367 _Offload_status
*m_status
;
369 // Function descriptor
370 FunctionDescriptor
* m_func_desc
;
371 uint32_t m_func_desc_size
;
373 // Buffer for transferring copyin/copyout data
374 COIBUFFER m_inout_buf
;
379 uint32_t m_in_deps_total
;
380 uint32_t m_in_deps_allocated
;
381 COIEVENT
*m_out_deps
;
382 uint32_t m_out_deps_total
;
383 uint32_t m_out_deps_allocated
;
// Two variables define the input dependencies for the current COI API call.
// Calls to routines such as BufferWrite/PipelineRunFunction/BufferRead
// are expected to take input dependencies.
// The two variables below hold the number and the vector of dependencies
// at every moment of the offload,
// so any phase of the offload can use their values as input dependencies
// for the COI API that the phase calls.
// This means that all phases (Write, RunFunction, Read) must keep
// the variables correct so that the following phase can use them.
// If several consecutive offloads are connected (e.g. by the same stream),
// the final values of the two variables of one offload are used as the
// initial inputs for the next offload.
397 uint32_t m_num_in_dependencies
;
398 COIEVENT
*m_p_in_dependencies
;
401 _Offload_stream m_stream
;
407 OffloadHostTimerData
*m_timer_data
;
409 // copyin/copyout data length
410 uint64_t m_in_datalen
;
411 uint64_t m_out_datalen
;
413 // a boolean value calculated in setup_descriptors. If true we need to do
414 // a run function on the target. Otherwise it may be optimized away.
415 bool m_need_runfunction
;
417 // initialized value of m_need_runfunction;
418 // is used to recognize offload_transfer
419 bool m_initial_need_runfunction
;
421 // a Boolean value set to true when OUT clauses with preallocated targetptr
422 // is encountered to indicate that call receive_pointer_data needs to be
423 // invoked again after call to scatter_copyout_data.
424 bool m_out_with_preallocated
;
426 // a Boolean value set to true if an alloc_if(1) is used with preallocated
427 // targetptr to indicate the need to scatter_copyout_data even for
429 bool m_preallocated_alloc
;
431 // a Boolean value set to true if traceback routine is called
432 bool m_traceback_called
;
434 OmpAsyncLastEventType m_omp_async_last_event_type
;
437 // Initialization types for MIC
438 enum OffloadInitType
{
439 c_init_on_start
, // all devices before entering main
440 c_init_on_offload
, // single device before starting the first offload
441 c_init_on_offload_all
// all devices before starting the first offload
// Determines if MIC code is an executable or a shared library
// NOTE(review): which of the two cases yields true is not visible here;
// the function name suggests true means "executable" - confirm.
extern "C" bool __offload_target_image_is_executable(const void *target_image);

// Initializes library and registers specified offload image.
extern "C" bool __offload_register_image(const void* image);
// Counterpart of __offload_register_image for the same image pointer.
extern "C" void __offload_unregister_image(const void* image);

// Registers asynchronous task completion callback
// cb is a pointer to a function taking one void* argument and returning void.
extern "C" void __offload_register_task_callback(void (*cb)(void *));
454 // Initializes offload runtime library.
455 DLL_LOCAL
extern int __offload_init_library(void);
457 // thread data for associating pipelines with threads
458 DLL_LOCAL
extern pthread_key_t mic_thread_key
;
460 // location of offload_main executable
461 // To be used if the main application has no offload and is not built
462 // with -offload but dynamic library linked in has offload pragma
463 DLL_LOCAL
extern char* mic_device_main
;
465 // Environment variables for devices
466 DLL_LOCAL
extern MicEnvVar mic_env_vars
;
469 DLL_LOCAL
extern uint64_t cpu_frequency
;
471 // LD_LIBRARY_PATH for KNC libraries
472 DLL_LOCAL
extern char* knc_library_path
;
474 // LD_LIBRARY_PATH for KNL libraries
475 DLL_LOCAL
extern char* knl_library_path
;
477 // stack size for target
478 DLL_LOCAL
extern uint32_t mic_stack_size
;
480 // Preallocated memory size for buffers on MIC
481 DLL_LOCAL
extern uint64_t mic_buffer_size
;
483 // Preallocated 4K page memory size for buffers on MIC
484 DLL_LOCAL
extern uint64_t mic_4k_buffer_size
;
486 // Preallocated 2M page memory size for buffers on MIC
487 DLL_LOCAL
extern uint64_t mic_2m_buffer_size
;
489 // Setting controlling inout proxy
490 DLL_LOCAL
extern bool mic_proxy_io
;
491 DLL_LOCAL
extern char* mic_proxy_fs_root
;
493 // Threshold for creating buffers with large pages
494 DLL_LOCAL
extern uint64_t __offload_use_2mb_buffers
;
496 // offload initialization type
497 DLL_LOCAL
extern OffloadInitType __offload_init_type
;
499 // Device number to offload to when device is not explicitly specified.
500 DLL_LOCAL
extern int __omp_device_num
;
503 DLL_LOCAL
extern TargetImage
* __target_exe
;
505 // is true if last loaded image is dll
506 DLL_LOCAL
extern bool __current_image_is_dll
;
507 // is true if myo library is loaded when dll is loaded
508 DLL_LOCAL
extern bool __myo_init_in_so
;
// Called by the offload runtime after initialization of offload infrastructure
// has been completed.
extern "C" void __dbg_target_so_loaded();

// Called by the offload runtime when the offload infrastructure is about to be
// shut down, currently at application exit.
extern "C" void __dbg_target_so_unloaded();

// Null-terminated string containing path to the process image of the hosting
// application (offload_main)
// MAX_TARGET_NAME is the size in bytes of the buffer, terminator included.
#define MAX_TARGET_NAME 512
extern "C" char __dbg_target_exe_name[MAX_TARGET_NAME];

// Integer specifying the process id
extern "C" pid_t __dbg_target_so_pid;

// Integer specifying the 0-based device number
extern "C" int __dbg_target_id;

// Set to non-zero by the host-side debugger to enable offload debugging
extern "C" int __dbg_is_attached;

// Major version of the debugger support API
extern "C" const int __dbg_api_major_version;

// Minor version of the debugger support API
extern "C" const int __dbg_api_minor_version;
541 #endif // OFFLOAD_HOST_H_INCLUDED