2 Copyright (c) 2014 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #ifndef OFFLOAD_ENGINE_H_INCLUDED
32 #define OFFLOAD_ENGINE_H_INCLUDED
39 #include "offload_common.h"
40 #include "coi/coi_client.h"
45 MemRange() : m_start(0), m_length(0) {}
46 MemRange(const void *addr
, uint64_t len
) : m_start(addr
), m_length(len
) {}
48 const void* start() const {
52 const void* end() const {
// One-past-the-end address of the range: the cast to const char* makes the
// addition advance m_start by m_length bytes.
53 return static_cast<const char*>(m_start
) + m_length
;
56 uint64_t length() const {
60 // returns true if given range overlaps with another one
// Both comparisons are strict, so two ranges that merely touch (one ends
// exactly where the other starts) are not reported as overlapping.
61 bool overlaps(const MemRange
&o
) const {
62 // Two address ranges A[start, end) and B[start,end) overlap
63 // if A.start < B.end and A.end > B.start.
64 return start() < o
.end() && end() > o
.start();
67 // returns true if given range contains the other range
// Both bounds are compared with <=, so a range contains itself and any
// sub-range that shares its start and/or end address.
68 bool contains(const MemRange
&o
) const {
69 return start() <= o
.start() && o
.end() <= end();
77 // Data associated with a pointer variable
80 PtrData(const void *addr
, uint64_t len
) :
// Only the CPU range is taken from the arguments; every other field named
// in this initializer list starts out as zero / false.
81 cpu_addr(addr
, len
), cpu_buf(0),
82 mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0),
83 ref_count(0), is_static(false)
89 PtrData(const PtrData
& ptr
):
// Copies each data field from ptr. alloc_ptr_data_lock is not named in this
// initializer list, so the new object gets its own default-initialized lock
// instead of a copy of the source's lock.
90 cpu_addr(ptr
.cpu_addr
), cpu_buf(ptr
.cpu_buf
),
91 mic_addr(ptr
.mic_addr
), alloc_disp(ptr
.alloc_disp
),
92 mic_buf(ptr
.mic_buf
), mic_offset(ptr
.mic_offset
),
93 ref_count(ptr
.ref_count
), is_static(ptr
.is_static
)
96 bool operator<(const PtrData
&o
) const {
97 // Variables are sorted by the CPU start address.
98 // Overlapping memory ranges are considered equal.
// The expression is false in both directions whenever the start addresses
// are equal or the two ranges overlap; that is what makes such entries
// compare as equivalent keys in an ordered container.
99 return (cpu_addr
.start() < o
.cpu_addr
.start()) &&
100 !cpu_addr
.overlaps(o
.cpu_addr
);
103 long add_reference() {
// Atomically increments ref_count. Both branches yield the value the counter
// held BEFORE the increment: __sync_fetch_and_add returns the old value,
// while _InterlockedIncrement returns the new value, hence the "- 1".
108 return __sync_fetch_and_add(&ref_count
, 1);
109 #else // TARGET_WINNT
110 return _InterlockedIncrement(&ref_count
) - 1;
111 #endif // TARGET_WINNT
114 long remove_reference() {
// Atomically decrements ref_count. Both branches yield the value AFTER the
// decrement (__sync_sub_and_fetch and _InterlockedDecrement each return the
// new value). Note the asymmetry with add_reference(), which yields the
// value before its update.
119 return __sync_sub_and_fetch(&ref_count
, 1);
120 #else // TARGET_WINNT
121 return _InterlockedDecrement(&ref_count
);
122 #endif // TARGET_WINNT
125 long get_reference() const {
134 const MemRange cpu_addr
;
136 // CPU and MIC buffers
140 // placeholder for buffer address on mic
145 // additional offset to pointer data on MIC for improving bandwidth for
146 // data which is not 4K aligned
149 // if true buffers are created from static memory
151 mutex_t alloc_ptr_data_lock
;
154 // reference count for the entry
158 typedef std::list
<PtrData
*> PtrDataList
;
160 // Data associated with automatic variable
163 AutoData(const void *addr
, uint64_t len
) :
164 cpu_addr(addr
, len
), ref_count(0)
167 bool operator<(const AutoData
&o
) const {
168 // Variables are sorted by the CPU start address.
169 // Overlapping memory ranges are considered equal.
// The expression is false in both directions whenever the start addresses
// are equal or the two ranges overlap; that is what makes such entries
// compare as equivalent keys in an ordered container.
170 return (cpu_addr
.start() < o
.cpu_addr
.start()) &&
171 !cpu_addr
.overlaps(o
.cpu_addr
);
174 long add_reference() {
// Atomically increments ref_count. Both branches yield the value the counter
// held BEFORE the increment: __sync_fetch_and_add returns the old value,
// while _InterlockedIncrement returns the new value, hence the "- 1".
176 return __sync_fetch_and_add(&ref_count
, 1);
177 #else // TARGET_WINNT
178 return _InterlockedIncrement(&ref_count
) - 1;
179 #endif // TARGET_WINNT
182 long remove_reference() {
// Atomically decrements ref_count. Both branches yield the value AFTER the
// decrement (__sync_sub_and_fetch and _InterlockedDecrement each return the
// new value). Note the asymmetry with add_reference(), which yields the
// value before its update.
184 return __sync_sub_and_fetch(&ref_count
, 1);
185 #else // TARGET_WINNT
186 return _InterlockedDecrement(&ref_count
);
187 #endif // TARGET_WINNT
190 long get_reference() const {
196 const MemRange cpu_addr
;
199 // reference count for the entry
203 // Set of automatic variables
204 typedef std::set
<AutoData
> AutoSet
;
209 TargetImage(const char *_name
, const void *_data
, uint64_t _size
,
210 const char *_origin
, uint64_t _offset
) :
211 name(_name
), data(_data
), size(_size
),
212 origin(_origin
), offset(_offset
)
222 // file of origin and offset within that file
227 typedef std::list
<TargetImage
> TargetImageList
;
229 // Data associated with persistent auto objects
232 PersistData(const void *addr
, uint64_t routine_num
, uint64_t size
) :
233 stack_cpu_addr(addr
), routine_id(routine_num
)
// Allocates the associated PtrData with a null CPU address and a length of
// `size` bytes.
// NOTE(review): this is a raw `new`; no matching delete is visible in this
// extract, and cpu_stack_addr is not set in the visible initializer list.
// Confirm ownership and initialization against the full file.
235 stack_ptr_data
= new PtrData(0, size
);
237 // 1-st key value - beginning of the stack at CPU
238 const void * stack_cpu_addr
;
239 // 2-nd key value - identifier of routine invocation at CPU
241 // corresponding PtrData; only stack_ptr_data->mic_buf is used
242 PtrData
* stack_ptr_data
;
243 // used to get offset of the variable in stack buffer
244 char * cpu_stack_addr
;
247 typedef std::list
<PersistData
> PersistDataList
;
249 // class representing a single engine
251 friend void __offload_init_library_once(void);
252 friend void __offload_fini_library(void);
// check_result(res, tag, ...): reacts to a COI result code. The visible
// branches call fini_process(true) when res is COI_PROCESS_DIED and report
// through __liboffload_error_support(tag, ...) when res is not COI_SUCCESS.
// NOTE(review): several continuation lines of this macro are not visible in
// this extract, so what follows each of those calls cannot be confirmed here.
254 #define check_result(res, tag, ...) \
256 if (res == COI_PROCESS_DIED) { \
257 fini_process(true); \
260 if (res != COI_SUCCESS) { \
261 __liboffload_error_support(tag, __VA_ARGS__); \
266 int get_logical_index() const {
270 int get_physical_index() const {
271 return m_physical_index
;
274 const COIPROCESS
& get_process() const {
282 void add_lib(const TargetImage
&lib
)
286 m_images
.push_back(lib
);
291 const std::list
<COIBUFFER
> &buffers
,
297 const COIEVENT
* deps
,
302 // temporary workaround for blocking behavior for myoiLibInit/Fini calls
303 void init_myo(COIEVENT
*event
) {
// Runs the device function registered under c_func_myo_init on the pipeline
// returned by get_pipeline(), then passes the COI result to check_result.
// NOTE(review): the trailing arguments of the call (presumably including
// `event`) are not visible in this extract - confirm against the full file.
305 res
= COI::PipelineRunFunction(get_pipeline(),
306 m_funcs
[c_func_myo_init
],
307 0, 0, 0, 0, 0, 0, 0, 0, 0,
309 check_result(res
, c_pipeline_run_func
, m_index
, res
);
312 void fini_myo(COIEVENT
*event
) {
// Runs the device function registered under c_func_myo_fini on the pipeline
// returned by get_pipeline(), then passes the COI result to check_result.
// NOTE(review): the trailing arguments of the call (presumably including
// `event`) are not visible in this extract - confirm against the full file.
314 res
= COI::PipelineRunFunction(get_pipeline(),
315 m_funcs
[c_func_myo_fini
],
316 0, 0, 0, 0, 0, 0, 0, 0, 0,
318 check_result(res
, c_pipeline_run_func
, m_index
, res
);
320 #endif // MYO_SUPPORT
323 // Memory association table
325 PtrData
* find_ptr_data(const void *ptr
) {
// Probes m_ptr_set with a zero-length PtrData key at ptr; PtrData::operator<
// treats equal-start and overlapping ranges as equivalent, so this locates
// the stored entry whose CPU range covers ptr.
327 PtrSet::iterator res
= m_ptr_set
.find(PtrData(ptr
, 0));
329 if (res
== m_ptr_set
.end()) {
// Set elements are const; the const_cast hands out a mutable pointer to the
// stored entry. The ordering key (cpu_addr) is itself declared const.
// NOTE(review): the not-found return value is on a line missing from this
// extract - presumably a null pointer; confirm.
332 return const_cast<PtrData
*>(res
.operator->());
335 PtrData
* insert_ptr_data(const void *ptr
, uint64_t len
, bool &is_new
) {
// Tries to insert a PtrData for [ptr, ptr + len). If an equivalent entry
// (same start or overlapping range) already exists, std::set::insert leaves
// it in place and res.first refers to that existing entry.
337 std::pair
<PtrSet::iterator
, bool> res
=
338 m_ptr_set
.insert(PtrData(ptr
, len
));
339 PtrData
* ptr_data
= const_cast<PtrData
*>(res
.first
.operator->());
// NOTE(review): the lines that set is_new and guard the lock below are not
// visible in this extract - presumably is_new mirrors res.second and the
// lock is taken only for a new entry; confirm against the full file.
344 // It's necessary to lock as soon as possible.
345 // unlock must be done at call site of insert_ptr_data at
347 ptr_data
->alloc_ptr_data_lock
.lock();
352 void remove_ptr_data(const void *ptr
) {
// Erases the entry equivalent to a zero-length probe at ptr, i.e. the stored
// range that starts at or covers ptr (see PtrData::operator<).
354 m_ptr_set
.erase(PtrData(ptr
, 0));
359 // Automatic variables
361 AutoData
* find_auto_data(const void *ptr
) {
// Probes the set returned by get_auto_vars() with a zero-length AutoData key
// at ptr; AutoData::operator< treats equal-start and overlapping ranges as
// equivalent, so this locates the stored entry whose range covers ptr.
362 AutoSet
&auto_vars
= get_auto_vars();
363 AutoSet::iterator res
= auto_vars
.find(AutoData(ptr
, 0));
364 if (res
== auto_vars
.end()) {
// Set elements are const; the const_cast hands out a mutable pointer to the
// stored entry.
// NOTE(review): the not-found return value is on a line missing from this
// extract - presumably a null pointer; confirm.
367 return const_cast<AutoData
*>(res
.operator->());
370 AutoData
* insert_auto_data(const void *ptr
, uint64_t len
) {
// Inserts an AutoData for [ptr, ptr + len) into the set returned by
// get_auto_vars(). If an equivalent entry (same start or overlapping range)
// already exists, std::set::insert keeps it and that existing entry is what
// gets returned; the bool in res is not examined.
371 AutoSet
&auto_vars
= get_auto_vars();
372 std::pair
<AutoSet::iterator
, bool> res
=
373 auto_vars
.insert(AutoData(ptr
, len
));
374 return const_cast<AutoData
*>(res
.first
.operator->());
377 void remove_auto_data(const void *ptr
) {
// Erases, from the set returned by get_auto_vars(), the entry equivalent to
// a zero-length probe at ptr (the stored range that starts at or covers ptr).
378 get_auto_vars().erase(AutoData(ptr
, 0));
384 void add_signal(const void *signal
, OffloadDescriptor
*desc
) {
// Stores desc under the key `signal` while holding m_signal_lock. Assignment
// through operator[] replaces any descriptor already stored for that key.
385 m_signal_lock
.lock();
386 m_signal_map
[signal
] = desc
;
387 m_signal_lock
.unlock();
390 OffloadDescriptor
* find_signal(const void *signal
, bool remove
) {
// desc starts as 0; the lookup and the erase both run while m_signal_lock
// is held.
// NOTE(review): the lines that assign desc, test `remove`, and return are
// not visible in this extract - presumably desc is taken from the map entry,
// the erase is conditional on `remove`, and desc is returned; confirm.
391 OffloadDescriptor
*desc
= 0;
393 m_signal_lock
.lock();
395 SignalMap::iterator it
= m_signal_map
.find(signal
);
396 if (it
!= m_signal_map
.end()) {
399 m_signal_map
.erase(it
);
403 m_signal_lock
.unlock();
408 // stop device process
409 void fini_process(bool verbose
);
411 // list of stacks active at the engine
412 PersistDataList m_persist_list
;
415 Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false),
420 if (m_process
!= 0) {
426 void set_indexes(int logical_index
, int physical_index
) {
// Records the engine's logical index (m_index) and physical index
// (m_physical_index).
427 m_index
= logical_index
;
428 m_physical_index
= physical_index
;
431 // start process on device
434 void load_libraries(void);
435 void init_ptr_data(void);
437 // performs library initialization on the device side
438 pid_t
init_device(void);
441 // get pipeline associated with a calling thread
442 COIPIPELINE
get_pipeline(void);
444 // get automatic vars set associated with the calling thread
445 AutoSet
& get_auto_vars(void);
447 // destructor for thread data
448 static void destroy_thread_data(void *data
);
451 typedef std::set
<PtrData
> PtrSet
;
452 typedef std::map
<const void*, OffloadDescriptor
*> SignalMap
;
456 int m_physical_index
;
458 // number of COI pipes created for the engine
462 COIPROCESS m_process
;
464 // If false, device either has not been initialized or new libraries
469 // List of libraries to be loaded
470 TargetImageList m_images
;
477 SignalMap m_signal_map
;
478 mutex_t m_signal_lock
;
480 // constants for accessing device function handles
486 #endif // MYO_SUPPORT
488 c_func_var_table_size
,
489 c_func_var_table_copy
,
492 static const char* m_func_names
[c_funcs_total
];
494 // device function handles
495 COIFUNCTION m_funcs
[c_funcs_total
];
497 // int -> name mapping for device signals
498 static const int c_signal_max
= 32;
499 static const char* c_signal_names
[c_signal_max
];
502 #endif // OFFLOAD_ENGINE_H_INCLUDED