PR libgomp/64635
[official-gcc.git] / liboffloadmic / runtime / offload_engine.h
blob501890c583428a4f21f9ed6806f458def2cf3575
1 /*
2 Copyright (c) 2014 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #ifndef OFFLOAD_ENGINE_H_INCLUDED
32 #define OFFLOAD_ENGINE_H_INCLUDED
34 #include <limits.h>
36 #include <list>
37 #include <set>
38 #include <map>
39 #include "offload_common.h"
40 #include "coi/coi_client.h"
// Address range: a start pointer plus a length in bytes.
// end() is computed as start + length, so the range is treated as [start, end).
class MemRange {
public:
    // Empty range located at the null address.
    MemRange() : m_start(0), m_length(0) {}

    // Range of len bytes beginning at addr.
    MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {}

    // Address of the first byte.
    const void* start() const {
        return m_start;
    }

    // Address one past the last byte (start + length).
    const void* end() const {
        const char *first = static_cast<const char*>(m_start);
        return first + m_length;
    }

    // Size of the range in bytes.
    uint64_t length() const {
        return m_length;
    }

    // returns true if given range overlaps with another one
    bool overlaps(const MemRange &o) const {
        // Two address ranges A[start, end) and B[start, end) are disjoint
        // when A.start >= B.end or A.end <= B.start; otherwise they overlap.
        const bool disjoint = start() >= o.end() || end() <= o.start();
        return !disjoint;
    }

    // returns true if given range contains the other range
    bool contains(const MemRange &o) const {
        if (o.start() < start()) {
            return false;
        }
        return o.end() <= end();
    }

private:
    const void* m_start;
    uint64_t    m_length;
};
77 // Data associated with a pointer variable
78 class PtrData {
79 public:
80 PtrData(const void *addr, uint64_t len) :
81 cpu_addr(addr, len), cpu_buf(0),
82 mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0),
83 ref_count(0), is_static(false)
87 // Copy constructor
89 PtrData(const PtrData& ptr):
90 cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf),
91 mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp),
92 mic_buf(ptr.mic_buf), mic_offset(ptr.mic_offset),
93 ref_count(ptr.ref_count), is_static(ptr.is_static)
96 bool operator<(const PtrData &o) const {
97 // Variables are sorted by the CPU start address.
98 // Overlapping memory ranges are considered equal.
99 return (cpu_addr.start() < o.cpu_addr.start()) &&
100 !cpu_addr.overlaps(o.cpu_addr);
103 long add_reference() {
104 if (is_static) {
105 return LONG_MAX;
107 #ifndef TARGET_WINNT
108 return __sync_fetch_and_add(&ref_count, 1);
109 #else // TARGET_WINNT
110 return _InterlockedIncrement(&ref_count) - 1;
111 #endif // TARGET_WINNT
114 long remove_reference() {
115 if (is_static) {
116 return LONG_MAX;
118 #ifndef TARGET_WINNT
119 return __sync_sub_and_fetch(&ref_count, 1);
120 #else // TARGET_WINNT
121 return _InterlockedDecrement(&ref_count);
122 #endif // TARGET_WINNT
125 long get_reference() const {
126 if (is_static) {
127 return LONG_MAX;
129 return ref_count;
132 public:
133 // CPU address range
134 const MemRange cpu_addr;
136 // CPU and MIC buffers
137 COIBUFFER cpu_buf;
138 COIBUFFER mic_buf;
140 // placeholder for buffer address on mic
141 uint64_t mic_addr;
143 uint64_t alloc_disp;
145 // additional offset to pointer data on MIC for improving bandwidth for
146 // data which is not 4K aligned
147 uint32_t mic_offset;
149 // if true buffers are created from static memory
150 bool is_static;
151 mutex_t alloc_ptr_data_lock;
153 private:
154 // reference count for the entry
155 long ref_count;
158 typedef std::list<PtrData*> PtrDataList;
160 // Data associated with automatic variable
161 class AutoData {
162 public:
163 AutoData(const void *addr, uint64_t len) :
164 cpu_addr(addr, len), ref_count(0)
167 bool operator<(const AutoData &o) const {
168 // Variables are sorted by the CPU start address.
169 // Overlapping memory ranges are considered equal.
170 return (cpu_addr.start() < o.cpu_addr.start()) &&
171 !cpu_addr.overlaps(o.cpu_addr);
174 long add_reference() {
175 #ifndef TARGET_WINNT
176 return __sync_fetch_and_add(&ref_count, 1);
177 #else // TARGET_WINNT
178 return _InterlockedIncrement(&ref_count) - 1;
179 #endif // TARGET_WINNT
182 long remove_reference() {
183 #ifndef TARGET_WINNT
184 return __sync_sub_and_fetch(&ref_count, 1);
185 #else // TARGET_WINNT
186 return _InterlockedDecrement(&ref_count);
187 #endif // TARGET_WINNT
190 long get_reference() const {
191 return ref_count;
194 public:
195 // CPU address range
196 const MemRange cpu_addr;
198 private:
199 // reference count for the entry
200 long ref_count;
203 // Set of autimatic variables
204 typedef std::set<AutoData> AutoSet;
// Target image data
struct TargetImage
{
    // Stores the given pointers and sizes as-is; nothing is copied, so the
    // strings and the image contents must be kept alive by the caller.
    TargetImage(const char *_name, const void *_data, uint64_t _size,
                const char *_origin, uint64_t _offset) :
        name(_name),
        data(_data),
        size(_size),
        origin(_origin),
        offset(_offset)
    {}

    // library name
    const char* name;

    // contents and size
    const void* data;
    uint64_t    size;

    // file of origin and offset within that file
    const char* origin;
    uint64_t    offset;
};

typedef std::list<TargetImage> TargetImageList;
229 // Data associated with persistent auto objects
230 struct PersistData
232 PersistData(const void *addr, uint64_t routine_num, uint64_t size) :
233 stack_cpu_addr(addr), routine_id(routine_num)
235 stack_ptr_data = new PtrData(0, size);
237 // 1-st key value - begining of the stack at CPU
238 const void * stack_cpu_addr;
239 // 2-nd key value - identifier of routine invocation at CPU
240 uint64_t routine_id;
241 // corresponded PtrData; only stack_ptr_data->mic_buf is used
242 PtrData * stack_ptr_data;
243 // used to get offset of the variable in stack buffer
244 char * cpu_stack_addr;
247 typedef std::list<PersistData> PersistDataList;
249 // class representing a single engine
250 struct Engine {
251 friend void __offload_init_library_once(void);
252 friend void __offload_fini_library(void);
254 #define check_result(res, tag, ...) \
256 if (res == COI_PROCESS_DIED) { \
257 fini_process(true); \
258 exit(1); \
260 if (res != COI_SUCCESS) { \
261 __liboffload_error_support(tag, __VA_ARGS__); \
262 exit(1); \
266 int get_logical_index() const {
267 return m_index;
270 int get_physical_index() const {
271 return m_physical_index;
274 const COIPROCESS& get_process() const {
275 return m_process;
278 // initialize device
279 void init(void);
281 // add new library
282 void add_lib(const TargetImage &lib)
284 m_lock.lock();
285 m_ready = false;
286 m_images.push_back(lib);
287 m_lock.unlock();
290 COIRESULT compute(
291 const std::list<COIBUFFER> &buffers,
292 const void* data,
293 uint16_t data_size,
294 void* ret,
295 uint16_t ret_size,
296 uint32_t num_deps,
297 const COIEVENT* deps,
298 COIEVENT* event
301 #ifdef MYO_SUPPORT
302 // temporary workaround for blocking behavior for myoiLibInit/Fini calls
303 void init_myo(COIEVENT *event) {
304 COIRESULT res;
305 res = COI::PipelineRunFunction(get_pipeline(),
306 m_funcs[c_func_myo_init],
307 0, 0, 0, 0, 0, 0, 0, 0, 0,
308 event);
309 check_result(res, c_pipeline_run_func, m_index, res);
312 void fini_myo(COIEVENT *event) {
313 COIRESULT res;
314 res = COI::PipelineRunFunction(get_pipeline(),
315 m_funcs[c_func_myo_fini],
316 0, 0, 0, 0, 0, 0, 0, 0, 0,
317 event);
318 check_result(res, c_pipeline_run_func, m_index, res);
320 #endif // MYO_SUPPORT
323 // Memory association table
325 PtrData* find_ptr_data(const void *ptr) {
326 m_ptr_lock.lock();
327 PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0));
328 m_ptr_lock.unlock();
329 if (res == m_ptr_set.end()) {
330 return 0;
332 return const_cast<PtrData*>(res.operator->());
335 PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
336 m_ptr_lock.lock();
337 std::pair<PtrSet::iterator, bool> res =
338 m_ptr_set.insert(PtrData(ptr, len));
339 PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
340 m_ptr_lock.unlock();
342 is_new = res.second;
343 if (is_new) {
344 // It's necessary to lock as soon as possible.
345 // unlock must be done at call site of insert_ptr_data at
346 // branch for is_new
347 ptr_data->alloc_ptr_data_lock.lock();
349 return ptr_data;
352 void remove_ptr_data(const void *ptr) {
353 m_ptr_lock.lock();
354 m_ptr_set.erase(PtrData(ptr, 0));
355 m_ptr_lock.unlock();
359 // Automatic variables
361 AutoData* find_auto_data(const void *ptr) {
362 AutoSet &auto_vars = get_auto_vars();
363 AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0));
364 if (res == auto_vars.end()) {
365 return 0;
367 return const_cast<AutoData*>(res.operator->());
370 AutoData* insert_auto_data(const void *ptr, uint64_t len) {
371 AutoSet &auto_vars = get_auto_vars();
372 std::pair<AutoSet::iterator, bool> res =
373 auto_vars.insert(AutoData(ptr, len));
374 return const_cast<AutoData*>(res.first.operator->());
377 void remove_auto_data(const void *ptr) {
378 get_auto_vars().erase(AutoData(ptr, 0));
382 // Signals
384 void add_signal(const void *signal, OffloadDescriptor *desc) {
385 m_signal_lock.lock();
386 m_signal_map[signal] = desc;
387 m_signal_lock.unlock();
390 OffloadDescriptor* find_signal(const void *signal, bool remove) {
391 OffloadDescriptor *desc = 0;
393 m_signal_lock.lock();
395 SignalMap::iterator it = m_signal_map.find(signal);
396 if (it != m_signal_map.end()) {
397 desc = it->second;
398 if (remove) {
399 m_signal_map.erase(it);
403 m_signal_lock.unlock();
405 return desc;
408 // stop device process
409 void fini_process(bool verbose);
411 // list of stacks active at the engine
412 PersistDataList m_persist_list;
414 private:
415 Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false),
416 m_proc_number(0)
419 ~Engine() {
420 if (m_process != 0) {
421 fini_process(false);
425 // set indexes
426 void set_indexes(int logical_index, int physical_index) {
427 m_index = logical_index;
428 m_physical_index = physical_index;
431 // start process on device
432 void init_process();
434 void load_libraries(void);
435 void init_ptr_data(void);
437 // performs library intialization on the device side
438 pid_t init_device(void);
440 private:
441 // get pipeline associated with a calling thread
442 COIPIPELINE get_pipeline(void);
444 // get automatic vars set associated with the calling thread
445 AutoSet& get_auto_vars(void);
447 // destructor for thread data
448 static void destroy_thread_data(void *data);
450 private:
451 typedef std::set<PtrData> PtrSet;
452 typedef std::map<const void*, OffloadDescriptor*> SignalMap;
454 // device indexes
455 int m_index;
456 int m_physical_index;
458 // number of COI pipes created for the engine
459 long m_proc_number;
461 // process handle
462 COIPROCESS m_process;
464 // If false, device either has not been initialized or new libraries
465 // have been added.
466 bool m_ready;
467 mutex_t m_lock;
469 // List of libraries to be loaded
470 TargetImageList m_images;
472 // var table
473 PtrSet m_ptr_set;
474 mutex_t m_ptr_lock;
476 // signals
477 SignalMap m_signal_map;
478 mutex_t m_signal_lock;
480 // constants for accessing device function handles
481 enum {
482 c_func_compute = 0,
483 #ifdef MYO_SUPPORT
484 c_func_myo_init,
485 c_func_myo_fini,
486 #endif // MYO_SUPPORT
487 c_func_init,
488 c_func_var_table_size,
489 c_func_var_table_copy,
490 c_funcs_total
492 static const char* m_func_names[c_funcs_total];
494 // device function handles
495 COIFUNCTION m_funcs[c_funcs_total];
497 // int -> name mapping for device signals
498 static const int c_signal_max = 32;
499 static const char* c_signal_names[c_signal_max];
502 #endif // OFFLOAD_ENGINE_H_INCLUDED