2 Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #if defined(LINUX) || defined(FREEBSD)
32 #include <mm_malloc.h>
35 #include "offload_myo_host.h"
38 #include "offload_host.h"
39 //#include "offload_util.h"
41 #define MYO_VERSION1 "MYO_1.0"
43 extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
44 extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
47 #pragma weak __cilkrts_cilk_for_32
48 #pragma weak __cilkrts_cilk_for_64
49 #endif // TARGET_WINNT
51 static void __offload_myoProcessDeferredTables();
55 MyoWrapper() : m_lib_handle(0), m_is_available(false)
58 bool is_available() const {
59 return m_is_available
;
62 bool LoadLibrary(void);
64 // unloads the library
65 void UnloadLibrary(void) {
66 // if (m_lib_handle != 0) {
67 // DL_close(m_lib_handle);
72 // Wrappers for MYO client functions
73 void LibInit(void *arg
, void *func
) const {
74 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoinit
,
75 "%s(%p, %p)\n", __func__
, arg
, func
);
76 CheckResult(__func__
, m_lib_init(arg
, func
));
79 void LibFini(void) const {
80 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myofini
, "%s()\n", __func__
);
84 void* SharedMalloc(size_t size
) const {
85 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedmalloc
,
86 "%s(%lld)\n", __func__
, size
);
87 return m_shared_malloc(size
);
90 void SharedFree(void *ptr
) const {
91 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedfree
,
92 "%s(%p)\n", __func__
, ptr
);
96 void* SharedAlignedMalloc(size_t size
, size_t align
) const {
97 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedmalloc
,
98 "%s(%lld, %lld)\n", __func__
, size
, align
);
99 return m_shared_aligned_malloc(size
, align
);
102 void SharedAlignedFree(void *ptr
) const {
103 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedfree
,
104 "%s(%p)\n", __func__
, ptr
);
105 m_shared_aligned_free(ptr
);
108 void Acquire(void) const {
109 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoacquire
,
111 CheckResult(__func__
, m_acquire());
114 void Release(void) const {
115 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myorelease
,
117 CheckResult(__func__
, m_release());
120 void HostVarTablePropagate(void *table
, int num_entries
) const {
121 OFFLOAD_DEBUG_TRACE(4, "%s(%p, %d)\n", __func__
, table
, num_entries
);
122 CheckResult(__func__
, m_host_var_table_propagate(table
, num_entries
));
125 void HostFptrTableRegister(void *table
, int num_entries
,
127 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoregister
,
128 "%s(%p, %d, %d)\n", __func__
, table
,
129 num_entries
, ordered
);
130 CheckResult(__func__
,
131 m_host_fptr_table_register(table
, num_entries
, ordered
));
134 void RemoteThunkCall(void *thunk
, void *args
, int device
) {
135 OFFLOAD_DEBUG_TRACE(4, "%s(%p, %p, %d)\n", __func__
, thunk
, args
,
137 CheckResult(__func__
, m_remote_thunk_call(thunk
, args
, device
));
140 MyoiRFuncCallHandle
RemoteCall(const char *func
, void *args
, int device
) const {
141 OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__
, func
, args
,
143 return m_remote_call(func
, args
, device
);
146 void GetResult(MyoiRFuncCallHandle handle
) const {
147 OFFLOAD_DEBUG_TRACE(4, "%s(%p)\n", __func__
, handle
);
148 CheckResult(__func__
, m_get_result(handle
));
151 bool PostInitFuncSupported() const {
152 OFFLOAD_DEBUG_TRACE(4, "%s()\n", __func__
);
153 if (m_feature_available
) {
154 return m_feature_available(MYO_FEATURE_POST_LIB_INIT
) ==
161 void CreateVtableArena();
163 MyoArena
GetVtableArena()const {
164 return m_vtable_arena
;
168 MyoOwnershipType ownership
,
173 OFFLOAD_DEBUG_TRACE(4, "%s(%d, %d, %p)\n",
174 __func__
, ownership
, consistency
, arena
);
175 CheckResult(__func__
, m_arena_create(ownership
, consistency
, arena
));
178 void* SharedAlignedArenaMalloc(
184 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenamalloc
,
185 "%s(%u, %lld, %lld)\n",
186 __func__
, arena
, size
, align
);
187 return m_arena_aligned_malloc(arena
, size
, align
);
190 void* SharedAlignedArenaFree(
195 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenafree
,
196 "%s(%u, %p)\n", __func__
, arena
, ptr
);
197 return m_arena_aligned_free(arena
, ptr
);
204 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenaacquire
,
206 CheckResult(__func__
, m_arena_acquire(arena
));
213 OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenarelease
,
215 CheckResult(__func__
, m_arena_release(arena
));
219 void CheckResult(const char *func
, MyoError error
) const {
220 if (error
!= MYO_SUCCESS
) {
221 LIBOFFLOAD_ERROR(c_myowrapper_checkresult
, func
, error
);
229 int m_post_init_func
;
230 MyoArena m_vtable_arena
;
232 // pointers to functions from myo library
233 MyoError (*m_lib_init
)(void*, void*);
234 void (*m_lib_fini
)(void);
235 void* (*m_shared_malloc
)(size_t);
236 void (*m_shared_free
)(void*);
237 void* (*m_shared_aligned_malloc
)(size_t, size_t);
238 void (*m_shared_aligned_free
)(void*);
239 MyoError (*m_acquire
)(void);
240 MyoError (*m_release
)(void);
241 MyoError (*m_host_var_table_propagate
)(void*, int);
242 MyoError (*m_host_fptr_table_register
)(void*, int, int);
243 MyoError (*m_remote_thunk_call
)(void*, void*, int);
244 MyoiRFuncCallHandle (*m_remote_call
)(const char*, void*, int);
245 MyoError (*m_get_result
)(MyoiRFuncCallHandle
);
246 MyoError (*m_arena_create
)(MyoOwnershipType
, int, MyoArena
*);
247 void* (*m_arena_aligned_malloc
)(MyoArena
, size_t, size_t);
248 void* (*m_arena_aligned_free
)(MyoArena
, void*);
249 MyoError (*m_arena_acquire
)(MyoArena
);
250 MyoError (*m_arena_release
)(MyoArena
);
251 // Placeholder until MYO headers support enum type for feature
252 MyoError (*m_feature_available
)(int feature
);
255 DLL_LOCAL
bool MyoWrapper::LoadLibrary(void)
258 const char *lib_name
= "libmyo-client.so";
259 #else // TARGET_WINNT
260 const char *lib_name
= "myo-client.dll";
261 #endif // TARGET_WINNT
263 OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name
);
265 m_lib_handle
= DL_open(lib_name
);
266 if (m_lib_handle
== 0) {
267 OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. errno = %d\n",
272 m_lib_init
= (MyoError (*)(void*, void*))
273 DL_sym(m_lib_handle
, "myoiLibInit", MYO_VERSION1
);
274 if (m_lib_init
== 0) {
275 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
281 m_lib_fini
= (void (*)(void))
282 DL_sym(m_lib_handle
, "myoiLibFini", MYO_VERSION1
);
283 if (m_lib_fini
== 0) {
284 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
290 m_shared_malloc
= (void* (*)(size_t))
291 DL_sym(m_lib_handle
, "myoSharedMalloc", MYO_VERSION1
);
292 if (m_shared_malloc
== 0) {
293 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
299 m_shared_free
= (void (*)(void*))
300 DL_sym(m_lib_handle
, "myoSharedFree", MYO_VERSION1
);
301 if (m_shared_free
== 0) {
302 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
308 m_shared_aligned_malloc
= (void* (*)(size_t, size_t))
309 DL_sym(m_lib_handle
, "myoSharedAlignedMalloc", MYO_VERSION1
);
310 if (m_shared_aligned_malloc
== 0) {
311 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
312 "myoSharedAlignedMalloc");
317 m_shared_aligned_free
= (void (*)(void*))
318 DL_sym(m_lib_handle
, "myoSharedAlignedFree", MYO_VERSION1
);
319 if (m_shared_aligned_free
== 0) {
320 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
321 "myoSharedAlignedFree");
326 m_acquire
= (MyoError (*)(void))
327 DL_sym(m_lib_handle
, "myoAcquire", MYO_VERSION1
);
328 if (m_acquire
== 0) {
329 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
335 m_release
= (MyoError (*)(void))
336 DL_sym(m_lib_handle
, "myoRelease", MYO_VERSION1
);
337 if (m_release
== 0) {
338 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
344 m_host_var_table_propagate
= (MyoError (*)(void*, int))
345 DL_sym(m_lib_handle
, "myoiHostVarTablePropagate", MYO_VERSION1
);
346 if (m_host_var_table_propagate
== 0) {
347 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
348 "myoiHostVarTablePropagate");
353 m_host_fptr_table_register
= (MyoError (*)(void*, int, int))
354 DL_sym(m_lib_handle
, "myoiHostFptrTableRegister", MYO_VERSION1
);
355 if (m_host_fptr_table_register
== 0) {
356 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
357 "myoiHostFptrTableRegister");
362 m_remote_thunk_call
= (MyoError (*)(void*, void*, int))
363 DL_sym(m_lib_handle
, "myoiRemoteThunkCall", MYO_VERSION1
);
364 if (m_remote_thunk_call
== 0) {
365 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
366 "myoiRemoteThunkCall");
371 m_remote_call
= (MyoiRFuncCallHandle (*)(const char*, void*, int))
372 DL_sym(m_lib_handle
, "myoiRemoteCall", MYO_VERSION1
);
373 if (m_remote_call
== 0) {
374 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
380 m_get_result
= (MyoError (*)(MyoiRFuncCallHandle
))
381 DL_sym(m_lib_handle
, "myoiGetResult", MYO_VERSION1
);
382 if (m_get_result
== 0) {
383 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
389 m_arena_create
= (MyoError (*)(MyoOwnershipType
, int, MyoArena
*))
390 DL_sym(m_lib_handle
, "myoArenaCreate", MYO_VERSION1
);
391 if (m_arena_create
== 0) {
392 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
398 m_arena_aligned_malloc
= (void* (*)(MyoArena
, size_t, size_t))
399 DL_sym(m_lib_handle
, "myoArenaAlignedMalloc", MYO_VERSION1
);
400 if (m_arena_aligned_malloc
== 0) {
401 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
402 "myoArenaAlignedMalloc");
407 m_arena_aligned_free
= (void* (*)(MyoArena
, void*))
408 DL_sym(m_lib_handle
, "myoArenaAlignedFree", MYO_VERSION1
);
409 if (m_arena_aligned_free
== 0) {
410 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
411 "myoArenaAlignedFree");
416 m_arena_acquire
= (MyoError (*)(MyoArena
))
417 DL_sym(m_lib_handle
, "myoArenaAcquire", MYO_VERSION1
);
418 if (m_acquire
== 0) {
419 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
425 m_arena_release
= (MyoError (*)(MyoArena
))
426 DL_sym(m_lib_handle
, "myoArenaRelease", MYO_VERSION1
);
427 if (m_release
== 0) {
428 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
434 // Check for "feature-available" API added in MPSS 3.3.
435 // Not finding it is not an error.
436 m_feature_available
= (MyoError (*)(int))
437 DL_sym(m_lib_handle
, "myoiSupportsFeature", MYO_VERSION1
);
438 if (m_feature_available
== 0) {
439 OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
440 "myoiSupportsFeature");
443 OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n");
445 // Create arena if supported
447 OFFLOAD_DEBUG_TRACE(3, "Vtable arena created\n");
449 m_is_available
= true;
454 static bool myo_is_available
;
455 static MyoWrapper myo_wrapper
;
457 void MyoWrapper::CreateVtableArena()
459 MyoArena
* vtable_arena
;
461 // Check if this MYO supports arenas for vtables
462 if (myo_wrapper
.PostInitFuncSupported()) {
463 // Create arena for vtables
464 vtable_arena
= (MyoArena
*)myo_wrapper
.SharedMalloc(sizeof(MyoArena
));
465 myo_wrapper
.ArenaCreate(
466 MYO_ARENA_OURS
, MYO_NO_CONSISTENCY
, vtable_arena
);
467 m_vtable_arena
= *vtable_arena
;
468 OFFLOAD_DEBUG_TRACE(4, "created arena = %d\n", m_vtable_arena
);
476 MyoTable(SharedTableEntry
*tab
, int len
) : var_tab(tab
), var_tab_len(len
)
479 SharedTableEntry
* var_tab
;
483 typedef std::list
<MyoTable
> MyoTableList
;
484 static MyoTableList __myo_table_list
;
485 static mutex_t __myo_table_lock
;
486 static bool __myo_tables
= false;
488 static void __offload_myo_shared_vtable_process(SharedTableEntry
*entry
);
489 static void __offload_myo_shared_table_process(SharedTableEntry
*entry
);
490 static void __offload_myo_shared_init_table_process(InitTableEntry
* entry
);
491 static void __offload_myo_fptr_table_process(FptrTableEntry
*entry
);
492 static void __offload_propagate_shared_vars();
494 static void __offload_myoLoadLibrary_once(void)
496 if (__offload_init_library()) {
497 myo_wrapper
.LoadLibrary();
501 static bool __offload_myoLoadLibrary(void)
503 OFFLOAD_DEBUG_TRACE(4, "__offload_myoLoadLibrary\n");
504 static OffloadOnceControl ctrl
= OFFLOAD_ONCE_CONTROL_INIT
;
505 __offload_run_once(&ctrl
, __offload_myoLoadLibrary_once
);
507 return myo_wrapper
.is_available();
510 static void __offload_myoInit_once(void)
512 if (!__offload_myoLoadLibrary()) {
516 // initialize all devices
517 for (int i
= 0; i
< mic_engines_total
; i
++) {
518 mic_engines
[i
].init();
521 // load and initialize MYO library
522 OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");
524 COIEVENT events
[MIC_ENGINES_MAX
];
526 // One entry per device +
527 // A pair of entries for the Host postInit func +
528 // A pair of entries for the MIC postInit func +
530 MyoiUserParams params
[MIC_ENGINES_MAX
+5];
532 // Load target library to all devices and
533 // create libinit parameters for all devices
534 for (int i
= 0; i
< mic_engines_total
; i
++) {
535 mic_engines
[i
].init_myo(&events
[i
]);
537 params
[i
].type
= MYOI_USERPARAMS_DEVID
;
538 params
[i
].nodeid
= mic_engines
[i
].get_physical_index() + 1;
539 OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
540 i
, params
[i
].type
, params
[i
].nodeid
);
543 // Check if V2 myoLibInit is available
544 if (myo_wrapper
.PostInitFuncSupported()) {
545 // Set the host post libInit function indicator
546 params
[mic_engines_total
].type
=
547 MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC
;
548 params
[mic_engines_total
].nodeid
=
549 MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE
;
550 OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
552 params
[mic_engines_total
].type
, params
[mic_engines_total
].nodeid
);
554 // Set the host post libInit host function address
555 ((MyoiUserParamsPostLibInit
*)(¶ms
[mic_engines_total
+1]))->
556 postLibInitHostFuncAddress
=
557 (void (*)())&__offload_propagate_shared_vars
;
558 OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %p }\n",
560 ((MyoiUserParamsPostLibInit
*)(¶ms
[mic_engines_total
+1]))->
561 postLibInitHostFuncAddress
);
563 // Set the target post libInit function indicator
564 params
[mic_engines_total
+2].type
=
565 MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC
;
566 params
[mic_engines_total
+2].nodeid
=
567 MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES
;
569 // Set the target post libInit target function name
570 ((MyoiUserParamsPostLibInit
*)(¶ms
[mic_engines_total
+3]))->
571 postLibInitRemoveFuncName
= "--vtable_initializer--";
572 OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %s }\n",
574 ((MyoiUserParamsPostLibInit
*)(¶ms
[mic_engines_total
+1]))->
575 postLibInitRemoveFuncName
);
577 params
[mic_engines_total
+4].type
= MYOI_USERPARAMS_LAST_MSG
;
578 params
[mic_engines_total
+4].nodeid
= 0;
579 OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
581 params
[mic_engines_total
+4].type
,
582 params
[mic_engines_total
+4].nodeid
);
584 params
[mic_engines_total
].type
= MYOI_USERPARAMS_LAST_MSG
;
585 params
[mic_engines_total
].nodeid
= 0;
586 OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
588 params
[mic_engines_total
].type
, params
[mic_engines_total
].nodeid
);
591 // initialize myo runtime on host
592 myo_wrapper
.LibInit(params
, 0);
594 // wait for the target init calls to finish
596 res
= COI::EventWait(mic_engines_total
, events
, -1, 1, 0, 0);
597 if (res
!= COI_SUCCESS
) {
598 LIBOFFLOAD_ERROR(c_event_wait
, res
);
602 myo_is_available
= true;
603 OFFLOAD_DEBUG_TRACE(2, "setting myo_is_available=%d\n", myo_is_available
);
605 OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
608 static bool __offload_myoInit(void)
610 static OffloadOnceControl ctrl
= OFFLOAD_ONCE_CONTROL_INIT
;
611 __offload_run_once(&ctrl
, __offload_myoInit_once
);
613 // Check if using V1 myoLibInit
614 if (!myo_wrapper
.PostInitFuncSupported()) {
615 __offload_propagate_shared_vars();
618 return myo_is_available
;
621 static void __offload_propagate_shared_vars()
623 // Propagate pending shared var tables
625 mutex_locker_t
locker(__myo_table_lock
);
628 // Give tables with MYO so it can propagate to target
629 for(MyoTableList::const_iterator it
= __myo_table_list
.begin();
630 it
!= __myo_table_list
.end(); ++it
) {
632 for (SharedTableEntry
*entry
= it
->var_tab
;
633 entry
->varName
!= MYO_TABLE_END_MARKER(); entry
++) {
634 if (entry
->varName
== 0) {
637 myo_wrapper
.HostVarTablePropagate(entry
, 1);
638 OFFLOAD_DEBUG_TRACE(2, "HostVarTablePropagate(%s, 1)\n",
641 #else // TARGET_WINNT
642 myo_wrapper
.HostVarTablePropagate(it
->var_tab
,
644 #endif // TARGET_WINNT
647 __myo_table_list
.clear();
648 __myo_tables
= false;
653 static bool shared_table_entries(
654 SharedTableEntry
*entry
657 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, entry
);
659 for (; entry
->varName
!= MYO_TABLE_END_MARKER(); entry
++) {
661 if (entry
->varName
== 0) {
664 #endif // TARGET_WINNT
672 static bool fptr_table_entries(
673 FptrTableEntry
*entry
676 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, entry
);
678 for (; entry
->funcName
!= MYO_TABLE_END_MARKER(); entry
++) {
680 if (entry
->funcName
== 0) {
683 #endif // TARGET_WINNT
691 extern "C" void __offload_myoRegisterTables(
692 InitTableEntry
* init_table
,
693 SharedTableEntry
*shared_table
,
694 FptrTableEntry
*fptr_table
697 // check whether we need to initialize MYO library. It is
698 // initialized only if at least one myo table is not empty
699 if (shared_table_entries(shared_table
) || fptr_table_entries(fptr_table
)) {
700 // make sure myo library is loaded
701 __offload_myoLoadLibrary();
704 __offload_myo_shared_table_process(shared_table
);
705 __offload_myo_fptr_table_process(fptr_table
);
706 __offload_myo_shared_init_table_process(init_table
);
710 extern "C" bool __offload_myoProcessTables(
712 MYOInitTableList::Node
*init_table
,
713 MYOVarTableList::Node
*shared_table
,
714 MYOVarTableList::Node
*shared_vtable
,
715 MYOFuncTableList::Node
*fptr_table
718 OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__
);
720 // Collect the tables in this .dll/.so
721 __offload_myoRegisterTables1(
722 init_table
, shared_table
, shared_vtable
, fptr_table
);
724 // Now check what type of module we are dealing with
725 if (__offload_target_image_is_executable(image
)) {
726 OFFLOAD_DEBUG_TRACE(2, "Main encountered\n");
727 OFFLOAD_DEBUG_TRACE(2, "MYO initialization not deferred\n");
728 // MYO tables across dlls have been collected
729 // Now init MYO and process the tables
730 __offload_myoProcessDeferredTables();
731 // Return true to indicate that atexit needs to be called by ofldbegin
734 // This is a shared library, either auto-loaded or dynamically loaded
735 // If __target_exe is set, then main has started running
736 if (__target_exe
!= 0) {
737 // Main is running: this is a dynamic load of a shared library
738 // Finish processing the tables in this library
739 OFFLOAD_DEBUG_TRACE(2,
740 "Dynamically loaded shared library encountered\n");
741 OFFLOAD_DEBUG_TRACE(2,
742 "MYO initialization not deferred\n");
743 __offload_myoProcessDeferredTables();
745 // Main is not running: this is an auto-loaded shared library
746 // Tables have been collected, nothing else to do
747 OFFLOAD_DEBUG_TRACE(2,
748 "Auto-loaded shared library encountered\n");
749 OFFLOAD_DEBUG_TRACE(2, "Deferring initialization of MYO\n");
755 // Process contents of all Var tables
756 void MYOVarTableList::process()
758 OFFLOAD_DEBUG_TRACE(2, "Process MYO Var tables:\n");
762 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
763 __offload_myo_shared_table_process(
764 (SharedTableEntry
*)n
->table
.entries
);
766 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
773 // Process contents of all Vtable tables
774 void MYOVarTableList::process_vtable()
776 OFFLOAD_DEBUG_TRACE(2, "Process MYO Vtable tables:\n");
780 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
781 __offload_myo_shared_vtable_process(
782 (SharedTableEntry
*)n
->table
.entries
);
784 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
791 // Process contents of all Func tables
792 void MYOFuncTableList::process()
794 OFFLOAD_DEBUG_TRACE(2, "Process MYO Func tables:\n");
798 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
799 __offload_myo_fptr_table_process(
800 (FptrTableEntry
*)n
->table
.entries
);
802 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
809 // Process contents of all Init tables
810 void MYOInitTableList::process()
812 OFFLOAD_DEBUG_TRACE(2, "Process MYO Init tables:\n");
816 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
817 __offload_myo_shared_init_table_process(
818 (InitTableEntry
*)n
->table
.entries
);
820 for (Node
*n
= m_head
; n
!= 0; n
= n
->next
) {
827 static void __offload_myoProcessDeferredTables()
829 OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__
);
831 // Debug dumps of MYO tables
832 if (console_enabled
>= 2) {
833 __offload_myo_var_tables
.dump();
834 __offload_myo_vtable_tables
.dump();
835 __offload_myo_func_tables
.dump();
836 __offload_myo_init_tables
.dump();
839 if (!__offload_myo_var_tables
.is_empty() ||
840 !__offload_myo_vtable_tables
.is_empty() ||
841 !__offload_myo_func_tables
.is_empty() ||
842 !__offload_myo_init_tables
.is_empty())
844 OFFLOAD_DEBUG_TRACE(3, "MYO usage detected in program\n");
846 // Make sure myo library is loaded
847 __offload_myoLoadLibrary();
848 OFFLOAD_DEBUG_TRACE(3, "Initialized MYO\n");
850 __offload_myo_var_tables
.process();
851 __offload_myo_vtable_tables
.process_vtable();
852 __offload_myo_func_tables
.process();
853 __offload_myo_init_tables
.process();
854 OFFLOAD_DEBUG_TRACE(3, "Finished processing MYO tables\n");
856 OFFLOAD_DEBUG_TRACE(3,
857 "MYO tables are empty; Will not initialize MYO\n");
861 DLL_LOCAL
void __offload_myoFini(void)
863 if (myo_is_available
) {
864 OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__
);
866 COIEVENT events
[MIC_ENGINES_MAX
];
868 // kick off myoiLibFini calls on all devices
869 for (int i
= 0; i
< mic_engines_total
; i
++) {
870 mic_engines
[i
].fini_myo(&events
[i
]);
873 // cleanup myo runtime on host
874 myo_wrapper
.LibFini();
876 // wait for the target fini calls to finish
878 res
= COI::EventWait(mic_engines_total
, events
, -1, 1, 0, 0);
879 if (res
!= COI_SUCCESS
) {
880 LIBOFFLOAD_ERROR(c_event_wait
, res
);
886 static void __offload_myo_shared_table_process(
887 SharedTableEntry
*entry
890 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, entry
);
892 SharedTableEntry
*start
= entry
;
895 // allocate shared memory for vars
896 for (; entry
->varName
!= MYO_TABLE_END_MARKER(); entry
++) {
898 if (entry
->varName
== 0) {
899 OFFLOAD_DEBUG_TRACE(4,
900 "skip registering a NULL MyoSharedTable entry\n");
903 #endif // TARGET_WINNT
905 OFFLOAD_DEBUG_TRACE(4, "registering MyoSharedTable entry for %s @%p\n",
906 entry
->varName
, entry
);
908 // Invoke the function to create shared memory
909 reinterpret_cast<void(*)(void)>(entry
->sharedAddr
)();
913 // add table to the list if it is not empty
915 mutex_locker_t
locker(__myo_table_lock
);
916 __myo_table_list
.push_back(MyoTable(start
, entries
));
921 static void __offload_myo_shared_vtable_process(
922 SharedTableEntry
*entry
925 SharedTableEntry
*start
= entry
;
928 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, entry
);
930 // allocate shared memory for vtables
931 for (; entry
->varName
!= MYO_TABLE_END_MARKER(); entry
++) {
933 if (entry
->varName
== 0) {
934 OFFLOAD_DEBUG_TRACE(4,
935 "skip registering a NULL MyoSharedVTable entry\n");
938 #endif // TARGET_WINNT
940 OFFLOAD_DEBUG_TRACE(4,
941 "registering MyoSharedVTable entry for %s @%p\n",
942 entry
->varName
, entry
);
944 // Invoke the function to create shared memory
945 reinterpret_cast<void(*)(MyoArena
)>(entry
->sharedAddr
)(
946 myo_wrapper
.GetVtableArena());
950 // add table to the list if it is not empty
952 mutex_locker_t
locker(__myo_table_lock
);
953 __myo_table_list
.push_back(MyoTable(start
, entries
));
958 void __offload_myo_shared_init_table_process(InitTableEntry
* entry
)
960 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, entry
);
963 for (; entry
->funcName
!= MYO_TABLE_END_MARKER(); entry
++) {
964 if (entry
->funcName
== 0) {
965 OFFLOAD_DEBUG_TRACE(4,
966 "skip registering a NULL MyoSharedInit entry\n");
970 // Invoke the function to init the shared memory
971 OFFLOAD_DEBUG_TRACE(4, "execute MyoSharedInit routine for %s\n",
973 entry
->func(myo_wrapper
.GetVtableArena());
975 #else // TARGET_WINNT
976 for (; entry
->func
!= 0; entry
++) {
977 // Invoke the function to init the shared memory
978 entry
->func(myo_wrapper
.GetVtableArena());
980 #endif // TARGET_WINNT
983 static void __offload_myo_fptr_table_process(
984 FptrTableEntry
*entry
987 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, entry
);
989 FptrTableEntry
*start
= entry
;
992 for (; entry
->funcName
!= MYO_TABLE_END_MARKER(); entry
++) {
994 if (entry
->funcName
== 0) {
995 OFFLOAD_DEBUG_TRACE(4,
996 "skip registering a NULL MyoFptrTable entry\n");
999 #endif // TARGET_WINNT
1001 if (!myo_wrapper
.is_available()) {
1002 *(static_cast<void**>(entry
->localThunkAddr
)) = entry
->funcAddr
;
1005 OFFLOAD_DEBUG_TRACE(4, "registering MyoFptrTable entry for %s @%p\n",
1006 entry
->funcName
, entry
);
1009 if (myo_wrapper
.is_available()) {
1010 myo_wrapper
.HostFptrTableRegister(entry
, 1, false);
1012 #endif // TARGET_WINNT
1017 #ifndef TARGET_WINNT
1018 if (myo_wrapper
.is_available() && entries
> 0) {
1019 myo_wrapper
.HostFptrTableRegister(start
, entries
, false);
1021 #endif // TARGET_WINNT
1024 extern "C" int __offload_myoIsAvailable(int target_number
)
1026 OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__
, target_number
);
1028 if (target_number
>= -2) {
1029 bool is_default_number
= (target_number
== -2);
1031 if (__offload_myoInit()) {
1032 if (target_number
>= 0) {
1033 // User provided the device number
1034 int num
= target_number
% mic_engines_total
;
1036 // reserve device in ORSL
1037 target_number
= ORSL::reserve(num
) ? num
: -1;
1040 // try to use device 0
1041 target_number
= ORSL::reserve(0) ? 0 : -1;
1044 // make sure device is initialized
1045 if (target_number
>= 0) {
1046 mic_engines
[target_number
].init();
1054 if (target_number
< 0 && !is_default_number
) {
1055 LIBOFFLOAD_ERROR(c_device_is_not_available
);
1060 LIBOFFLOAD_ERROR(c_invalid_device_number
);
1064 return target_number
;
1067 extern "C" void __offload_myoiRemoteIThunkCall(
1073 OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n", __func__
, thunk
, arg
,
1076 myo_wrapper
.Release();
1077 myo_wrapper
.RemoteThunkCall(thunk
, arg
, target_number
);
1078 myo_wrapper
.Acquire();
1080 ORSL::release(target_number
);
1083 extern "C" void* _Offload_shared_malloc(size_t size
)
1085 OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__
, size
);
1087 if (__offload_myoLoadLibrary()) {
1088 return myo_wrapper
.SharedMalloc(size
);
1091 return malloc(size
);
1095 extern "C" void _Offload_shared_free(void *ptr
)
1097 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, ptr
);
1099 if (__offload_myoLoadLibrary()) {
1100 myo_wrapper
.SharedFree(ptr
);
1107 extern "C" void* _Offload_shared_aligned_malloc(size_t size
, size_t align
)
1109 OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__
, size
, align
);
1111 if (__offload_myoLoadLibrary()) {
1112 return myo_wrapper
.SharedAlignedMalloc(size
, align
);
1115 if (align
< sizeof(void*)) {
1116 align
= sizeof(void*);
1118 return _mm_malloc(size
, align
);
1122 extern "C" void _Offload_shared_aligned_free(void *ptr
)
1124 OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__
, ptr
);
1126 if (__offload_myoLoadLibrary()) {
1127 myo_wrapper
.SharedAlignedFree(ptr
);
1134 extern "C" void _Offload_shared_arena_create(
1135 MyoOwnershipType ownership
,
1140 OFFLOAD_DEBUG_TRACE(3, "%s(%d, %d, %p)\n",
1141 __func__
, ownership
, consistency
, arena
);
1143 if (__offload_myoLoadLibrary()) {
1144 myo_wrapper
.ArenaCreate(ownership
, consistency
, arena
);
1148 extern "C" void* _Offload_shared_aligned_arena_malloc(
1154 OFFLOAD_DEBUG_TRACE(3, "%s(%u, %lld, %lld)\n",
1155 __func__
, arena
, size
, align
);
1157 if (__offload_myoLoadLibrary()) {
1158 void *p
= myo_wrapper
.SharedAlignedArenaMalloc(arena
, size
, align
);
1159 OFFLOAD_DEBUG_TRACE(3, "%s(%u, %lld, %lld)->%p\n",
1160 __func__
, arena
, size
, align
, p
);
1164 if (align
< sizeof(void*)) {
1165 align
= sizeof(void*);
1167 return _mm_malloc(size
, align
);
1171 extern "C" void _Offload_shared_aligned_arena_free(
1176 OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__
, arena
, ptr
);
1178 if (__offload_myoLoadLibrary()) {
1179 myo_wrapper
.SharedAlignedArenaFree(arena
, ptr
);
1186 extern "C" void _Offload_shared_arena_acquire(
1190 OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__
, arena
);
1192 if (__offload_myoLoadLibrary()) {
1193 myo_wrapper
.ArenaAcquire(arena
);
1197 extern "C" void _Offload_shared_arena_release(
1201 OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__
, arena
);
1203 if (__offload_myoLoadLibrary()) {
1204 myo_wrapper
.ArenaRelease(arena
);
1208 extern "C" void __intel_cilk_for_32_offload(
1210 void (*copy_constructor
)(void*, void*),
1213 void *closure_object
,
1215 unsigned int grain_size
)
1217 OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__
);
1219 target_number
= __offload_myoIsAvailable(target_number
);
1220 if (target_number
>= 0) {
1228 args
= (struct S
*) _Offload_shared_malloc(sizeof(struct S
) + size
);
1230 LIBOFFLOAD_ERROR(c_malloc
);
1233 args
->M3
= grain_size
;
1235 if (copy_constructor
== 0) {
1236 memcpy(args
->closure
, closure_object
, size
);
1239 copy_constructor(args
->closure
, closure_object
);
1242 myo_wrapper
.Release();
1243 myo_wrapper
.GetResult(
1244 myo_wrapper
.RemoteCall("__intel_cilk_for_32_offload",
1245 args
, target_number
)
1247 myo_wrapper
.Acquire();
1249 _Offload_shared_free(args
);
1251 ORSL::release(target_number
);
1254 __cilkrts_cilk_for_32(raddr
,
1261 extern "C" void __intel_cilk_for_64_offload(
1263 void (*copy_constructor
)(void*, void*),
1266 void *closure_object
,
1268 uint64_t grain_size
)
1270 OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__
);
1272 target_number
= __offload_myoIsAvailable(target_number
);
1273 if (target_number
>= 0) {
1281 args
= (struct S
*) _Offload_shared_malloc(sizeof(struct S
) + size
);
1283 LIBOFFLOAD_ERROR(c_malloc
);
1286 args
->M3
= grain_size
;
1288 if (copy_constructor
== 0) {
1289 memcpy(args
->closure
, closure_object
, size
);
1292 copy_constructor(args
->closure
, closure_object
);
1295 myo_wrapper
.Release();
1296 myo_wrapper
.GetResult(
1297 myo_wrapper
.RemoteCall("__intel_cilk_for_64_offload", args
,
1300 myo_wrapper
.Acquire();
1302 _Offload_shared_free(args
);
1304 ORSL::release(target_number
);
1307 __cilkrts_cilk_for_64(raddr
,