4 *************************************************************************
6 * Copyright (C) 2010-2016, Intel Corporation
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * * Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
19 * * Neither the name of Intel Corporation nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
30 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
33 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
36 * *********************************************************************
38 * PLEASE NOTE: This file is a downstream copy of a file maintained in
39 * a repository at cilkplus.org. Changes made to this file that are not
40 * submitted through the contribution process detailed at
41 * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
42 * time that a new version is released. Changes only submitted to the
43 * GNU compiler collection or posted to the git repository at
44 * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
47 * We welcome your contributions to this open source project. Thank you
48 * for your assistance in helping us improve Cilk Plus.
50 **************************************************************************
56 #include "local_state.h"
57 #include "signal_node.h"
58 #include "full_frame.h"
60 #include "cilk_malloc.h"
61 #include "reducer_impl.h"
62 #include "metacall_impl.h"
65 // On x86 processors (but not MIC processors), the compiler generated code to
66 // save the FP state (rounding mode and the like) before calling setjmp. We
67 // will need to restore that state when we resume.
69 # if defined(__i386__) || defined(__x86_64)
70 # define RESTORE_X86_FP_STATE
71 # endif // defined(__i386__) || defined(__x86_64)
74 // contains notification macros for VTune.
75 #include "cilk-ittnotify.h"
80 // On Cygwin, string.h doesn't declare strcasecmp if __STRICT_ANSI__ is defined
81 # undef __STRICT_ANSI__
87 #include "declare-alloca.h"
90 # include <sys/resource.h>
91 # include <sys/sysinfo.h>
95 # include <sys/resource.h>
96 // BSD does not define MAP_ANONYMOUS, but *does* define MAP_ANON. Aren't standards great!
97 # define MAP_ANONYMOUS MAP_ANON
101 # include <vxWorks.h>
102 # include <vxCpuLib.h>
/*
 * OS-dependent portion of the global runtime state for the pthreads port.
 * An instance is allocated by __cilkrts_init_global_sysdep() and released by
 * __cilkrts_destroy_global_sysdep().
 */
105 struct global_sysdep_state
107 pthread_t
*threads
; ///< Array of pthread handles for system workers, indexed by worker number
108 size_t pthread_t_size
; ///< sizeof(pthread_t), recorded for cilk_db
111 static void internal_enforce_global_visibility();
115 void __cilkrts_init_worker_sysdep(struct __cilkrts_worker
*w
)
117 ITT_SYNC_CREATE(w
, "Scheduler");
121 void __cilkrts_destroy_worker_sysdep(struct __cilkrts_worker
*w
)
/*
 * Set up the OS-dependent part of the global state `g`.
 *
 * Steps, in order:
 *   - internal_enforce_global_visibility(): re-open the runtime library so its
 *     public symbols are globally visible (needed for Cilk-TBB interop).
 *   - __cilkrts_init_tls_variables().
 *   - Allocate g->sysdep and record sizeof(pthread_t) in it.
 *   - Allocate g->sysdep->threads with room for g->total_workers handles.
 *
 * Both allocations are checked only with CILK_ASSERT.
 *
 * @param g  Global state to initialize; g->total_workers and g->P must
 *           already be set, since they are read here.
 */
126 void __cilkrts_init_global_sysdep(global_state_t
*g
)
128 internal_enforce_global_visibility();
130 __cilkrts_init_tls_variables();
// Sanity check: there must be at least P - 1 worker slots in total.
132 CILK_ASSERT(g
->total_workers
>= g
->P
- 1);
133 g
->sysdep
= __cilkrts_malloc(sizeof (struct global_sysdep_state
));
134 CILK_ASSERT(g
->sysdep
);
// Stored so that an external tool (cilk_db) can learn the handle size.
135 g
->sysdep
->pthread_t_size
= sizeof (pthread_t
);
137 // TBD: Should this value be g->total_workers, or g->P?
138 // Need to check what we are using this field for.
// One pthread_t slot per worker in g->total_workers.
139 g
->sysdep
->threads
= __cilkrts_malloc(sizeof(pthread_t
) * g
->total_workers
);
140 CILK_ASSERT(g
->sysdep
->threads
);
/*
 * Release the memory allocated by __cilkrts_init_global_sysdep(): first the
 * thread-handle array (if it is non-NULL), then the sysdep structure itself.
 *
 * g->sysdep is dereferenced unconditionally, so it must be non-NULL on entry.
 * NOTE(review): in the lines visible here g->sysdep is not reset after being
 * freed, so the pointer is left dangling.
 *
 * @param g  Global state whose sysdep portion is being torn down.
 */
146 void __cilkrts_destroy_global_sysdep(global_state_t
*g
)
148 if (g
->sysdep
->threads
)
149 __cilkrts_free(g
->sysdep
->threads
);
150 __cilkrts_free(g
->sysdep
);
153 /*************************************************************
154 Creation of worker threads:
155 *************************************************************/
/*
 * Report an approximate C stack range to Cilkscreen, then run the scheduler
 * for worker `w` via __cilkrts_run_scheduler_with_exceptions().
 *
 * The range passed to __cilkrts_cilkscreen_establish_c_stack() runs from
 * (&var - 1000000) up to &var, i.e. it is an estimate anchored at a local
 * variable, not a queried stack bound.
 * NOTE(review): the declaration of `var` is not visible in this view;
 * presumably it is a `char` local, making the offset 1000000 bytes - confirm.
 *
 * @param w  The worker whose scheduling loop is to be run.
 */
157 static void internal_run_scheduler_with_exceptions(__cilkrts_worker
*w
)
159 /* We assume the stack grows down. */
161 __cilkrts_cilkscreen_establish_c_stack(&var
- 1000000, &var
);
163 __cilkrts_run_scheduler_with_exceptions(w
);
169 * scheduler_thread_proc_for_system_worker
171 * Thread start function called when we start a new worker.
/*
 * Thread start routine for a system worker. `arg` is the __cilkrts_worker
 * that the new thread will act as. In the lines visible here it:
 *   1. asserts the worker is of type WORKER_SYSTEM and binds it to this
 *      thread with __cilkrts_set_tls_worker();
 *   2. creates the worker's scheduling fiber from the current thread and
 *      makes the worker its owner;
 *   3. runs the scheduler via internal_run_scheduler_with_exceptions();
 *   4. deallocates the scheduling fiber, asserting that no extra references
 *      to it remain, and clears the worker's pointer to it.
 * START_INTERVAL/STOP_INTERVAL pairs bracket each phase (presumably for the
 * runtime's timing statistics - confirm against the macro definitions).
 * NOTE(review): the function's return statement is not visible in this view.
 */
174 NON_COMMON
void* scheduler_thread_proc_for_system_worker(void *arg
)
177 __cilkrts_worker
*w
= (__cilkrts_worker
*)arg
;
179 #ifdef __INTEL_COMPILER
181 // Name the threads for Advisor. They don't want a worker number.
182 __itt_thread_set_name("Cilk Worker");
183 #endif // defined USE_ITTNOTIFY
184 #endif // defined __INTEL_COMPILER
186 /* Worker startup is serialized
187 status = pthread_mutex_lock(&__cilkrts_global_mutex);
188 CILK_ASSERT(status == 0);*/
189 CILK_ASSERT(w
->l
->type
== WORKER_SYSTEM
);
190 /*status = pthread_mutex_unlock(&__cilkrts_global_mutex);
191 CILK_ASSERT(status == 0);*/
// Make `w` the worker associated with this thread.
193 __cilkrts_set_tls_worker(w
);
195 START_INTERVAL(w
, INTERVAL_IN_SCHEDULER
);
196 START_INTERVAL(w
, INTERVAL_IN_RUNTIME
);
197 START_INTERVAL(w
, INTERVAL_INIT_WORKER
);
199 // Create a cilk fiber for this worker on this thread.
200 START_INTERVAL(w
, INTERVAL_FIBER_ALLOCATE_FROM_THREAD
) {
201 w
->l
->scheduling_fiber
= cilk_fiber_allocate_from_thread();
202 cilk_fiber_set_owner(w
->l
->scheduling_fiber
, w
);
203 } STOP_INTERVAL(w
, INTERVAL_FIBER_ALLOCATE_FROM_THREAD
);
205 STOP_INTERVAL(w
, INTERVAL_INIT_WORKER
);
// Run the scheduler for this worker; teardown follows once this call returns.
207 internal_run_scheduler_with_exceptions(w
);
209 START_INTERVAL(w
, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD
) {
210 // Deallocate the scheduling fiber. This operation reverses the
211 // effect of cilk_fiber_allocate_from_thread() and must be done in this
212 // thread before it exits.
213 int ref_count
= cilk_fiber_deallocate_from_thread(w
->l
->scheduling_fiber
);
214 // Scheduling fibers should never have extra references to them.
215 // We only get extra references into fibers because of Windows
217 CILK_ASSERT(0 == ref_count
);
218 w
->l
->scheduling_fiber
= NULL
;
219 } STOP_INTERVAL(w
, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD
);
// Close the two outer intervals opened at the start of the function.
221 STOP_INTERVAL(w
, INTERVAL_IN_RUNTIME
);
222 STOP_INTERVAL(w
, INTERVAL_IN_SCHEDULER
);
227 * We are exporting a function with this name to Inspector?
228 * What a confusing name...
230 * This function is exported so Piersol's stack trace displays
231 * reasonable information.
/*
 * Exported thin wrapper: passes `arg` unchanged to
 * scheduler_thread_proc_for_system_worker() and returns whatever it returns.
 * It adds no behavior of its own.
 */
233 void* __cilkrts_worker_stub(void* arg
)
235 return scheduler_thread_proc_for_system_worker(arg
);
238 // /* Return the lesser of the argument and the operating system
239 // limit on the number of workers (threads) that may or ought
241 // int sysdep_thread_limit(int n, int physical_cpus)
243 // /* On Linux thread creation fails somewhere short of the
244 // number of available processes. */
245 // struct rlimit lim;
247 // if (n > 256 + 2 * physical_cpus)
248 // n = 256 + 2 * physical_cpus;
250 // if (getrlimit(RLIMIT_NPROC, &lim) == 0 && lim.rlim_cur != RLIM_INFINITY)
252 // /* If the limit reads 0 or absurdly small, ignore it. */
253 // unsigned int maxproc = (lim.rlim_cur * 3 + 3) / 4;
254 // if (maxproc > 8 + 2 * physical_cpus && maxproc < n)
262 static void write_version_file (global_state_t
*, int);
264 /* Create n worker threads from base..top-1
/*
 * Start one pthread for each worker index i in the half-open range
 * [base, top). Each handle is stored in g->sysdep->threads[i] and each
 * thread's start routine is scheduler_thread_proc_for_system_worker.
 *
 * A failure is reported through __cilkrts_bug() with the index and the
 * pthread_create() status.
 * NOTE(review): the attribute and argument passed to pthread_create() and the
 * condition guarding the __cilkrts_bug() call are on lines not visible in
 * this view; presumably the call is made only when status is nonzero.
 *
 * @param g     Global state owning the thread-handle array.
 * @param base  First worker index to create (inclusive).
 * @param top   One past the last worker index to create.
 */
266 static void create_threads(global_state_t
*g
, int base
, int top
)
268 // TBD(11/30/12): We want to insert code providing the option of
269 // pinning system workers to cores.
270 for (int i
= base
; i
< top
; i
++) {
271 int status
= pthread_create(&g
->sysdep
->threads
[i
],
273 scheduler_thread_proc_for_system_worker
,
276 __cilkrts_bug("Cilk runtime error: thread creation (%d) failed: %d\n", i
, status
);
280 #if PARALLEL_THREAD_CREATE
281 static int volatile threads_created
= 0;
283 // Create approximately half of the worker threads, and then become a worker
285 static void * create_threads_and_work (void * arg
)
287 global_state_t
*g
= ((__cilkrts_worker
*)arg
)->g
;
289 create_threads(g
, g
->P
/2, g
->P
-1);
290 // Let the initial thread know that we're done.
293 // Ideally this turns into a tail call that wipes out this stack frame.
294 return scheduler_thread_proc_for_system_worker(arg
);
297 void __cilkrts_start_workers(global_state_t
*g
, int n
)
299 g
->workers_running
= 1;
302 if (!g
->sysdep
->threads
)
305 // Do we actually have any threads to create?
308 #if PARALLEL_THREAD_CREATE
310 // We create (a rounded up) half of the threads, thread one creates the rest
311 int half_threads
= (n
+1)/2;
313 // Create the first thread passing a different thread function, so that it creates threads itself
314 status
= pthread_create(&g
->sysdep
->threads
[0], NULL
, create_threads_and_work
, g
->workers
[0]);
317 __cilkrts_bug("Cilk runtime error: thread creation (0) failed: %d\n", status
);
319 // Then the rest of the ones we have to create
320 create_threads(g
, 1, half_threads
);
322 // Now wait for the first created thread to tell us it's created all of its threads.
323 // We could maybe drop this a bit lower and overlap with write_version_file.
324 while (!threads_created
)
327 // Simply create all the threads linearly here.
328 create_threads(g
, 0, n
);
331 // write the version information to a file if the environment is configured
332 // for it (the function makes the check).
333 write_version_file(g
, n
);
339 void __cilkrts_stop_workers(global_state_t
*g
)
343 // Tell the workers to give up
347 if (g
->workers_running
== 0)
350 if (!g
->sysdep
->threads
)
353 /* Make them all runnable. */
355 CILK_ASSERT(g
->workers
[0]->l
->signal_node
);
356 signal_node_msg(g
->workers
[0]->l
->signal_node
, 1);
359 for (i
= 0; i
< g
->P
- 1; ++i
) {
363 sc_status
= pthread_join(g
->sysdep
->threads
[i
], &th_status
);
365 __cilkrts_bug("Cilk runtime error: thread join (%d) failed: %d\n", i
, sc_status
);
368 g
->workers_running
= 0;
376 * @brief Returns the stack address for resuming execution of sf.
378 * This method takes in the top of the stack to use, and then returns
379 * a properly aligned address for resuming execution of sf.
381 * @param sf - The stack frame we want to resume executing.
382 * @param stack_base - The top of the stack we want to execute sf on.
385 static char* get_sp_for_executing_sf(char* stack_base
,
387 __cilkrts_stack_frame
*sf
)
389 // The original calculation that had been done to correct the stack
390 // pointer when resuming execution.
392 // But this code was never getting called in the eng branch anyway...
394 // TBD(11/30/12): This logic needs to be revisited to make sure that
395 // we are doing the proper calculation in reserving space for outgoing
396 // arguments on all platforms and architectures.
398 /* Preserve outgoing argument space and stack alignment on steal.
399 Outgoing argument space is bounded by the difference between
400 stack and frame pointers. Some user code is known to rely on
401 16 byte alignment. Maintain 32 byte alignment for future
403 #define SMASK 31 /* 32 byte alignment */
405 char *fp
= FP(sf
), *sp
= SP(sf
);
406 int fp_align
= (int)(size_t)fp
& SMASK
;
407 ptrdiff_t space
= fp
- sp
;
409 fprintf(stderr
, "Here: fp = %p, sp = %p\n", fp
, sp
);
410 char *top_aligned
= (char *)((((size_t)stack_base
- SMASK
) & ~(size_t)SMASK
) | fp_align
);
411 /* Don't allocate an unreasonable amount of stack space. */
413 fprintf(stderr
, "Here: stack_base = %p, top_aligned=%p, space=%ld\n",
414 stack_base
, top_aligned
, space
);
416 space
= 32 + (space
& SMASK
);
417 else if (space
> 40 * 1024)
418 space
= 40 * 1024 + (space
& SMASK
);
420 return top_aligned
- space
;
424 #define PERFORM_FRAME_SIZE_CALCULATION 0
426 char* new_stack_base
= stack_base
- 256;
428 #if PERFORM_FRAME_SIZE_CALCULATION
429 // If there is a frame size saved, then use that as the
430 // correction instead of 256.
431 if (ff
->frame_size
> 0) {
432 if (ff
->frame_size
< 40*1024) {
433 new_stack_base
= stack_base
- ff
->frame_size
;
436 // If for some reason, our frame size calculation is giving us
437 // a number which is bigger than about 10 pages, then
438 // there is likely something wrong here? Don't allocate
439 // an unreasonable amount of space.
440 new_stack_base
= stack_base
- 40*1024;
445 // Whatever correction we choose, align the final stack top.
446 // This alignment seems to be necessary in particular on 32-bit
447 // Linux, and possibly Mac. (Is 32-byte alignment sufficient?)
448 /* 256-byte alignment. Why not? */
449 const uintptr_t align_mask
= ~(256 -1);
450 new_stack_base
= (char*)((size_t)new_stack_base
& align_mask
);
451 return new_stack_base
;
454 char* sysdep_reset_jump_buffers_for_resume(cilk_fiber
* fiber
,
456 __cilkrts_stack_frame
*sf
)
459 fprintf(stderr
, "ThreadId=%p (fiber_proc_to_resume), Fiber %p. sf = %p. ff=%p, ff->sync_sp=%p\n",
460 cilkos_get_current_thread_id(),
467 void* sp
= (void*)get_sp_for_executing_sf(cilk_fiber_get_stack_base(fiber
), ff
, sf
);
470 /* Debugging: make sure stack is accessible. */
471 ((volatile char *)sp
)[-1];
473 // Adjust the saved_sp to account for the SP we're about to run. This will
474 // allow us to track fluctuations in the stack
476 fprintf(stderr
, "ThreadId=%p, about to take stack ff=%p, sp=%p, sync_sp=%p\n",
477 cilkos_get_current_thread_id(),
482 __cilkrts_take_stack(ff
, sp
);
487 NORETURN
sysdep_longjmp_to_sf(char* new_sp
,
488 __cilkrts_stack_frame
*sf
,
489 full_frame
*ff_for_exceptions
/* UNUSED on Unix */)
493 "ThreadId=%p. resume user code, sf=%p, new_sp = %p, original SP(sf) = %p, FP(sf) = %p\n",
494 cilkos_get_current_thread_id(), sf
, new_sp
, SP(sf
), FP(sf
));
497 // Set the stack pointer.
500 #ifdef RESTORE_X86_FP_STATE
501 if (CILK_FRAME_VERSION_VALUE(sf
->flags
) >= 1) {
502 // Restore the floating point state that was set in this frame at the
505 // This feature is only available in ABI 1 or later frames, and only
506 // needed on IA-32 or Intel64 processors.
507 restore_x86_fp_state(sf
);
511 CILK_LONGJMP(sf
->ctx
);
518 #include <sys/mman.h>
522 void __cilkrts_make_unrunnable_sysdep(__cilkrts_worker
*w
,
524 __cilkrts_stack_frame
*sf
,
528 (void)w
; /* unused */
533 if (ff
->frame_size
== 0)
534 ff
->frame_size
= __cilkrts_get_frame_size(sf
);
536 // Null loot's sp for debugging purposes (so we'll know it's not valid)
541 /*************************************************************
543 *************************************************************/
548 #include "internal/cilk_version.h"
551 #include <sys/utsname.h>
558 /* (Non-static) dummy function is used by get_runtime_path() to find the path
559 * to the .so containing the Cilk runtime.
561 void dummy_function() { }
564 * Return a string with the path to the Cilk runtime, or "unknown" if the path
565 * cannot be determined.
/*
 * Look up the file that contains this runtime by asking dladdr() which
 * loaded object holds dummy_function().
 *
 * @return info.dli_fname when dladdr() reports success (nonzero). That
 *         string belongs to the dynamic loader; the caller must not free it.
 *         NOTE(review): the fallback return is on lines not visible in this
 *         view; per the surrounding comments it is the literal "unknown".
 */
567 static const char *get_runtime_path ()
569 // dladdr is a glibc extension. If it's available, use it to find the path
// dladdr() returns nonzero when the address was matched to a loaded object.
573 if (0 != dladdr(dummy_function
, &info
))
574 return info
.dli_fname
;
577 // If dladdr isn't available, or dladdr failed, we can't know the path for
586 /* if the environment variable, CILK_VERSION, is defined, writes the version
587 * information to the specified file.
588 * g is the global state that was just created, and n is the number of workers
589 * that were made (or requested from RML) for it.
591 static void write_version_file (global_state_t
*g
, int n
)
593 const char *env
; // environment variable.
594 char buf
[256]; // print buffer.
598 struct utsname sys_info
;
600 int err
; // error code from system calls.
602 // if CILK_VERSION is not set, or if the file cannot be opened, fail
603 // silently. Otherwise open the file for writing (or use stderr or stdout
604 // if the user specifies).
605 if (NULL
== (env
= getenv("CILK_VERSION"))) return;
606 if (0 == strcasecmp(env
, "stderr")) fp
= stderr
;
607 else if (0 == strcasecmp(env
, "stdout")) fp
= stdout
;
608 else if (NULL
== (fp
= fopen(env
, "w"))) return;
610 // get a string for the current time. E.g.,
611 // Cilk runtime initialized: Thu Jun 10 13:28:00 2010
613 strftime(buf
, 256, "%a %b %d %H:%M:%S %Y", localtime(&t
));
614 fprintf(fp
, "Cilk runtime initialized: %s\n", buf
);
616 // Print runtime info. E.g.,
617 // Cilk runtime information
618 // ========================
619 // Cilk version: 2.0.0 Build 9184
620 // Built by willtor on host willtor-desktop
621 // Compilation date: Thu Jun 10 13:27:42 2010
622 // Compiled with ICC V99.9.9, ICC build date: 20100610
624 fprintf(fp
, "\nCilk runtime information\n");
625 fprintf(fp
, "========================\n");
626 fprintf(fp
, "Cilk version: %d.%d.%d Build %d\n",
632 char * vxWorksVer
= VXWORKS_VERSION
;
633 fprintf(fp
, "Cross compiled for %s\n",vxWorksVer
);
634 // User and host are not available if VxWorks is cross-compiled on a Windows build host
637 // User and host are not available for GCC builds
639 fprintf(fp
, "Built by "BUILD_USER
" on host "BUILD_HOST
"\n");
641 #endif // __VXWORKS__
643 // GCC has requested that this be removed for GCC builds
645 fprintf(fp
, "Compilation date: "__DATE__
" "__TIME__
"\n");
648 #ifdef __INTEL_COMPILER
649 // Compiled by the Intel C/C++ compiler.
650 fprintf(fp
, "Compiled with ICC V%d.%d.%d, ICC build date: %d\n",
651 __INTEL_COMPILER
/ 100,
652 (__INTEL_COMPILER
/ 10) % 10,
653 __INTEL_COMPILER
% 10,
654 __INTEL_COMPILER_BUILD_DATE
);
657 fprintf(fp
, "Compiled with GCC V%d.%d.%d\n",
660 __GNUC_PATCHLEVEL__
);
661 #endif // defined __INTEL_COMPILER
663 // Print system info. E.g.,
664 // System information
665 // ==================
666 // Cilk runtime path: /opt/icc/64/lib/libcilkrts.so.5
667 // System OS: Linux, release 2.6.28-19-generic
668 // System architecture: x86_64
670 fprintf(fp
, "\nSystem information\n");
671 fprintf(fp
, "==================\n");
672 fprintf(fp
, "Cilk runtime path: %s\n", get_runtime_path());
674 err
= uname(&sys_info
);
675 fprintf(fp
, "System OS: %s, release %s\n",
676 err
< 0 ? "unknown" : sys_info
.sysname
,
677 err
< 0 ? "?" : sys_info
.release
);
678 fprintf(fp
, "System architecture: %s\n",
679 err
< 0 ? "unknown" : sys_info
.machine
);
681 fprintf(fp
, "System OS: %s, release %s\n",
682 "VxWorks", RUNTIME_NAME RUNTIME_VERSION
);
683 fprintf(fp
, "System architecture: %s\n",
687 // Print thread info. E.g.,
688 // Thread information
689 // ==================
691 // Cilk workers requested: 8
692 // Thread creator: Private
694 fprintf(fp
, "\nThread information\n");
695 fprintf(fp
, "==================\n");
697 fprintf(fp
, "System cores: %d\n", (int)__builtin_popcount(vxCpuEnabledGet()));
699 fprintf(fp
, "System cores: %d\n", (int)sysconf(_SC_NPROCESSORS_ONLN
));
701 fprintf(fp
, "Cilk workers requested: %d\n", n
);
702 #if (PARALLEL_THREAD_CREATE)
703 fprintf(fp
, "Thread creator: Private (parallel)\n");
705 fprintf(fp
, "Thread creator: Private\n");
708 if (fp
!= stderr
&& fp
!= stdout
) fclose(fp
);
709 else fflush(fp
); // flush the handle buffer if it is stdout or stderr.
714 * __cilkrts_establish_c_stack
716 * Tell Cilkscreen about the user stack bounds.
718 * Note that the Cilk V1 runtime only included the portion of the stack from
719 * the entry into Cilk, down. We don't appear to be able to find that, but
720 * I think this will be sufficient.
723 void __cilkrts_establish_c_stack(void)
725 /* FIXME: Not implemented. */
727 /* TBD: Do we need this */
729 void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end);
732 MEMORY_BASIC_INFORMATION mbi;
734 r = VirtualQuery (&mbi,
738 __cilkrts_cilkscreen_establish_c_stack((char *)mbi.BaseAddress,
739 (char *)mbi.BaseAddress + mbi.RegionSize);
745 * internal_enforce_global_visibility
747 * Ensure global visibility of public symbols, for proper Cilk-TBB interop.
749 * If Cilk runtime is loaded dynamically, its symbols might remain unavailable
750 * for global search with dladdr; that might prevent TBB from finding Cilk
751 * in the process address space and initiating the interop protocol.
752 * The workaround is for the library to open itself with RTLD_GLOBAL flag.
/*
 * Re-open the runtime's own library with RTLD_GLOBAL so that its public
 * symbols become visible to global symbol searches, then close the extra
 * handle straight away.
 *
 * The path comes from get_runtime_path(). A NULL handle from dlopen() is
 * ignored silently, so this is a best-effort operation with no error report.
 * NOTE(review): if get_runtime_path() cannot determine the path, dlopen() is
 * handed its fallback string and will presumably just fail - confirm.
 */
755 static __attribute__((noinline
))
756 void internal_enforce_global_visibility()
// RTLD_LAZY: defer symbol resolution; only the RTLD_GLOBAL promotion matters.
759 void* handle
= dlopen( get_runtime_path(), RTLD_GLOBAL
|RTLD_LAZY
);
761 /* For proper reference counting, close the handle immediately. */
762 if( handle
) dlclose(handle
);
768 c-file-style:"bsd" **
770 indent-tabs-mode:nil **