/* os-unix.c                  -*-C-*-
 *
 *************************************************************************
 *
 * Copyright (C) 2009-2016, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *********************************************************************
 *
 * PLEASE NOTE: This file is a downstream copy of a file maintained in
 * a repository at cilkplus.org. Changes made to this file that are not
 * submitted through the contribution process detailed at
 * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
 * time that a new version is released. Changes only submitted to the
 * GNU compiler collection or posted to the git repository at
 * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
 * not tracked.
 *
 * We welcome your contributions to this open source project. Thank you
 * for your assistance in helping us improve Cilk Plus.
 **************************************************************************/
#include "os.h"
#include "bug.h"
#include "cilk_malloc.h"
#include <internal/abi.h>

#if defined __linux__
#   include <sys/sysinfo.h>
#   include <sys/syscall.h>

#elif defined __APPLE__
#   include <sys/sysctl.h>
    // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output

#elif defined __VXWORKS__
#   include <vxWorks.h>
#   include <vxCpuLib.h>
#   include <taskLib.h>

// Solaris
#elif defined __sun__ && defined __svr4__
#   include <sched.h>

// OSes we know about which don't require any additional files
#elif defined __CYGWIN__ || \
      defined __DragonFly__ || \
      defined __FreeBSD__ || \
      defined __GNU__
// No additional include files

#else
#   error "Unsupported OS"
#endif

#include <assert.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/types.h>
// /* Thread-local storage */
// #ifdef _WIN32
// typedef unsigned cilkos_tls_key_t;
// #else
// typedef pthread_key_t cilkos_tls_key_t;
// #endif
// cilkos_tls_key_t cilkos_allocate_tls_key();
// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
// void* cilkos_get_tls_pointer(cilkos_tls_key_t key);
#if !defined CILK_WORKER_TLS
static int cilk_keys_defined;
static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key;

#if SUPPORT_GET_CURRENT_FIBER > 0
static pthread_key_t fiber_key;
#endif

static void *serial_worker;


// This destructor is called when a pthread dies to deallocate the
// pedigree node.
static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr)
{
    __cilkrts_pedigree* pedigree_tls
        = (__cilkrts_pedigree*)pedigree_tls_ptr;
    if (pedigree_tls) {
        // Assert that we have either one or two nodes
        // left in the pedigree chain.
        // If we have more, then something is going wrong...
        CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent);
        __cilkrts_free(pedigree_tls);
    }
}

void __cilkrts_init_tls_variables(void)
{
    int status;
    /* This will be called once in serial execution before any
       Cilk parallelism so we do not need to worry about races
       on cilk_keys_defined. */
    if (cilk_keys_defined)
        return;
    status = pthread_key_create(&worker_key, NULL);
    CILK_ASSERT (status == 0);
    status = pthread_key_create(&pedigree_leaf_key,
                                __cilkrts_pedigree_leaf_destructor);
    CILK_ASSERT (status == 0);
    status = pthread_key_create(&tbb_interop_key, NULL);
    CILK_ASSERT (status == 0);

#if SUPPORT_GET_CURRENT_FIBER > 0
    status = pthread_key_create(&fiber_key, NULL);
    CILK_ASSERT (status == 0);
#endif
    cilk_keys_defined = 1;
    return;
}
COMMON_SYSDEP
void* cilkos_get_current_thread_id(void)
{
    return (void*)pthread_self();
}


CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker()
{
    if (__builtin_expect(cilk_keys_defined, 1))
        return (__cilkrts_worker *)pthread_getspecific(worker_key);
    else
        return serial_worker;
}


CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast()
{
    return (__cilkrts_worker *)pthread_getspecific(worker_key);
}

COMMON_SYSDEP
__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void)
{
    if (__builtin_expect(cilk_keys_defined, 1))
        return (__cilk_tbb_stack_op_thunk *)
            pthread_getspecific(tbb_interop_key);
    else
        return 0;
}

// This counter should be updated atomically.
static int __cilkrts_global_pedigree_tls_counter = -1;

COMMON_SYSDEP
__cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
{
    __cilkrts_pedigree *pedigree_tls;
    if (__builtin_expect(cilk_keys_defined, 1)) {
        pedigree_tls =
            (struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key);
    }
    else {
        return 0;
    }

    if (!pedigree_tls && create_new) {
        // This call creates two nodes, X and Y.
        // X == pedigree_tls[0] is the leaf node, which gets copied
        // in and out of a user worker w when w binds and unbinds.
        // Y == pedigree_tls[1] is the root node,
        // which is a constant node that represents the user worker
        // thread w.
        pedigree_tls = (__cilkrts_pedigree*)
            __cilkrts_malloc(2 * sizeof(__cilkrts_pedigree));

        // This call sets the TLS pointer to the new node.
        __cilkrts_set_tls_pedigree_leaf(pedigree_tls);

        pedigree_tls[0].rank = 0;
        pedigree_tls[0].parent = &pedigree_tls[1];

        // Create Y, whose rank begins as the global counter value.
        pedigree_tls[1].rank =
            __sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1);

        pedigree_tls[1].parent = NULL;
        CILK_ASSERT(pedigree_tls[1].rank != -1);
    }
    return pedigree_tls;
}
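/* A sketch of the two-node chain built above:
 *
 *     pedigree_tls[0]              pedigree_tls[1]
 *     (leaf X, rank 0) --parent--> (root Y, rank = counter) --parent--> NULL
 *
 * The leaf is the node that is copied in and out of a user worker as the
 * thread binds and unbinds; the root stays fixed for the thread's lifetime.
 */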
#if SUPPORT_GET_CURRENT_FIBER > 0
COMMON_SYSDEP
cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
{
    if (__builtin_expect(cilk_keys_defined, 1))
        return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
    else
        return NULL;
}
#endif

COMMON_SYSDEP
void __cilkrts_set_tls_worker(__cilkrts_worker *w)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(worker_key, w);
        CILK_ASSERT (status == 0);
        return;
    }
    else
    {
        serial_worker = w;
    }
}

COMMON_SYSDEP
void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(tbb_interop_key, t);
        CILK_ASSERT (status == 0);
        return;
    }
    abort();
}

COMMON_SYSDEP
void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
        CILK_ASSERT (status == 0);
        return;
    }
    abort();
}

#if SUPPORT_GET_CURRENT_FIBER > 0
COMMON_SYSDEP
void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(fiber_key, fiber);
        CILK_ASSERT (status == 0);
        return;
    }
    abort();
}
#endif

#else
void __cilkrts_init_tls_variables(void)
{
}
#endif
#if defined (__linux__) && ! defined(__ANDROID__)
/*
 * Get the thread id, rather than the pid.  In the case of MIC offload, it's
 * possible that we have multiple threads entering Cilk, and each has a
 * different affinity.
 */
static pid_t linux_gettid(void)
{
    return syscall(SYS_gettid);
}

/*
 * On Linux we look at the thread affinity mask and restrict ourselves to one
 * thread for each of the hardware contexts to which we are bound.
 * Therefore if the user does
 *     % taskset 0-1 cilkProgram
 *     # restrict execution to hardware contexts zero and one
 * the Cilk program will only use two threads even if it is running on a
 * machine that has 32 hardware contexts.
 * This is the right thing to do, because the threads are restricted to two
 * hardware contexts by the affinity mask set by taskset, and if we were to
 * create extra threads they would simply oversubscribe the hardware resources
 * we can use.
 * This is particularly important on MIC in offload mode, where the affinity
 * mask is set by the offload library to force the offload code away from
 * cores that have offload support threads running on them.
 */
static int linux_get_affinity_count ()
{
    long system_cores = sysconf(_SC_NPROCESSORS_ONLN);
    int affinity_cores = 0;

#if defined HAVE_PTHREAD_AFFINITY_NP

#if defined (CPU_ALLOC_SIZE) && ! defined(DONT_USE_CPU_ALLOC_SIZE)
    // Statically allocated cpu_set_t's max out at 1024 cores.  If
    // CPU_ALLOC_SIZE is available, use it to support large numbers of cores.
    size_t cpusetsize = CPU_ALLOC_SIZE(system_cores);
    cpu_set_t *process_mask = (cpu_set_t *)__cilkrts_malloc(cpusetsize);

    // Get the affinity mask for this thread
    int err = pthread_getaffinity_np(pthread_self(),
                                     cpusetsize,
                                     process_mask);

    // Count the available cores.
    if (0 == err)
        affinity_cores = CPU_COUNT_S(cpusetsize, process_mask);

    __cilkrts_free(process_mask);

#else
    // CPU_ALLOC_SIZE isn't available, or this is the Intel compiler build
    // and we have to support RHEL5.  Use a statically allocated cpu_set_t.

    cpu_set_t process_mask;

    // Extract the thread affinity mask
    int err = pthread_getaffinity_np(pthread_self(),
                                     sizeof(process_mask),
                                     &process_mask);

    if (0 == err)
    {
        // We have extracted the mask OK, so now we can count the number of
        // threads in it.  This is linear in the maximum number of CPUs
        // available.  We could do a logarithmic version, if we assume the
        // format of the mask, but it's not really worth it.  We only call
        // this at thread startup anyway.
        int i;
        for (i = 0; i < CPU_SETSIZE; i++)
        {
            if (CPU_ISSET(i, &process_mask))
            {
                affinity_cores++;
            }
        }
    }
#endif // CPU_ALLOC_SIZE
#endif // HAVE_PTHREAD_AFFINITY_NP

    // If we've got a count of cores this thread is supposed to use, that's
    // the number of cores we'll use.  Otherwise, default to the number of
    // cores on the system.
    if (0 == affinity_cores)
        return (int)system_cores;
    else
        return affinity_cores;
}
#endif // defined (__linux__) && ! defined(__ANDROID__)
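/* Worked example for the logic above (numbers are illustrative): on a
 * machine with 32 hardware contexts,
 *
 *     % taskset -c 0-1 cilkProgram
 *
 * binds the process to contexts 0 and 1.  pthread_getaffinity_np() then
 * reports a mask with two bits set, affinity_cores comes out as 2, and the
 * runtime creates two workers instead of 32.
 */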
/*
 * __cilkrts_hardware_cpu_count
 *
 * Returns the number of available CPUs on this hardware.  This is
 * architecture-specific.
 */

COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
{
#if defined __ANDROID__ || \
    defined __CYGWIN__ || \
    defined __DragonFly__ || \
    defined __FreeBSD__ || \
    (defined(__sun__) && defined(__svr4__))
    return (int)sysconf(_SC_NPROCESSORS_ONLN);
#elif defined __MIC__
    /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
    /// on KNC.  Also, ignore the last core.
    int count = (int)sysconf(_SC_NPROCESSORS_ONLN);
    return count/2 - 2;
#elif defined __linux__
    return linux_get_affinity_count();
#elif defined __APPLE__
    int count;
    size_t len = sizeof count;
    int status = sysctlbyname("hw.logicalcpu", &count, &len, 0, 0);
    assert(0 == status);

    return count;
#elif defined __VXWORKS__
    return __builtin_popcount(vxCpuEnabledGet());
#else
#   error "Unsupported architecture"
#endif
}
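/* Worked example for the KNC branch above (assuming a 61-core coprocessor
 * with 4 hardware threads per core): sysconf() reports 244 logical CPUs,
 * and 244/2 - 2 = 120 workers, i.e. two threads on each core except the
 * last, which is skipped entirely.
 */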
COMMON_SYSDEP void __cilkrts_idle(void)
{
    // This is another version of __cilkrts_yield() to be used when
    // silencing workers that are not stealing work.
#if defined(__ANDROID__) || \
    defined(__FreeBSD__) || \
    defined(__VXWORKS__) || \
    (defined(__sun__) && defined(__svr4__))
    sched_yield();
#elif defined(__MIC__)
    _mm_delay_32(1024);
#elif defined(__linux__) || \
      defined(__APPLE__) || \
      defined(__CYGWIN__)
    usleep(10000);
#else
#   error "Unsupported architecture"
#endif
}

COMMON_SYSDEP void __cilkrts_sleep(void)
{
#ifdef __VXWORKS__
    taskDelay(1);
#else
    usleep(1);
#endif
}
COMMON_SYSDEP void __cilkrts_yield(void)
{
#if defined(__ANDROID__) || \
    defined(__APPLE__) || \
    defined(__CYGWIN__) || \
    defined(__FreeBSD__) || \
    defined(__VXWORKS__) || \
    (defined(__sun__) && defined(__svr4__))
    // Call sched_yield to yield quantum.  I'm not sure why we
    // don't do this on Linux also.
    sched_yield();
#elif defined(__MIC__)
    // On MIC, pthread_yield() really trashes things.  Arch's measurements
    // showed that calling _mm_delay_32() (or doing nothing) was a better
    // option.  Delaying 1024 clock cycles is a reasonable compromise between
    // giving up the processor and latency starting up when work becomes
    // available.
    _mm_delay_32(1024);
#elif defined(__linux__)
    // On Linux, call pthread_yield (which in turn will call sched_yield)
    // to yield quantum.
    pthread_yield();
#else
#   error "Unsupported architecture"
#endif
}
COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
                                           const char* varname)
{
    CILK_ASSERT(value);
    CILK_ASSERT(varname);

    const char* envstr = getenv(varname);
    if (envstr)
    {
        size_t len = cilk_strlen(envstr);
        if (len > vallen - 1)
            return len + 1;

        cilk_strcpy_s(value, vallen, envstr);
        return len;
    }
    else
    {
        value[0] = '\0';
        return 0;
    }
}
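/* Usage sketch (illustrative; "CILK_NWORKERS" is just an example name):
 *
 *     char buf[64];
 *     size_t len = cilkos_getenv(buf, sizeof(buf), "CILK_NWORKERS");
 *     if (len > sizeof(buf))
 *         ;  // value did not fit; len is the buffer size required
 *     else if (len > 0)
 *         ;  // buf holds the NUL-terminated value; len == strlen(buf)
 *     else
 *         ;  // variable not set; buf[0] == '\0'
 */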
/*
 * Unrecoverable error: Print an error message and abort execution.
 */
COMMON_SYSDEP void cilkos_error(const char *fmt, ...)
{
    va_list l;
    fflush(NULL);
    fprintf(stderr, "Cilk error: ");
    va_start(l, fmt);
    vfprintf(stderr, fmt, l);
    va_end(l);
    fprintf(stderr, "Exiting.\n");
    fflush(stderr);

    abort();
}

/*
 * Print a warning message and return.
 */
COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
{
    va_list l;
    fflush(NULL);
    fprintf(stderr, "Cilk warning: ");
    va_start(l, fmt);
    vfprintf(stderr, fmt, l);
    va_end(l);
    fflush(stderr);
}
#ifdef __VXWORKS__
#ifdef _WRS_KERNEL
void cilkStart()
{
    __cilkrts_init_tls_variables();
}
#else
_WRS_CONSTRUCTOR(cilkInit, 100)
{
    __cilkrts_init_tls_variables();
}
#endif
#else
static void __attribute__((constructor)) init_once()
{
    /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/
    __cilkrts_init_tls_variables();
}
#endif
#define PAGE 4096
#define CILK_MIN_STACK_SIZE (4*PAGE)
// Default size for the stacks that we create in Cilk for Unix.
#define CILK_DEFAULT_STACK_SIZE 0x100000

/*
 * Convert the user's specified stack size into a "reasonable" value
 * for this OS.
 */
size_t cilkos_validate_stack_size(size_t specified_stack_size) {
    // A specified size of 0 means "use the default".
    if (specified_stack_size == 0) {
        CILK_ASSERT((CILK_DEFAULT_STACK_SIZE % PAGE) == 0);
        return CILK_DEFAULT_STACK_SIZE;
    }
    // Round values in between 0 and CILK_MIN_STACK_SIZE up to
    // CILK_MIN_STACK_SIZE.
    if (specified_stack_size <= CILK_MIN_STACK_SIZE) {
        return CILK_MIN_STACK_SIZE;
    }
    if ((specified_stack_size % PAGE) > 0) {
        // Round the user's stack size value up to the nearest page boundary.
        return (PAGE * (1 + specified_stack_size / PAGE));
    }
    return specified_stack_size;
}
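/* Worked examples (with PAGE == 4096, so CILK_MIN_STACK_SIZE == 16384):
 *
 *     cilkos_validate_stack_size(0)      => 0x100000 (the 1 MB default)
 *     cilkos_validate_stack_size(8000)   => 16384    (raised to the minimum)
 *     cilkos_validate_stack_size(20000)  => 20480    (rounded up to 5 pages)
 *     cilkos_validate_stack_size(32768)  => 32768    (already page-aligned)
 */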
long cilkos_atomic_add(volatile long* p, long x)
{
    return __sync_add_and_fetch(p, x);
}
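/* Usage sketch (illustrative): a thread-safe shared counter.
 *
 *     static volatile long next_id = 0;
 *     long id = cilkos_atomic_add(&next_id, 1);  // returns the new value
 *
 * __sync_add_and_fetch is a GCC atomic builtin with full-barrier semantics.
 */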
/* End os-unix.c */