/* os-unix.c                  -*-C-*-
 *
 *************************************************************************
 *
 *  @copyright
 *  Copyright (C) 2009-2013, Intel Corporation
 *  All rights reserved.
 *
 *  @copyright
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  @copyright
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/
39 #ifdef __linux__
40 // define _GNU_SOURCE before *any* #include.
41 // Even <stdint.h> will break later #includes if this macro is not
42 // already defined when it is #included.
43 # define _GNU_SOURCE
44 #endif
46 #include "os.h"
47 #include "bug.h"
48 #include "cilk_malloc.h"
49 #include <internal/abi.h>
51 #if defined __linux__
52 # include <sys/sysinfo.h>
53 # include <sys/syscall.h>
54 #elif defined __APPLE__
55 # include <sys/sysctl.h>
56 // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
57 #elif defined __FreeBSD__
58 // No additional include files
59 #elif defined __CYGWIN__
60 // Cygwin on Windows - no additional include files
61 #elif defined __VXWORKS__
62 # include <vxWorks.h>
63 # include <vxCpuLib.h>
64 # include <taskLib.h>
65 // Solaris
66 #elif defined __sun__ && defined __svr4__
67 # include <sched.h>
68 #else
69 # error "Unsupported OS"
70 #endif
72 #include <stdarg.h>
73 #include <stddef.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <unistd.h>
78 #include <pthread.h>
79 #include <sys/types.h>
83 // /* Thread-local storage */
84 // #ifdef _WIN32
85 // typedef unsigned cilkos_tls_key_t;
86 // #else
87 // typedef pthread_key_t cilkos_tls_key_t;
88 // #endif
89 // cilkos_tls_key_t cilkos_allocate_tls_key();
90 // void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
91 // void* cilkos_get_tls_pointer(cilkos_tls_key_t key);
93 #if !defined CILK_WORKER_TLS
94 static int cilk_keys_defined;
95 static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key;
97 #if SUPPORT_GET_CURRENT_FIBER > 0
98 static pthread_key_t fiber_key;
99 #endif
101 static void *serial_worker;
104 // This destructor is called when a pthread dies to deallocate the
105 // pedigree node.
106 static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr)
108 __cilkrts_pedigree* pedigree_tls
109 = (__cilkrts_pedigree*)pedigree_tls_ptr;
110 if (pedigree_tls) {
111 // Assert that we have either one or two nodes
112 // left in the pedigree chain.
113 // If we have more, then something is going wrong...
114 CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent);
115 __cilkrts_free(pedigree_tls);
119 void __cilkrts_init_tls_variables(void)
121 int status;
122 /* This will be called once in serial execution before any
123 Cilk parallelism so we do not need to worry about races
124 on cilk_keys_defined. */
125 if (cilk_keys_defined)
126 return;
127 status = pthread_key_create(&worker_key, NULL);
128 CILK_ASSERT (status == 0);
129 status = pthread_key_create(&pedigree_leaf_key,
130 __cilkrts_pedigree_leaf_destructor);
131 CILK_ASSERT (status == 0);
132 status = pthread_key_create(&tbb_interop_key, NULL);
133 CILK_ASSERT (status == 0);
135 #if SUPPORT_GET_CURRENT_FIBER > 0
136 status = pthread_key_create(&fiber_key, NULL);
137 CILK_ASSERT (status == 0);
138 #endif
139 cilk_keys_defined = 1;
140 return;
143 COMMON_SYSDEP
144 void* cilkos_get_current_thread_id(void)
146 return (void*)pthread_self();
150 CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker()
152 if (__builtin_expect(cilk_keys_defined, 1))
153 return (__cilkrts_worker *)pthread_getspecific(worker_key);
154 else
155 return serial_worker;
159 CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast()
161 return (__cilkrts_worker *)pthread_getspecific(worker_key);
164 COMMON_SYSDEP
165 __cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void)
167 if (__builtin_expect(cilk_keys_defined, 1))
168 return (__cilk_tbb_stack_op_thunk *)
169 pthread_getspecific(tbb_interop_key);
170 else
171 return 0;
174 // This counter should be updated atomically.
175 static int __cilkrts_global_pedigree_tls_counter = -1;
177 COMMON_SYSDEP
178 __cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
180 __cilkrts_pedigree *pedigree_tls;
181 if (__builtin_expect(cilk_keys_defined, 1)) {
182 pedigree_tls =
183 (struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key);
185 else {
186 return 0;
189 if (!pedigree_tls && create_new) {
190 // This call creates two nodes, X and Y.
191 // X == pedigree_tls[0] is the leaf node, which gets copied
192 // in and out of a user worker w when w binds and unbinds.
193 // Y == pedigree_tls[1] is the root node,
194 // which is a constant node that represents the user worker
195 // thread w.
196 pedigree_tls = (__cilkrts_pedigree*)
197 __cilkrts_malloc(2 * sizeof(__cilkrts_pedigree));
199 // This call sets the TLS pointer to the new node.
200 __cilkrts_set_tls_pedigree_leaf(pedigree_tls);
202 pedigree_tls[0].rank = 0;
203 pedigree_tls[0].parent = &pedigree_tls[1];
205 // Create Y, whose rank begins as the global counter value.
206 pedigree_tls[1].rank =
207 __sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1);
209 pedigree_tls[1].parent = NULL;
210 CILK_ASSERT(pedigree_tls[1].rank != -1);
212 return pedigree_tls;
#if SUPPORT_GET_CURRENT_FIBER > 0
// Return the current thread's fiber, or NULL before TLS init.
COMMON_SYSDEP
cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
{
    if (__builtin_expect(cilk_keys_defined, 1))
        return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
    else
        return NULL;
}
#endif
226 COMMON_SYSDEP
227 void __cilkrts_set_tls_worker(__cilkrts_worker *w)
229 if (__builtin_expect(cilk_keys_defined, 1)) {
230 int status;
231 status = pthread_setspecific(worker_key, w);
232 CILK_ASSERT (status == 0);
233 return;
235 else
237 serial_worker = w;
241 COMMON_SYSDEP
242 void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
244 if (__builtin_expect(cilk_keys_defined, 1)) {
245 int status;
246 status = pthread_setspecific(tbb_interop_key, t);
247 CILK_ASSERT (status == 0);
248 return;
250 abort();
253 COMMON_SYSDEP
254 void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
256 if (__builtin_expect(cilk_keys_defined, 1)) {
257 int status;
258 status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
259 CILK_ASSERT (status == 0);
260 return;
262 abort();
#if SUPPORT_GET_CURRENT_FIBER > 0
// Record fiber as the current thread's fiber.  Aborts before TLS init.
COMMON_SYSDEP
void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(fiber_key, fiber);
        CILK_ASSERT (status == 0);
        return;
    }
    abort();
}
#endif
279 #else
280 void __cilkrts_init_tls_variables(void)
283 #endif
285 #if defined (__linux__) && ! defined(__ANDROID__)
287 * Get the thread id, rather than the pid. In the case of MIC offload, it's
288 * possible that we have multiple threads entering Cilk, and each has a
289 * different affinity.
291 static pid_t linux_gettid(void)
293 return syscall(SYS_gettid);
297 * On Linux we look at the thread affinity mask and restrict ourself to one
298 * thread for each of the hardware contexts to which we are bound.
299 * Therefore if user does
300 * % taskset 0-1 cilkProgram
301 * # restrict execution to hardware contexts zero and one
302 * the Cilk program will only use two threads even if it is running on a
303 * machine that has 32 hardware contexts.
304 * This is the right thing to do, because the threads are restricted to two
305 * hardware contexts by the affinity mask set by taskset, and if we were to
306 * create extra threads they would simply oversubscribe the hardware resources
307 * we can use.
308 * This is particularly important on MIC in offload mode, where the affinity
309 * mask is set by the offload library to force the offload code away from
310 * cores that have offload support threads running on them.
312 static int linux_get_affinity_count (int tid)
314 #if !defined HAVE_PTHREAD_AFFINITY_NP
315 return 0;
316 #else
318 cpu_set_t process_mask;
320 // Extract the thread affinity mask
321 int err = sched_getaffinity (tid, sizeof(process_mask),&process_mask);
323 if (0 != err)
325 return 0;
328 // We have extracted the mask OK, so now we can count the number of threads
329 // in it. This is linear in the maximum number of CPUs available, We
330 // could do a logarithmic version, if we assume the format of the mask,
331 // but it's not really worth it. We only call this at thread startup
332 // anyway.
333 int available_procs = 0;
334 int i;
335 for (i = 0; i < CPU_SETSIZE; i++)
337 if (CPU_ISSET(i, &process_mask))
339 available_procs++;
343 return available_procs;
344 #endif
346 #endif // defined (__linux__) && ! defined(__ANDROID__)
349 * __cilkrts_hardware_cpu_count
351 * Returns the number of available CPUs on this hardware. This is architecture-
352 * specific.
355 COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
357 #if defined __ANDROID__ || (defined(__sun__) && defined(__svr4__))
358 return sysconf (_SC_NPROCESSORS_ONLN);
359 #elif defined __MIC__
360 /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
361 /// on KNC. Also, ignore the last core.
362 int P = sysconf (_SC_NPROCESSORS_ONLN);
363 return P/2 - 2;
364 #elif defined __linux__
365 int affinity_count = linux_get_affinity_count(linux_gettid());
367 return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN);
368 #elif defined __APPLE__
369 int count = 0;
370 int cmd[2] = { CTL_HW, HW_NCPU };
371 size_t len = sizeof count;
372 int status = sysctl(cmd, 2, &count, &len, 0, 0);
373 assert(status >= 0);
374 assert((unsigned)count == count);
376 return count;
377 #elif defined __FreeBSD__ || defined __CYGWIN__
378 int ncores = sysconf(_SC_NPROCESSORS_ONLN);
380 return ncores;
381 // Just get the number of processors
382 // return sysconf(_SC_NPROCESSORS_ONLN);
383 #elif defined __VXWORKS__
384 return __builtin_popcount( vxCpuEnabledGet() );
385 #else
386 #error "Unknown architecture"
387 #endif
390 COMMON_SYSDEP void __cilkrts_sleep(void)
392 #ifdef __VXWORKS__
393 taskDelay(1);
394 #else
395 usleep(1);
396 #endif
399 COMMON_SYSDEP void __cilkrts_yield(void)
401 #if __APPLE__ || __FreeBSD__ || __VXWORKS__
402 // On MacOS, call sched_yield to yield quantum. I'm not sure why we
403 // don't do this on Linux also.
404 sched_yield();
405 #elif defined(__MIC__)
406 // On MIC, pthread_yield() really trashes things. Arch's measurements
407 // showed that calling _mm_delay_32() (or doing nothing) was a better
408 // option. Delaying 1024 clock cycles is a reasonable compromise between
409 // giving up the processor and latency starting up when work becomes
410 // available
411 _mm_delay_32(1024);
412 #elif defined(__ANDROID__) || (defined(__sun__) && defined(__svr4__))
413 // On Android and Solaris, call sched_yield to yield quantum. I'm not
414 // sure why we don't do this on Linux also.
415 sched_yield();
416 #else
417 // On Linux, call pthread_yield (which in turn will call sched_yield)
418 // to yield quantum.
419 pthread_yield();
420 #endif
423 COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
424 const char* varname)
426 CILK_ASSERT(value);
427 CILK_ASSERT(varname);
429 const char* envstr = getenv(varname);
430 if (envstr)
432 size_t len = strlen(envstr);
433 if (len > vallen - 1)
434 return len + 1;
436 strcpy(value, envstr);
437 return len;
439 else
441 value[0] = '\0';
442 return 0;
447 * Unrecoverable error: Print an error message and abort execution.
449 COMMON_SYSDEP void cilkos_error(const char *fmt, ...)
451 va_list l;
452 fflush(NULL);
453 fprintf(stderr, "Cilk error: ");
454 va_start(l, fmt);
455 vfprintf(stderr, fmt, l);
456 va_end(l);
457 fprintf(stderr, "Exiting.\n");
458 fflush(stderr);
460 abort();
464 * Print a warning message and return.
466 COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
468 va_list l;
469 fflush(NULL);
470 fprintf(stderr, "Cilk warning: ");
471 va_start(l, fmt);
472 vfprintf(stderr, fmt, l);
473 va_end(l);
474 fflush(stderr);
// Run automatically when the runtime shared object is loaded, before
// main(): set up the TLS keys while execution is still serial.
static void __attribute__((constructor)) init_once()
{
    /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/
    __cilkrts_init_tls_variables();
}
#define PAGE 4096
#define CILK_MIN_STACK_SIZE (4*PAGE)
// Default size for the stacks that we create in Cilk for Unix.
#define CILK_DEFAULT_STACK_SIZE 0x100000

/*
 * Convert the user's specified stack size into a "reasonable" value
 * for this OS: 0 maps to the default, small values are raised to the
 * minimum, and everything else is rounded up to a page boundary.
 */
size_t cilkos_validate_stack_size(size_t specified_stack_size) {
    // Convert any negative value to the default.
    // (size_t is unsigned, so a caller's negative value arrives as a
    // huge number; only an explicit 0 selects the default here.)
    if (specified_stack_size == 0) {
        CILK_ASSERT((CILK_DEFAULT_STACK_SIZE % PAGE) == 0);
        return CILK_DEFAULT_STACK_SIZE;
    }
    // Round values in between 0 and CILK_MIN_STACK_SIZE up to
    // CILK_MIN_STACK_SIZE.
    if (specified_stack_size <= CILK_MIN_STACK_SIZE) {
        return CILK_MIN_STACK_SIZE;
    }
    if ((specified_stack_size % PAGE) > 0) {
        // Round the user's stack size value up to nearest page boundary.
        return (PAGE * (1 + specified_stack_size / PAGE));
    }
    return specified_stack_size;
}
/*
 * Atomically add x to *p and return the new value of *p.
 */
long cilkos_atomic_add(volatile long* p, long x)
{
    // Full-barrier atomic read-modify-write (GCC legacy __sync builtin).
    return __sync_add_and_fetch(p, x);
}
/* End os-unix.c */