PR bootstrap/82916
[official-gcc.git] / libcilkrts / include / internal / abi.h
blob3f38485d26b03c7c970b42bf62ff4916304f7283
1 /*
2 * abi.h
4 * Copyright (C) 2009-2016, Intel Corporation
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
28 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
31 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
34 * *********************************************************************
36 * PLEASE NOTE: This file is a downstream copy of a file maintained in
37 * a repository at cilkplus.org. Changes made to this file that are not
38 * submitted through the contribution process detailed at
39 * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
40 * time that a new version is released. Changes only submitted to the
41 * GNU compiler collection or posted to the git repository at
42 * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
43 * not tracked.
45 * We welcome your contributions to this open source project. Thank you
46 * for your assistance in helping us improve Cilk Plus.
48 ******************************************************************************/
50 /**
51 * @file abi.h
53 * @brief Defines the application binary interface between the compiler and
54 * the Intel Cilk Plus runtime.
57 #ifndef CILK_INTERNAL_ABI_H
58 #define CILK_INTERNAL_ABI_H
61 #include <cilk/common.h>
62 #include <stddef.h> // Needed to define size_t
/**
 * Jump buffers are OS and architecture dependent.
 *
 * This section selects the __CILK_JUMP_BUFFER type for the target and, where
 * the compiler/CRT provides them, the CILK_SETJMP / CILK_LONGJMP macros.
 */
#if ! defined(_MSC_VER)
/* Non-Windows - only need 5 registers for the jump buffer for both IA32 and Intel64 */
typedef void *__CILK_JUMP_BUFFER[5];

/** OS-specific implementation of setjmp */
#   define CILK_SETJMP(X) __builtin_setjmp(X)
/** OS-specific implementation of longjmp */
#   define CILK_LONGJMP(X) __builtin_longjmp(X,1)
#else
/* Windows - things are a little more complicated */
#   if defined(_M_X64)
/* Intel64 - Use an OS-defined jump buffer */
#       include <setjmp.h>
typedef jmp_buf __CILK_JUMP_BUFFER;

#       define CILK_SETJMP(X) setjmp(X)
#       define CILK_LONGJMP(X) longjmp(X, 1)
#   elif defined(_M_IX86)
/**
 * Windows x86 - Use a simplified version of the Windows jump buffer for x86.
 * setjmp is provided by __cilkrts_setjmp, which passes the jump buffer in EAX
 * and the destination in EDX. longjmp is provided by an internal routine
 * which uses this structure.
 *
 * Note that no CILK_SETJMP / CILK_LONGJMP macros are defined in this branch.
 */
typedef struct
{
    unsigned long Ebp;
    unsigned long Ebx;
    unsigned long Edi;
    unsigned long Esi;
    unsigned long Esp;
    unsigned long Eip;
    unsigned long Registration;
    unsigned long TryLevel;
} __CILK_JUMP_BUFFER;

#   else
#       error Unexpected architecture - Need to define __CILK_JUMP_BUFFER
#   endif /* _M_X64 */

#endif /* ! defined(_MSC_VER) */
/* struct tags */
typedef struct __cilkrts_stack_frame __cilkrts_stack_frame; ///< struct tag for stack frame

// Forward declarations (the types are only used through pointers in this header)
typedef struct global_state_t        global_state_t; ///< Forward declaration for global state
typedef struct local_state           local_state;    ///< Forward declaration for local state
typedef struct cilkred_map           cilkred_map;    ///< Forward declaration for reducer map

/// Forward declaration for system-dependent worker state
typedef struct __cilkrts_worker_sysdep_state
    __cilkrts_worker_sysdep_state;
122 * The worker struct contains per-worker information that needs to be
123 * visible to the compiler, or rooted here.
125 * For 32-bit Windows we need to be aligning the structures on 4-byte
126 * boundaries to match where ICL is allocating the birthrank and rank
127 * in the __cilkrts_stack_frame. It's 4-byte aligned instead of 8-byte
128 * aligned. This is OK because the compiler is dealing with the 64-bit
129 * quantities as two 32-bit values. So change the packing to be on
130 * 4-byte boundaries.
132 * The fields of the worker struct can be classified as either local
133 * or shared.
135 * Local: This field is only accessed by the thread bound to this
136 * worker struct. Local fields can be freely accessed without
137 * acquiring locks.
139 * Shared: This field may be accessed by multiple worker threads.
140 * Accesses to shared fields usually requires locks, except in
141 * special situations where one can prove that locks are
142 * unnecessary.
144 * The fields of the worker struct can also be classified as
145 * "read-only" if the field does not change after it is initialized.
146 * Otherwise, the field is "read/write". Read-only fields do not
147 * require locks to access (ignoring the synchronization that might be
148 * needed for initialization if this can occur in parallel).
150 * Finally, we explicitly classify some fields as "synchronization"
151 * fields if they are used as part of a synchronization protocol in
152 * the runtime. These variables are generally shared and read/write.
153 * Mostly, this category includes lock variables and other variables
154 * that are involved in synchronization protocols (i.e., the THE
155 * protocol).
157 #if defined(_MSC_VER) && defined(_M_IX86)
158 #pragma pack(push, 4)
159 #endif
161 struct __cilkrts_worker {
163 * T, H, and E pointers in the THE protocol See "The implementation of
164 * the Cilk-5 multithreaded language", PLDI 1998:
165 * http://portal.acm.org/citation.cfm?doid=277652.277725
167 * Synchronization fields. [shared read/write]
169 __cilkrts_stack_frame *volatile *volatile tail;
170 __cilkrts_stack_frame *volatile *volatile head; /**< @copydoc tail */
171 __cilkrts_stack_frame *volatile *volatile exc; /**< @copydoc tail */
174 * Addition to the THE protocol to allow us to protect some set of
175 * entries in the tail queue from stealing. Normally, this is set
176 * beyond the end of the task queue, indicating that all entries are
177 * available for stealing. During exception handling, protected_tail
178 * may be set to the first entry in the task queue, indicating that
179 * stealing is not allowed.
181 * Synchronization field.
183 __cilkrts_stack_frame *volatile *volatile protected_tail;
186 * Limit of the Lazy Task Queue, to detect queue overflow
187 * [local read-only]
189 __cilkrts_stack_frame *volatile *ltq_limit;
192 * Worker id.
193 * [local read-only]
195 int32_t self;
198 * Global state of the runtime system, opaque to the client.
199 * [local read-only]
201 global_state_t *g;
204 * Additional per-worker state of the runtime system that we want
205 * to maintain hidden from the client.
206 * [shared read-only]
208 local_state *l;
211 * Map from reducer names to reducer values.
212 * [local read/write]
214 cilkred_map *reducer_map;
217 * A slot that points to the currently executing Cilk frame.
218 * [local read/write]
220 __cilkrts_stack_frame *current_stack_frame;
223 * Reserved space for a pointer.
224 * Used to be __cilkrts_stack_frame *volatile *volatile saved_protected_tail;
226 void* reserved;
229 * System-dependent part of the worker state
230 * [local read-only]
232 __cilkrts_worker_sysdep_state *sysdep;
234 #if __CILKRTS_ABI_VERSION >= 1
236 * Per-worker pedigree information used to support scheduling-independent
237 * pseudo-random numbers.
238 * [local read/write]
240 __cilkrts_pedigree pedigree;
241 #endif /* __CILKRTS_ABI_VERSION >= 1 */
246 * Every spawning function has a frame descriptor. A spawning function
247 * is a function that spawns or detaches. Only spawning functions
248 * are visible to the Cilk runtime.
250 struct __cilkrts_stack_frame
253 * flags is an integer with values defined below. Client code
254 * initializes flags to CILK_FRAME_VERSION before the first Cilk
255 * operation.
257 * The low 24-bits of the 'flags' field are the flags, proper. The high
258 * 8-bits are the version number.
260 * IMPORTANT: bits in this word are set and read by the PARENT ONLY,
261 * not by a spawned child. In particular, the STOLEN and UNSYNCHED
262 * bits are set on a steal and are read before a sync. Since there
263 * is no synchronization (locking) on this word, any attempt to set
264 * or read these bits asynchronously in a child would result in a race.
266 uint32_t flags;
268 /** Not currently used. Not initialized by Intel compiler. */
269 int32_t size;
271 /**
272 * call_parent points to the __cilkrts_stack_frame of the closest
273 * ancestor spawning function, including spawn helpers, of this frame.
274 * It forms a linked list ending at the first stolen frame.
276 __cilkrts_stack_frame *call_parent;
279 * The client copies the worker from TLS here when initializing
280 * the structure. The runtime ensures that the field always points
281 * to the __cilkrts_worker which currently "owns" the frame.
283 __cilkrts_worker *worker;
286 * Unix: Pending exception after sync. The sync continuation
287 * must call __cilkrts_rethrow to handle the pending exception.
289 * Windows: the handler that _would_ have been registered if our
290 * handler were not there. We maintain this for unwinding purposes.
291 * Win32: the value of this field is only defined in spawn helper
292 * functions
294 * Win64: except_data must be filled in for all functions with a
295 * __cilkrts_stack_frame
297 void *except_data;
300 * Before every spawn and nontrivial sync the client function
301 * saves its continuation here.
303 __CILK_JUMP_BUFFER ctx;
305 #if __CILKRTS_ABI_VERSION >= 1
307 * Architecture-specific floating point state. mxcsr and fpcsr should be
308 * set when CILK_SETJMP is called in client code. Note that the Win64
309 * jmpbuf for the Intel64 architecture already contains this information
310 * so there is no need to use these fields on that OS/architecture.
312 uint32_t mxcsr;
313 uint16_t fpcsr; /**< @copydoc mxcsr */
317 * reserved is not used at this time. Client code should initialize it
318 * to 0 before the first Cilk operation
320 uint16_t reserved;
323 * Pedigree information to support scheduling-independent pseudo-random
324 * numbers. There are two views of this information. The copy in a
325 * spawning function is used to stack the rank and communicate to the
326 * runtime on a steal or continuation. The copy in a spawn helper is
327 * immutable once the function is detached and is a node in the pedigree.
328 * The union is used to make clear which view we're using.
330 * In the detach sequence Client code should:
331 * - copy the worker pedigree into the spawn helper's pedigree
332 * - copy the worker pedigree into the call parent's pedigree
333 * - set the worker's rank to 0
334 * - set the worker's pedigree.next to the spawn helper's pedigree
336 union
338 __cilkrts_pedigree spawn_helper_pedigree; /* Used in spawn helpers */
339 __cilkrts_pedigree parent_pedigree; /* Used in spawning funcs */
341 #endif /* __CILKRTS_ABI_VERSION >= 1 */
345 * Restore previous structure packing for 32-bit Windows
347 #if defined(_MSC_VER) && defined(_M_IX86)
348 #pragma pack(pop)
349 #endif
/* Values of the flags bitfield */
/** CILK_FRAME_STOLEN is set if the frame has ever been stolen. */
#define CILK_FRAME_STOLEN    0x01

/**
 * CILK_FRAME_UNSYNCHED is set if the frame has been stolen and
 * has not yet executed _Cilk_sync.  It is technically a misnomer in that a
 * frame can have this flag set even if all children have returned.
 */
#define CILK_FRAME_UNSYNCHED 0x02

/**
 * Is this frame detached (spawned)?  If so the runtime needs
 * to undo-detach in the slow path epilogue.
 */
#define CILK_FRAME_DETACHED  0x04

/**
 * CILK_FRAME_EXCEPTION_PROBED is set if the frame has been probed in the
 * exception handler first pass
 */
#define CILK_FRAME_EXCEPTION_PROBED 0x08

/** Is this frame receiving an exception after sync? */
#define CILK_FRAME_EXCEPTING 0x10

/**
 * Is the pedigree unsynched?  That is, has a sync occurred that is not
 * yet represented in the pedigree?
 */
#define CILK_FRAME_SF_PEDIGREE_UNSYNCHED 0x20

/** Is this the last (oldest) Cilk frame? */
#define CILK_FRAME_LAST      0x80

/**
 * Is this frame in the epilogue, or more generally after the last
 * sync when it can no longer do any Cilk operations?
 */
#define CILK_FRAME_EXITING   0x0100

/** Is this frame suspended? (used for debugging) */
#define CILK_FRAME_SUSPENDED 0x8000

/** Used by Windows exception handling to indicate that __cilkrts_leave_frame should do nothing */
#define CILK_FRAME_UNWINDING 0x10000

/*
 * The low 24-bits of the 'flags' field are the flags, proper.  The high 8-bits
 * are the version number.
 */

/** ABI version left shifted to the high byte */
#define CILK_FRAME_VERSION (__CILKRTS_ABI_VERSION << 24)

/** Mask for the flags field to isolate the version bits */
#define CILK_FRAME_VERSION_MASK  0xFF000000

/** Mask for the flags field to isolate the flag bits */
#define CILK_FRAME_FLAGS_MASK    0x00FFFFFF

/** Convenience macro to provide access to the version portion of the flags field */
#define CILK_FRAME_VERSION_VALUE(_flags) (((_flags) & CILK_FRAME_VERSION_MASK) >> 24)

/** Any undefined bits are reserved and must be zero ("MBZ" = "Must Be Zero") */
#define CILK_FRAME_MBZ  (~ (CILK_FRAME_STOLEN |                 \
                            CILK_FRAME_UNSYNCHED |              \
                            CILK_FRAME_DETACHED |               \
                            CILK_FRAME_EXCEPTION_PROBED |       \
                            CILK_FRAME_EXCEPTING |              \
                            CILK_FRAME_SF_PEDIGREE_UNSYNCHED |  \
                            CILK_FRAME_LAST |                   \
                            CILK_FRAME_EXITING |                \
                            CILK_FRAME_SUSPENDED |              \
                            CILK_FRAME_UNWINDING |              \
                            CILK_FRAME_VERSION_MASK))

428 __CILKRTS_BEGIN_EXTERN_C
431 * Call __cilkrts_enter_frame to initialize an ABI 0 frame descriptor.
432 * Initialize the frame descriptor before spawn or detach. A function that
433 * conditionally does Cilk operations need not initialize the frame descriptor
434 * in a code path that never uses it.
436 * @param sf The __cilkrts_stack_frame that is to be initialized.
438 CILK_ABI(void) __cilkrts_enter_frame(__cilkrts_stack_frame* sf);
441 * Call __cilkrts_enter_frame to initialize an ABI 1 frame descriptor.
442 * Initialize the frame descriptor before spawn or detach. A function that
443 * conditionally does Cilk operations need not initialize the frame descriptor
444 * in a code path that never uses it.
446 * @param sf The __cilkrts_stack_frame that is to be initialized.
448 CILK_ABI(void) __cilkrts_enter_frame_1(__cilkrts_stack_frame* sf);
451 * __cilkrts_enter_frame_fast is the same as __cilkrts_enter_frame, except it
452 * assumes that the thread has already been bound to a worker.
454 * @param sf The __cilkrts_stack_frame that is to be initialized.
456 CILK_ABI(void) __cilkrts_enter_frame_fast(__cilkrts_stack_frame *sf);
459 * __cilkrts_enter_frame_fast_1 is the same as __cilkrts_enter_frame_1,
460 * except it assumes that the thread has already been bound to a worker.
462 * @param sf The __cilkrts_stack_frame that is to be initialized.
464 CILK_ABI(void) __cilkrts_enter_frame_fast_1(__cilkrts_stack_frame *sf);
467 * Call leave_frame before leaving a frame, after sync. This function
468 * returns except in a spawn wrapper where the parent has been stolen.
470 * @param sf The __cilkrts_stack_frame that is to be left.
472 CILK_ABI(void) __cilkrts_leave_frame(__cilkrts_stack_frame *sf);
475 * Suspends the runtime by notifying the workers that they should not try to
476 * steal. This function is supposed to be called from a non-parallel region
477 * (i.e., after cilk_sync in the top-level spawning function). Otherwise,
478 * which workers are sleeping or busy is unpredictable in general.
479 * The runtime can be resumed by calling __cilkrts_resume().
481 CILK_ABI(void) __cilkrts_suspend(void);
484 * Resumes the runtime by notifying the workers that they can steal.
486 CILK_ABI(void) __cilkrts_resume(void);
489 * Wait for any spawned children of this function to complete before
490 * continuing. This function will only return when the join counter
491 * has gone to 0. Other workers will re-enter the scheduling loop to
492 * attempt to steal additional work.
494 * @param sf The __cilkrts_stack_frame that is to be synched.
496 CILK_ABI(void) __cilkrts_sync(__cilkrts_stack_frame *sf);
499 * Called when an exception is escaping a spawn * wrapper.
500 * The stack frame's except_data field is the C++ runtime
501 * exception object. If NULL (temporary workaround) the
502 * currently caught exception should be rethrown. If this
503 * function returns normal exit functions must be called;
504 * undo-detach will have been done.
506 * @param sf The __cilkrts_stack_frame for the function that
507 * is raising an exception.
509 CILK_ABI_THROWS(void)
510 __cilkrts_return_exception(__cilkrts_stack_frame *sf);
513 * Called to re-raise an exception.
515 * @param sf The __cilkrts_stack_frame for the function that
516 * is raising an exception.
518 CILK_ABI_THROWS(void) __cilkrts_rethrow(__cilkrts_stack_frame *sf);
521 * Called at the beginning of a spawning function to get the worker
522 * that this function is running on. This worker will be used to
523 * initialize the __cilkrts_stack_frame.
525 * @return The __cilkrts_worker that the function is running on.
526 * @return NULL if this thread is not yet bound to a worker.
528 CILK_ABI(__cilkrts_worker_ptr) __cilkrts_get_tls_worker(void);
531 * Similar to __cilkrts_get_tls_worker, but assumes that TLS has been
532 * initialized.
534 * @return The __cilkrts_worker that the function is running on.
535 * @return NULL if this thread is not yet bound to a worker.
537 CILK_ABI(__cilkrts_worker_ptr) __cilkrts_get_tls_worker_fast(void);
540 * Binds a thread to the runtime by associating a __cilkrts_worker with
541 * it. Called if __cilkrts_get_tls_worker returns NULL. This function will
542 * initialize the runtime the first time it is called.
544 * This function is versioned by the ABI version number. The runtime
545 * will export all previous versions. This prevents using an application
546 * built with a newer compiler against an old runtime.
548 * @return The __cilkrts_worker bound to the thread the function is running
549 * on.
551 CILK_ABI(__cilkrts_worker_ptr) __cilkrts_bind_thread_1(void);
typedef uint32_t cilk32_t;  /**< 32-bit unsigned type for cilk_for loop indices */

typedef uint64_t cilk64_t;  /**< 64-bit unsigned type for cilk_for loop indices */

/**
 * Signature for the lambda function generated for the body of a cilk_for loop
 * which uses 32-bit indices
 */
typedef void (*__cilk_abi_f32_t)(void *data, cilk32_t low, cilk32_t high);

/**
 * Signature for the lambda function generated for the body of a cilk_for loop
 * which uses 64-bit indices
 */
typedef void (*__cilk_abi_f64_t)(void *data, cilk64_t low, cilk64_t high);

570 * @brief cilk_for implementation for 32-bit indexes.
572 * @param body The lambda function for the body of the cilk_for. The lambda
573 * function will be called to execute each grain of work.
574 * @param data Data passed by the compiler into the lambda function. Provides
575 * access to data outside the cilk_for body.
576 * @param count Number of steps in the loop.
577 * @param grain This parameter allows the compiler to pass a value from a
578 * \#pragam(grainsize) statement to allow the user to control the grainsize. If
579 * there isn't a \#pragma(grainsize) immediately preceeding cilk_for loop, Pass
580 * 0 to specify that the runtime should calculate the grainsize using its own
581 * hueristicts.
583 CILK_ABI_THROWS(void) __cilkrts_cilk_for_32(__cilk_abi_f32_t body,
584 void *data,
585 cilk32_t count,
586 int grain);
589 * @brief cilk_for implementation for 64-bit indexes.
591 * @copydetails __cilkrts_cilk_for_32
593 CILK_ABI_THROWS(void) __cilkrts_cilk_for_64(__cilk_abi_f64_t body,
594 void *data,
595 cilk64_t count,
596 int grain);
599 * @brief Allocate memory for variable length arrays. If the frame is
600 * sync'd, the memory will be allocated on the stack, otherwise it will
601 * be allocated from the heap.
603 * @param sf The __cilkrts_stack_frame for the function allocating the
604 * memory.
605 * @param size The number of bytes requested.
606 * @param distance_from_sp_to_alloca_area ?.
607 * @param align Alignment required. Always >= minimum stack alignment,
608 * >= ptr_size, and always a power of 2.
609 * @param needs_tag Non-zero if the pointer being returned needs to be
610 * tagged
612 * @return The address of the memory block allocated.
615 CILK_ABI(__cilkrts_void_ptr)
616 __cilkrts_stack_alloc(__cilkrts_stack_frame *sf,
617 size_t size,
618 size_t distance_from_sp_to_alloca_area,
619 uint32_t align,
620 uint32_t needs_tag);
623 * @brief Free memory allocated by _cilkrts_stack_alloc() for variable length
624 * arrays.
626 * @param sf The __cilkrts_stack_frame for the function allocating the
627 * memory.
628 * @param p Pointer to the memory block to be freed.
629 * @param size The number of bytes requested.
630 * @param distance_from_sp_to_alloca_area ?.
631 * @param align Alignment required. Always >= minimum stack alignment,
632 * >= ptr_size, and always a power of 2.
633 * @param know_from_stack Non-zero if the pointer is known to have been
634 * allocated on the stack and has no tag.
636 CILK_ABI(void)
637 __cilkrts_stack_free(__cilkrts_stack_frame *sf,
638 void *p,
639 size_t size,
640 size_t distance_from_sp_to_alloca_area,
641 uint32_t align,
642 uint32_t known_from_stack);
645 * @brief System-dependent code to save floating point control information
646 * to an ABI 1 or higher @c __cilkrts_stack_frame. If possible (and necessary)
647 * the code to save the floating point control information should be inlined.
649 * Note that this function does *not* save the current floating point
650 * registers. It saves the floating point control words that control
651 * precision and rounding and stuff like that.
653 * This function will be a noop for architectures that don't have warts
654 * like the floating point control words, or where the information is
655 * already being saved by the setjmp.
657 * @param sf @c __cilkrts_stack_frame for the frame we're saving the
658 * floating point control information in.
660 CILK_ABI(void)
661 __cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf);
663 __CILKRTS_END_EXTERN_C
664 #endif /* include guard */