3 /* Copyright (c) 1994 Stanford University
7 This software is provided under the terms described in
8 the "suif_copyright.h" file. */
10 #include <suif_copyright.h>
12 /*----------------------------------------------------------------------*
14 * runtime.c Run-time routines for output of SUIF shared-memory compiler
18 * int suif_get_my_id() return logical processor id
19 * int suif_num_total_ids() return total # of processors
21 * void suif_global_barrier(id) block until all procs arrive
22 * void suif_barrier(id,n) block until n procs arrive
24 * void suif_sync_neighbor(id) block until neighbors arrive
26 * void suif_lock(id) block until lock acquired
27 * void suif_unlock(id) release lock
29 * double suif_clock() return time in seconds
30 * double suif_hi_res_clock() return time in seconds (better
31 * resolution than suif_clock(),
32 * but not guaranteed not to
34 * void suif_start_timer() start accurate timer
35 * double suif_end_timer() finish accurate timer, return
38 * void suif_doall(func,ptr) parallel call
39 * int suif_doall_level() return level of parallelism
41 * void suif_restore_state(pid, my_id) called by hardware simulator to restore
42 * void restart(pid) called by hardware simulator to restore
44 * void suif_finc_init(id) reset fetch and increment counter to 0
45 * int suif_finc(id) fetch value and increment counter
47 * void suif_counter_init_all() reset all counters to 0
48 * void suif_counter_init_range(id) reset counters in range 0..id to 0
49 * void suif_counter_init(proc,id) reset counter to 0
50 * void suif_counter_incr(proc,id) increment counter
51 * void suif_counter_set(proc,id,val) set counter to val
52 * void suif_counter_set_range(proc,id,val) set counters [0..proc-1][0..id-1] to val
53 * void suif_counter_wait(proc,id,val) block until counter >= value
55 * void suif_speculate_begin() begin speculation
56 * void suif_speculate_commit() commit speculation
57 * void suif_speculate_terminate() terminate speculation (also commit)
58 * void suif_par_begin() begin speculation
59 * void suif_par_commit() commit speculation
60 * void suif_par_terminate() terminate speculation (also commit)
61 * void suif_infinite_loop() infinite loop
62 * void *suif_malloc(size_t size) allocate data, cache-line aligned
64 * void reduce_OP_TYPE() reductions with OP on TYPE
66 * void suif_reset_stats() reset statistics
68 * void suif_exit_log() log exit for other threads
70 * EXTERNAL SYMBOLS REQUIRED
71 * -------------------------
72 * int _suif_start(argc,argv,envp) root of application program
73 * int _suif_nproc required # of procs if non-zero
75 * These routines are intended to support the output of the SUIF
76 * shared-memory compiler on a variety of architectures.
77 * Currently support is provided for SGI, DASH, KSR and generic pthreads.
79 * This system is modeled after the run-time routines provided by
80 * Martin Rinard. It uses the ANL macros provided by JP Singh,
81 * Truman Joe, and other members of the DASH group at Stanford.
82 * Names & calling conventions of the run-time system roughly match
83 * those of the p4 parallel programming system by Butler & Lusk
86 * The root of the SUIF application program is named _suif_start().
87 * Parallel execution is provided by calling suif_doall() with
88 * a pointer to the function to be executed in parallel.
92 * int _suif_nproc = 0; // does not require a fixed # of procs
93 * extern void *volatile _suif_aligned_args;
94 * extern void (**volatile _suif_aligned_task_f)(int);
97 * int _suif_start(argc, argv, envp)
99 * char **argv, **envp;
101 * struct my_args args;
103 * ... // work to be performed sequentially
104 * *_suif_aligned_task_f = foo;
105 * *_suif_aligned_args = .../ // arguments to task funcs
106 * suif_doall(foo); // foo is executed in parallel
107 * ... // work to be performed sequentially
111 * int myid; // id of worker thread, from 0...P-1
113 * ... // work to be performed in parallel
116 * ENVIRONMENTAL VARIABLES
117 * -----------------------
118 * PL_NUM_THREADS // number of threads to execute
119 * PL_PROC_START // physical proc id of 1st thread
120 * PL_MAXPROC // total # of procs (max physical id+1)
121 * Do not execute loops iff:
122 * PL_LIMITS_ITER // number of iterations is less than ..
123 * PL_LIMITS_LOADBALANCE // % of load imbalance is less than ..
124 * PL_LIMITS_WORK // amount of work in the loop body *
125 * # of iterations is less than ..
127 * Environmental variables are used to specify the number and mapping
128 * of threads. The system creates a number of threads equal to
129 * PL_NUM_THREADS, then maps them to physical processors starting
130 * at PL_PROC_START, wrapping at PL_MAXPROC back to processor 0.
132 * Threads are assigned logical ids from 0 to PL_NUM_THREADS-1.
133 * The original master thread is guaranteed to be thread 0. The
134 * run-time system supports only a single level of parallelism.
135 * Nested calls to suif_doall() result in a single thread with id
138 * If the SUIF compiler produces code that requires a fixed number of
139 * processors, the global variable _suif_nproc is set to the number.
140 * If a variable number of processors is allowed, it is zeroed.
142 * Lock, barrier, and fetch&inc counter routines require an id.
143 * Currently eight of each are supported. Counter routines require
144 * a processor and counter id. Currently DEFAULT_MAXCOUNTER are
145 * supported for each processor.
147 * Locks, barriers, and fetch&inc counters provide mutual exclusion
148 * on updates; they can be safely updated by multiple processors.
149 * In comparison, normal counters do not provide mutual exclusion.
150 * For safety, each processor should thus only write to its own
151 * counter, though this restriction is not enforced.
153 *----------------------------------------------------------------------*/
158 #include "runtime_names.h"
161 /*----------------------------------------------------------------------
163 *----------------------------------------------------------------------
167 /* Create padded version of variables to avoid false sharing */
169 typedef struct suif_aligned_vars
{
173 char _pad1
[CACHE_LINE_SIZE
- ((3 * sizeof(int)) % CACHE_LINE_SIZE
)];
175 MasterSlaveBarrier barrier
;
178 char _aligned_vars_space
[sizeof(suif_aligned_vars
) + (8 * CACHE_LINE_SIZE
)];
179 suif_aligned_vars
*_suif_aligned_vars
= 0;
181 /* _aligned_args_space holds doall_level, task function pointer and task
182 arguments. doall_level is set in runtime, task function pointer and
183 task arguments are set in generated code
185 char _aligned_args_space
[(2 * sizeof(void *)) + MAX_ARGS_SIZE
+
186 (8 * CACHE_LINE_SIZE
)];
188 /* Variables used in SUIF compiled code */
190 int *volatile _suif_aligned_my_nprocs
= NULL
;
191 int *volatile _suif_aligned_my_nprocs1
= NULL
;
192 int *volatile _suif_aligned_my_nprocs2
= NULL
;
194 int *volatile _suif_aligned_doall_level
= NULL
;
195 task_f
*volatile _suif_aligned_task_f
= NULL
;
196 void *volatile _suif_aligned_args
= NULL
;
198 int _thread_map
[DEFAULT_MAXPROC
];
199 int restart_map
[DEFAULT_MAXPROC
];
200 jmp_buf jmpbuf
[DEFAULT_MAXPROC
];
202 /* OBSOLETE: The following three variables are provided for
203 * compatibility with previous releases of this package. Eventually,
204 * they will be removed entirely. Note that code that uses these
205 * variables instead of the pointers to the aligned versions will not
206 * get the benefit of suif_limited_doall() or feedback to cut the
207 * degree of parallelism -- the code will run on all processors in
214 /* Runtime structure Used for feedback to
217 typedef struct suif_dynamic_feedback_struct
{
218 int iter_threshhold
; /* do not parallelize below this number of
220 int max_parallelism
; /* never use more than this number of processors */
221 } suif_dynamic_feedback
;
226 extern int _suif_start(int argc
, char **argv
, char **envp
);
227 extern int _suif_nproc
;
235 static int _limits_iter
;
236 static int _limits_ldb
;
237 static int _limits_work
;
238 static int _limits_body
;
239 static int _limits_work_small_iter
;
240 static int _dynamic_feedback_on
;
241 static char *_dynamic_feedback_file_name
;
243 static PIDTYPE _master_pid
;
245 static volatile int _main_stop
;
246 static volatile int _mylocks
[DEFAULT_MAXPROC
][DEFAULT_MAXCOUNTER
];
248 static void suif_initenv(void);
249 static void suif_init_dynamic_feedback(void);
250 static void suif_create(void (*)(void));
251 static void suif_worker(void);
252 static void _barrier(void);
253 static void _barrier2(void);
254 static void *cache_line_align(void *addr
);
255 static double safe_divide(double a
, double b
);
258 #if (defined(STATS) && defined(ALL_STATS)) || defined(FEEDBACK) || \
260 static void **task_func_table
= NULL
;
261 static char **task_name_table
= NULL
;
262 static char *current_task_name
= NULL
;
263 static int table_size
= 0;
265 static int current_task_num
= 0;
268 * These are used for integrating feedback
269 * to limit parallelism
271 #define DYNAMIC_FEEDBACK_TABLE_SIZE 512
273 static suif_dynamic_feedback
*suif_dynamic_feedback_table
;
274 static suif_dynamic_feedback
275 static_dynamic_feedback_table
[DYNAMIC_FEEDBACK_TABLE_SIZE
];
276 static int suif_dynamic_feedback_doalls
= 0;
278 #if (defined(STATS) && defined(ALL_STATS)) || defined(GEN_TRACE)
279 static int current_task_iters_const
= 0;
281 #if defined(STATS) || defined(ALL_STATS) || defined(GEN_TRACE)
282 static int current_task_num_iters
= 0;
286 static SUIF_HI_RES_TIMER_DATA_TYPE alltime_hi_res
;
287 static double alltime_low_res
;
288 static double partime
;
289 static int num_doalls
;
290 #if defined(SEQUENTIAL)
291 static int num_barriers
;
292 static int num_sync_neighbors
;
293 static int num_locks
;
294 static int num_reductions
;
296 #if defined(ALL_STATS)
297 static double *stat_time_table
= NULL
;
298 static int *stat_doalls_table
= NULL
;
299 static double *stat_iters_table
= NULL
;
300 #if defined(SEQUENTIAL)
301 static int *stat_barriers_table
= NULL
;
302 static int *stat_sync_neighbors_table
= NULL
;
303 static int *stat_locks_table
= NULL
;
304 static int *stat_reductions_table
= NULL
;
310 #if defined(GEN_TRACE)
311 typedef struct trace_data_struct
{
316 static trace_data
*trace_space
;
317 static int num_trace_dumps
= 0;
318 static int num_traces
= 0;
319 static int max_traces
= 1024*1024; /*Take 1 meg at a time */
320 static void gen_trace(int, int, double);
321 static void suif_inittrace(void);
322 static void dump_trace(void);
324 static double trace_dump_time
= 0.0;
328 #if defined(FEEDBACK)
329 extern char *feedback_name_table
[];
330 extern int feedback_nproc_limit_table
[];
331 static int *limit_nproc_table
= NULL
;
334 /* system barriers & locks */
340 /* user barriers & locks */
378 /* reduction barriers & locks */
382 ALOCKDEC(_lock_reduction
,NUM_RLOCK
)
384 /* extra function called by main when compiled with gcc */
390 /*----------------------------------------------------------------------*/
393 /* main() - read parameters, do initializations, start threads, then */
394 /* pass control to the application by calling _suif_start(). */
396 int main(int argc
, char **argv
, char **envp
)
401 /* initialize suif_aligned_{vars,args} to be aligned on cache-lines */
406 (suif_aligned_vars
*) cache_line_align(&_aligned_vars_space
[0]);
408 args
= (char *) cache_line_align(&_aligned_args_space
[0]);
410 _suif_aligned_doall_level
= (int *) args
;
411 _suif_aligned_task_f
= (task_f
*) (args
+ sizeof(void *));
412 _suif_aligned_my_nprocs
= (int *) (args
+ (2 * sizeof(void *)));
413 _suif_aligned_my_nprocs1
= (int *) (args
+ (3 * sizeof(void *)));
414 _suif_aligned_my_nprocs2
= (int *) (args
+ (4 * sizeof(void *)));
415 _suif_aligned_args
= (void *) (args
+ (6 * sizeof(void *)));
421 #if defined(SEQUENTIAL)
423 num_sync_neighbors
= 0 ;
429 /* get some parameters from environment variables */
431 _suif_aligned_vars
->_nproc
= DEFAULT_NUM_THREADS
;
432 _suif_aligned_vars
->_proc_start
= DEFAULT_PROC_START
;
433 _suif_aligned_vars
->_maxproc
= DEFAULT_MAXPROC
;
434 _limits_iter
= DEFAULT_LIMITS_ITER
;
435 _limits_ldb
= DEFAULT_LIMITS_LOADBALANCE
;
436 _limits_work
= DEFAULT_LIMITS_WORK
;
437 _limits_body
= DEFAULT_LIMITS_BODY
;
438 _limits_work_small_iter
= DEFAULT_LIMITS_WORK_SMALL_ITER
;
439 _dynamic_feedback_on
= 0;
441 for (ptr
=envp
; *ptr
; ptr
++) {
442 if (!strncmp("PL_NUM_THREADS=", *ptr
, 15)) {
443 _suif_aligned_vars
->_nproc
= atoi(*ptr
+15);
445 /* synchronization in simos_sync.h can only handle
446 nproc < CACHE_LINE_SIZE */
447 if ((_suif_aligned_vars
->_nproc
< 1) ||
448 (_suif_aligned_vars
->_nproc
>= CACHE_LINE_SIZE
)) {
449 fprintf(stderr
, "Illegal value for PL_NUM_THREADS\n");
450 _suif_aligned_vars
->_nproc
= DEFAULT_NUM_THREADS
;
453 if (!strncmp("PL_PROC_START=", *ptr
, 14)) {
454 _suif_aligned_vars
->_proc_start
= atoi(*ptr
+14);
456 if (!strncmp("PL_MAXPROC=", *ptr
, 11)) {
457 if ((_suif_aligned_vars
->_maxproc
= atoi(*ptr
+11)) < 1) {
458 fprintf(stderr
, "Illegal value for PL_MAXPROC\n");
459 _suif_aligned_vars
->_maxproc
= DEFAULT_MAXPROC
;
462 if (!strncmp("PL_LIMITS_ITER=", *ptr
, 15)) {
463 _limits_iter
= atoi(*ptr
+15);
465 if (!strncmp("PL_LIMITS_LOADBALANCE=", *ptr
, 22)) {
466 _limits_ldb
= atoi(*ptr
+22);
468 if (!strncmp("PL_LIMITS_WORK=", *ptr
, 15)) {
469 _limits_work
= atoi(*ptr
+15);
471 if (!strncmp("PL_LIMITS_BODY=", *ptr
, 15)) {
472 _limits_body
= atoi(*ptr
+15);
474 if (!strncmp("PL_LIMITS_WORK_SMALL_ITER=", *ptr
, 26)) {
475 _limits_work_small_iter
= atoi(*ptr
+26);
477 if (!strncmp("PL_DYNAMIC_FEEDBACK=", *ptr
, 20)) {
478 _dynamic_feedback_on
= 1;
479 _dynamic_feedback_file_name
= &((*ptr
)[20]);
483 /* initializations */
485 #if defined(SEQUENTIAL)
486 _suif_aligned_vars
->_nproc
= 1;
489 /* if _suif_nproc is nonzero, use compiler predetermined # of procs */
490 if (_suif_nproc
!= 0)
491 _suif_aligned_vars
->_nproc
= _suif_nproc
;
499 /* read in the dynamic feedback file for suppressing parallelism */
500 if (_dynamic_feedback_on
) suif_init_dynamic_feedback();
501 #if defined(GEN_TRACE)
506 SUIF_INIT_HI_RES_CLOCK
507 SUIF_START_TIMER(alltime_hi_res
, alltime_low_res
);
510 /* get a unique identifier to identify the master thread */
511 _master_pid
= GETUNIQUEID
;
513 /* spawn worker threads, begin computation */
515 for(i
=1; i
< _suif_aligned_vars
->_nproc
; i
++) {
516 suif_create(suif_worker
);
519 if (_suif_aligned_vars
->_nproc
> 1) {
526 #if defined(GEN_TRACE)
532 double safe_divide(double a
, double b
) {
533 if (b
== 0.0) return(0.0);
537 void *cache_line_align(void *addr
)
539 ptrdiff_t ptr
= (ptrdiff_t) addr
;
540 ptr
= (ptr
+ (ptrdiff_t) (CACHE_LINE_SIZE
- 1)) &
541 (~((ptrdiff_t) (CACHE_LINE_SIZE
- 1)));
546 void *suif_malloc(size_t size
)
548 int num_bytes
= size
+ (2*CACHE_LINE_SIZE
);
549 void *ptr
= malloc(num_bytes
);
551 return (cache_line_align(ptr
));
555 /*----------------------------------------------------------------------*/
556 /* read in the runtime dynamic feedback file if it exists
557 * Looks for a file named by the PL_DYNAMIC_FEEDBACK environment variable.
558 * Creates a table indexed by doall loop numbers
559 * for fast runtime access to determine number of
560 * processors to run on
562 static void suif_init_dynamic_feedback()
565 char line
[512]; /* reasonable limit to a line size */
566 char name
[512]; /* reasonable limit for a string */
567 char *fname
= _dynamic_feedback_file_name
;
568 int in_region
= 0; /* flag to determine when the first line is read */
571 fp
= fopen(fname
, "r");
573 fprintf(stderr
, "Unable to open dynamic feedback file `%s', ignoring\n",
575 _dynamic_feedback_on
= 0;
579 /* read in lines until we run out */
580 while (fgets(line
, 512, fp
) != NULL
) {
581 /* ignore comments */
582 if (line
[0] == '#') { continue; }
584 /* Use scanf for simplicity */
587 if ((rv
= sscanf(line
, "DOALLS %d\n", &num_doalls
)) == 1) {
588 if (num_doalls
< 0) {
589 fprintf(stderr
, "Error in dynamic feedback file `%s', ignoring\n",
591 _dynamic_feedback_on
= 0;
596 if (num_doalls
> DYNAMIC_FEEDBACK_TABLE_SIZE
)
598 suif_dynamic_feedback_table
=
599 (suif_dynamic_feedback
*)malloc(sizeof(suif_dynamic_feedback
) *
601 if (suif_dynamic_feedback_table
== NULL
)
602 perror("malloc failed");
606 suif_dynamic_feedback_table
= static_dynamic_feedback_table
;
609 for (i
= 0; i
< num_doalls
; i
++) {
610 /* do not suppress by default */
611 suif_dynamic_feedback_table
[i
].iter_threshhold
= 0;
612 suif_dynamic_feedback_table
[i
].max_parallelism
= 1000;
620 int doall_index
, iter_threshhold
, max_parallelism
;
623 if ((rv
= sscanf(line
, "ENTRY %s %d %d %d\n", name
, &doall_index
,
624 &iter_threshhold
, &max_parallelism
)) == 4) {
625 /* fprintf(stderr,"ENTRY %s %d %d %d\n", name, doall_index,
626 iter_threshhold, max_parallelism); */
627 if ((doall_index
< 0) || (doall_index
> num_doalls
)) {
629 "IGNORED: %s: doall_index(=%d) > num_doalls(=%d)\nline=%s\n",
630 fname
, doall_index
, num_doalls
, line
);
632 suif_dynamic_feedback_table
[doall_index
].iter_threshhold
=
634 suif_dynamic_feedback_table
[doall_index
].max_parallelism
=
640 fprintf(stderr
, "IGNORED: %s: inregion=%d, line=%s\n",
641 fname
, in_region
, line
);
644 suif_dynamic_feedback_doalls
= num_doalls
;
649 /* use the feedback data to determine the number of processors
650 * to run on. With any luck, it will be inlined
652 static int suif_dynamic_feedback_get_nprocs(int task_func_num
,
653 int num_iters
, int num_procs
) {
655 if (!_dynamic_feedback_on
) return(num_procs
);
657 /* blindly assert that task_func_num is ALWAYS >= 0 and < num_doalls */
658 if ((task_func_num
< 0) || (task_func_num
>= suif_dynamic_feedback_doalls
)) {
661 /* if we are below the threshold, run on one processor */
662 if (suif_dynamic_feedback_table
[task_func_num
].iter_threshhold
> num_iters
) {
665 /* use the min(max_parallelism, num_procs, num_iters); */
666 max_parallelism
= suif_dynamic_feedback_table
[task_func_num
].max_parallelism
;
667 max_parallelism
= ((max_parallelism
<= num_procs
)
668 ? max_parallelism
: num_procs
);
669 max_parallelism
= ((max_parallelism
<= num_iters
)
670 ? max_parallelism
: num_iters
);
671 return(max_parallelism
);
674 #if defined(GEN_TRACE)
676 * init generate trace
679 static void suif_inittrace() {
680 trace_space
= (trace_data
*)malloc(sizeof(trace_data
) * max_traces
);
681 /* for the challenge */
684 /* ignore errors when dumping trace */
685 static void dump_trace(void) {
687 char *fname
= "RTdumpfile";
689 if (num_traces
== 0) return;
690 if (num_trace_dumps
== 0) {
691 fp
= fopen(fname
, "w");
693 fp
= fopen(fname
, "a");
695 fwrite ((void *) trace_space
, sizeof(trace_data
), num_traces
, fp
);
701 static void gen_trace(int func_num
, int num_iters
, double time
) {
702 trace_space
[num_traces
].func_num
= func_num
;
703 trace_space
[num_traces
].num_iters
= num_iters
;
704 trace_space
[num_traces
].time
= time
;
706 if (num_traces
== max_traces
) {
708 SUIF_HI_RES_TIMER_DATA_TYPE hi_res_trace_start
;
709 double low_res_trace_start
;
712 SUIF_START_TIMER(hi_res_trace_start
, low_res_trace_start
);
716 SUIF_END_TIMER(test_time
, hi_res_trace_start
, low_res_trace_start
);
717 trace_dump_time
+= test_time
;
724 /*----------------------------------------------------------------------*/
725 /* process management */
727 /* suif_initenv() - initialize environment & synchronization constructs */
729 static void suif_initenv(void)
733 /* system barriers & locks */
739 /* user barriers & locks */
750 BARINIT(_bar_global0
)
751 BARINIT(_bar_global1
)
752 BARINIT(_bar_global2
)
753 BARINIT(_bar_global3
)
754 BARINIT(_bar_global4
)
755 BARINIT(_bar_global5
)
756 BARINIT(_bar_global6
)
757 BARINIT(_bar_global7
)
777 /* reduction locks */
778 ALOCKINIT(_lock_reduction
,NUM_RLOCK
)
780 suif_counter_init_all();
783 *_suif_aligned_doall_level
= 0;
784 *_suif_aligned_my_nprocs
= _suif_aligned_vars
->_nproc
;
785 _my_nprocs
= *_suif_aligned_my_nprocs
;
787 suif_assign_nprocs();
788 _my_nprocs1
= *_suif_aligned_my_nprocs1
;
789 _my_nprocs2
= *_suif_aligned_my_nprocs2
;
792 /*----------------------------------------------------------------------*/
793 /* suif_create() - creates lightweight thread to execute function f */
795 static void suif_create(void (*f
)(void))
801 /*----------------------------------------------------------------------*/
802 /* suif_get_my_id() - return logical thread id (0...P-1) */
804 int suif_get_my_id(void)
810 return id
; /* logical id from 0 to PL_NUM_THREADS-1 */
814 /*----------------------------------------------------------------------*/
815 /* suif_num_total_ids() - return total number of threads */
817 int suif_num_total_ids(void)
819 return _suif_aligned_vars
->_nproc
; /* total number of threads */
825 return _suif_aligned_vars
->_nproc
;
830 /*----------------------------------------------------------------------*/
831 /* suif_assign_ids() - initializes _my_ids */
833 void suif_assign_ids(int myid
, int *myid1
, int *myid2
)
835 int my_nprocs1
= *_suif_aligned_my_nprocs1
;
836 *myid1
= myid
% my_nprocs1
;
837 *myid2
= myid
/ my_nprocs1
;
841 void setids_(int *myid
, int *myid1
, int *myid2
)
843 suif_assign_ids(*myid
, myid1
, myid2
);
847 /*----------------------------------------------------------------------*/
848 /* suif_assign_nprocs() - initializes _my_nprocs */
850 void suif_assign_nprocs(void)
852 int my_nprocs1
, my_nprocs2
;
853 int nproc
= *_suif_aligned_my_nprocs
;
859 else if ((nproc
== 9) ||
863 else if ((nproc
< 25) ||
866 else if ((nproc
== 25) ||
873 else if (nproc
== 49)
881 /* only modify the global variable if needed */
882 if (my_nprocs1
!= *_suif_aligned_my_nprocs1
) {
883 *_suif_aligned_my_nprocs1
= my_nprocs1
;
885 if (my_nprocs2
!= *_suif_aligned_my_nprocs2
) {
886 *_suif_aligned_my_nprocs2
= my_nprocs2
;
893 suif_assign_nprocs();
897 /*----------------------------------------------------------------------*/
898 /* suif_restore_state() - called by hardware simulator to restore registers */
900 void suif_restore_state(int pid
, int my_id
/* _thread_map[pid] */)
902 longjmp(jmpbuf
[pid
], my_id
);
906 /*----------------------------------------------------------------------*/
907 /* restart() - called by hardware simulator to restore registers */
911 restart_map
[pid
] = pid
;
916 /*----------------------------------------------------------------------*/
917 /* suif_doall_level() - return depth of nesting in doall loops */
919 int suif_doall_level(void)
921 /* (*_suif_aligned_doall_level) begins at 0 and is incremented */
922 /* after each nested call to suif_doall() */
924 return (*_suif_aligned_doall_level
);
929 /* (*_suif_aligned_doall_level) begins at 0 and is incremented */
930 /* after each nested call to suif_doall() */
932 return (*_suif_aligned_doall_level
);
936 /*----------------------------------------------------------------------*/
937 /* suif_check_work() - return whether the amount of work in the given */
938 /* parameters is greater than the threshold set by the environment */
942 int checkw_(int *lb
, int *ub
, double *work
, int *cflow
)
944 return suif_check_work(*lb
, *ub
, *work
, *cflow
);
948 int suif_check_work(int lb
, int ub
, double work
, int cflow
)
952 if (i
< _suif_aligned_vars
->_nproc
) return 0;
954 total_work
= i
* work
;
955 if(total_work
< (double)_limits_work
) return 0;
956 if(i
< _limits_iter
&&
957 total_work
< (double)_limits_work_small_iter
) return 0;
964 /*----------------------------------------------------------------------*/
965 /* suif_wait_for_end() - clean up routines */
967 void suif_wait_for_end(void)
972 SUIF_END_TIMER(alltime
, alltime_hi_res
, alltime_low_res
);
973 #if defined(GEN_TRACE)
974 alltime
= alltime
- trace_dump_time
;
975 #endif /* GEN_TRACE */
977 fprintf(stderr
, " ***************************************************************************\n");
978 fprintf(stderr
, " Dynamic Feedback %s\n",
979 (_dynamic_feedback_on
) ? "ON" : "OFF");
981 #if defined(GEN_TRACE)
982 fprintf(stderr
, " Trace output GENERATED. Dump time removed from time\n");
983 #endif /* GEN_TRACE */
985 fprintf(stderr
, " Sequential Time = %11.6f sec, \
986 Doall Invocations = %9d \n", alltime
- partime
, num_doalls
);
987 #if defined(SEQUENTIAL)
989 double avg_task_time
= safe_divide(partime
, num_doalls
);
991 fprintf(stderr
, " Parallel Time = %11.6f sec, \
992 Barrier Invocations = %9d \n", partime
, num_barriers
);
993 fprintf(stderr
, " Avg Task Time = %11.6f sec, \
994 Sync Neighbors = %9d \n",
995 avg_task_time
, num_sync_neighbors
);
996 fprintf(stderr
, " Coverage = %9.2f %%, \
997 Reductions = %9d \n",
998 safe_divide( partime
* 100.0, alltime
), num_reductions
);
1001 fprintf(stderr
, " Parallel Time = %11.6f sec\n", partime
);
1002 fprintf(stderr
, " Avg Task Time = %11.6f sec\n",
1003 safe_divide(partime
, num_doalls
));
1004 fprintf(stderr
, " Coverage = %9.2f %%\n",
1005 safe_divide(partime
* 100.0, alltime
));
1006 #endif /* SEQUENTIAL */
1007 fprintf(stderr
, " ***************************************************************************\n");
1009 #if defined(ALL_STATS)
1013 fprintf(stderr
, "\n ***************************************************************************\n");
1014 for (func_num
= 0; task_func_table
[func_num
] != NULL
; ++func_num
)
1016 char *current_name
= task_name_table
[func_num
];
1017 current_name
= ((current_name
== NULL
) ? "??" : current_name
);
1018 if (*current_name
== '&')
1020 fprintf(stderr
, " ``%s'' Time = %11.6f sec\n", current_name
,
1021 stat_time_table
[func_num
]);
1022 fprintf(stderr
, " ``%s'' Time/iter = %11.6f sec\n", current_name
,
1023 safe_divide(stat_time_table
[func_num
],
1024 stat_iters_table
[func_num
] ));
1026 fprintf(stderr
, " ***************************************************************************\n");
1028 fprintf(stderr
, "\n ***************************************************************************\n");
1029 for (func_num
= 0; task_func_table
[func_num
] != NULL
; ++func_num
)
1031 char *current_name
= task_name_table
[func_num
];
1032 current_name
= ((current_name
== NULL
) ? "??" : current_name
);
1033 if (*current_name
== '&')
1035 fprintf(stderr
, " ``%s'' Doalls = %d\n", current_name
,
1036 stat_doalls_table
[func_num
]);
1038 fprintf(stderr
, " ***************************************************************************\n");
1040 #if defined(SEQUENTIAL)
1041 fprintf(stderr
, "\n ***************************************************************************\n");
1042 for (func_num
= 0; task_func_table
[func_num
] != NULL
; ++func_num
)
1044 char *current_name
= task_name_table
[func_num
];
1045 current_name
= ((current_name
== NULL
) ? "??" : current_name
);
1046 if (*current_name
== '&')
1048 fprintf(stderr
, " ``%s'' Barriers = %d\n", current_name
,
1049 stat_barriers_table
[func_num
]);
1051 fprintf(stderr
, " ***************************************************************************\n");
1053 fprintf(stderr
, "\n ***************************************************************************\n");
1054 for (func_num
= 0; task_func_table
[func_num
] != NULL
; ++func_num
)
1056 char *current_name
= task_name_table
[func_num
];
1057 current_name
= ((current_name
== NULL
) ? "??" : current_name
);
1058 if (*current_name
== '&')
1060 fprintf(stderr
, " ``%s'' Sync Neighbors = %d\n", current_name
,
1061 stat_sync_neighbors_table
[func_num
]);
1063 fprintf(stderr
, " ***************************************************************************\n");
1065 fprintf(stderr
, "\n ***************************************************************************\n");
1066 for (func_num
= 0; task_func_table
[func_num
] != NULL
; ++func_num
)
1068 char *current_name
= task_name_table
[func_num
];
1069 current_name
= ((current_name
== NULL
) ? "??" : current_name
);
1070 if (*current_name
== '&')
1072 fprintf(stderr
, " ``%s'' Locks = %d\n", current_name
,
1073 stat_locks_table
[func_num
]);
1075 fprintf(stderr
, " ***************************************************************************\n");
1077 fprintf(stderr
, "\n ***************************************************************************\n");
1078 for (func_num
= 0; task_func_table
[func_num
] != NULL
; ++func_num
)
1080 char *current_name
= task_name_table
[func_num
];
1081 current_name
= ((current_name
== NULL
) ? "??" : current_name
);
1082 if (*current_name
== '&')
1084 fprintf(stderr
, " ``%s'' Reductions = %d\n", current_name
,
1085 stat_reductions_table
[func_num
]);
1087 fprintf(stderr
, " ***************************************************************************\n");
1088 #endif /* SEQUENTIAL */
1090 #endif /* ALL_STATS */
1095 /*----------------------------------------------------------------------*/
1096 /* suif_reset_stats() - reset statistics */
1098 void suif_reset_stats(void)
1101 SUIF_START_TIMER(alltime_hi_res
, alltime_low_res
);
1103 #if defined(SEQUENTIAL)
1105 num_sync_neighbors
= 0;
1107 num_reductions
= 0 ;
1108 #endif /* SEQUENTIAL */
1115 /*----------------------------------------------------------------------*/
1116 /* serial execution */
1118 /* suif_serial() - start parallel region, run only on master */
1120 void suif_serial(task_f f
)
1122 (*_suif_aligned_doall_level
)++;
1124 if ((*_suif_aligned_doall_level
) > 1) /* only 1 level of parallelism */
1129 #if defined(STATS) || defined(FEEDBACK) || defined(GEN_TRACE)
1131 #if defined(ALL_STATS) || defined(FEEDBACK)
1134 #if defined(FEEDBACK)
1135 int old_nprocs
= *_suif_aligned_my_nprocs
;
1137 #if defined(STATS) || defined(GEN_TRACE)
1138 SUIF_HI_RES_TIMER_DATA_TYPE partime_hi_res
;
1139 double partime_low_res
;
1142 SUIF_START_TIMER(partime_hi_res
, partime_low_res
);
1147 #if defined(ALL_STATS) || defined(FEEDBACK)
1148 if (task_func_table
== NULL
)
1151 task_func_table
= (void **)(malloc(table_size
* sizeof(void *)));
1152 task_name_table
= (char **)(malloc(table_size
* sizeof(char *)));
1153 #if defined(ALL_STATS)
1154 stat_time_table
= (double *)(malloc(table_size
* sizeof(double)));
1155 stat_doalls_table
= (int *)(malloc(table_size
* sizeof(int)));
1156 stat_iters_table
= (double *)(malloc(table_size
* sizeof(double)));
1157 #if defined(SEQUENTIAL)
1158 stat_barriers_table
= (int *)(malloc(table_size
* sizeof(int)));
1159 stat_sync_neighbors_table
=
1160 (int *)(malloc(table_size
* sizeof(int)));
1161 stat_locks_table
= (int *)(malloc(table_size
* sizeof(int)));
1162 stat_reductions_table
= (int *)(malloc(table_size
* sizeof(int)));
1165 #if defined(FEEDBACK)
1166 limit_nproc_table
= (int *)(malloc(table_size
* sizeof(int)));
1168 task_func_table
[0] = NULL
;
1170 while (task_func_table
[func_num
] != f
)
1172 if (task_func_table
[func_num
] == NULL
)
1174 if (func_num
== table_size
- 1)
1176 void **new_func_table
;
1177 char **new_name_table
;
1178 #if defined(ALL_STATS)
1179 double *new_time_table
;
1180 int *new_doalls_table
;
1181 double *new_iters_table
;
1182 #if defined(SEQUENTIAL)
1183 int *new_barriers_table
;
1184 int *new_sync_neighbors_table
;
1185 int *new_locks_table
;
1186 int *new_reductions_table
;
1189 #if defined(FEEDBACK)
1190 int *new_nproc_table
;
1193 (void **)(malloc(table_size
* 2 * sizeof(void *)));
1195 (char **)(malloc(table_size
* 2 * sizeof(char *)));
1196 #if defined(ALL_STATS)
1198 (double *)(malloc(table_size
* 2 * sizeof(double)));
1200 (int *)(malloc(table_size
* 2 * sizeof(int)));
1202 (double *)(malloc(table_size
* 2 * sizeof(double)));
1203 #if defined(SEQUENTIAL)
1204 new_barriers_table
=
1205 (int *)(malloc(table_size
* 2 * sizeof(int)));
1206 new_sync_neighbors_table
=
1207 (int *)(malloc(table_size
* 2 * sizeof(int)));
1209 (int *)(malloc(table_size
* 2 * sizeof(int)));
1210 new_reductions_table
=
1211 (int *)(malloc(table_size
* 2 * sizeof(int)));
1214 #if defined(FEEDBACK)
1216 (int *)(malloc(table_size
* 2 * sizeof(int)));
1218 memcpy(new_func_table
, task_func_table
,
1219 table_size
* sizeof(void *));
1220 memcpy(new_name_table
, task_name_table
,
1221 table_size
* sizeof(char *));
1222 #if defined(ALL_STATS)
1223 memcpy(new_time_table
, stat_time_table
,
1224 table_size
* sizeof(double));
1225 memcpy(new_doalls_table
, stat_doalls_table
,
1226 table_size
* sizeof(int));
1227 memcpy(new_iters_table
, stat_iters_table
,
1228 table_size
* sizeof(double));
1229 #if defined(SEQUENTIAL)
1230 memcpy(new_barriers_table
, stat_barriers_table
,
1231 table_size
* sizeof(int));
1232 memcpy(new_sync_neighbors_table
, stat_sync_neighbors_table
,
1233 table_size
* sizeof(int));
1234 memcpy(new_locks_table
, stat_locks_table
,
1235 table_size
* sizeof(int));
1236 memcpy(new_reductions_table
, stat_reductions_table
,
1237 table_size
* sizeof(int));
1240 #if defined(FEEDBACK)
1241 memcpy(new_nproc_table
, limit_nproc_table
,
1242 table_size
* sizeof(int));
1244 free(task_func_table
);
1245 free(task_name_table
);
1246 #if defined(ALL_STATS)
1247 free(stat_time_table
);
1248 free(stat_doalls_table
);
1249 free(stat_iters_table
);
1250 #if defined(SEQUENTIAL)
1251 free(stat_barriers_table
);
1252 free(stat_sync_neighbors_table
);
1253 free(stat_locks_table
);
1254 free(stat_reductions_table
);
1257 #if defined(FEEDBACK)
1258 free(limit_nproc_table
);
1260 task_func_table
= new_func_table
;
1261 task_name_table
= new_name_table
;
1262 #if defined(ALL_STATS)
1263 stat_time_table
= new_time_table
;
1264 stat_doalls_table
= new_doalls_table
;
1265 stat_iters_table
= new_iters_table
;
1266 #if defined(SEQUENTIAL)
1267 stat_barriers_table
= new_barriers_table
;
1268 stat_sync_neighbors_table
= new_sync_neighbors_table
;
1269 stat_locks_table
= new_locks_table
;
1270 stat_reductions_table
= new_reductions_table
;
1273 #if defined(FEEDBACK)
1274 limit_nproc_table
= new_nproc_table
;
1278 task_func_table
[func_num
] = f
;
1279 task_name_table
[func_num
] = current_task_name
;
1280 #if defined(ALL_STATS)
1281 stat_time_table
[func_num
] = 0.0;
1282 stat_doalls_table
[func_num
] = 0;
1283 stat_iters_table
[func_num
] = 0.0;
1284 #if defined(SEQUENTIAL)
1285 stat_barriers_table
[func_num
] = 0;
1286 stat_sync_neighbors_table
[func_num
] = 0;
1287 stat_locks_table
[func_num
] = 0;
1288 stat_reductions_table
[func_num
] = 0;
1291 #if defined(FEEDBACK)
1292 limit_nproc_table
[func_num
] = 0;
1294 unsigned long entry_num
;
1297 while (feedback_name_table
[entry_num
] != NULL
)
1299 if (strcmp(feedback_name_table
[entry_num
],
1300 current_task_name
) == 0)
1302 limit_nproc_table
[func_num
] =
1303 feedback_nproc_limit_table
[entry_num
];
1310 task_func_table
[func_num
+ 1] = NULL
;
1315 current_task_num
= func_num
;
1316 #if defined(ALL_STATS)
1317 stat_doalls_table
[current_task_num
]++;
1322 #if defined(FEEDBACK)
1323 if ((limit_nproc_table
[current_task_num
] != 0) &&
1324 (limit_nproc_table
[current_task_num
] < *_suif_aligned_my_nprocs
))
1326 *_suif_aligned_my_nprocs
= limit_nproc_table
[current_task_num
];
1332 #if defined(SEQUENTIAL) && defined(STATS)
1334 #if defined(ALL_STATS)
1335 stat_barriers_table
[current_task_num
]++;
1339 #if defined(STATS) || defined(GEN_TRACE)
1340 SUIF_END_TIMER(diff_time
, partime_hi_res
, partime_low_res
);
1342 # if defined(GEN_TRACE)
1343 gen_trace(current_task_num
, current_task_num_iters
, diff_time
);
1348 /* time doesn't count as parallel if we have a feedback
1349 * table where the iteration count should shut it off
1351 if ((!_dynamic_feedback_on
) ||
1352 ((suif_dynamic_feedback_table
[current_task_num
].iter_threshhold
<=
1353 current_task_num_iters
) &&
1354 (suif_dynamic_feedback_table
[current_task_num
].max_parallelism
!= 1)))
1356 partime
= partime
+ diff_time
;
1359 #if defined(ALL_STATS)
1360 stat_time_table
[current_task_num
] += diff_time
;
1361 stat_iters_table
[current_task_num
] += current_task_num_iters
;
1363 #if defined(FEEDBACK)
1364 *_suif_aligned_my_nprocs
= old_nprocs
;
1366 #if defined(STATS) || defined(FEEDBACK)
1371 exit(0); /* some worker encountered STOP, stop also */
1374 (*_suif_aligned_doall_level
)--;
1378 /*----------------------------------------------------------------------*/
1379 /* parallel execution */
1381 /* suif_doall() - start parallel region */
/* suif_doall(f) - run task function f as one parallel region.
 *
 * What the code below shows:
 *  - *_suif_aligned_doall_level is incremented on entry and decremented on
 *    exit; a level above 1 is tested first (only one level of parallelism).
 *  - With ALL_STATS or FEEDBACK, f is looked up in task_func_table, a
 *    NULL-terminated array.  The per-task tables are created on first use
 *    with table_size entries each, and every table is doubled when the
 *    search reaches index table_size - 1.
 *  - With FEEDBACK, a non-zero limit_nproc_table entry that is smaller than
 *    *_suif_aligned_my_nprocs replaces it; old_nprocs is written back later.
 *  - MSlaveBarrier_Release() is called on the shared barrier, followed by
 *    MSlaveBarrier_Wait() for _nproc participants.
 *  - With STATS or GEN_TRACE, the region is timed with SUIF_START_TIMER /
 *    SUIF_END_TIMER and diff_time is added to partime and the stat tables.
 *  - exit(0) is reached when some worker encountered STOP.
 *
 * NOTE(review): none of the malloc() results are checked before use.
 */
1383 void suif_doall(task_f f
)
1385 (*_suif_aligned_doall_level
)++;
1387 if ((*_suif_aligned_doall_level
) > 1) /* only 1 level of parallelism */
1392 #if defined(STATS) || defined(FEEDBACK) || defined(GEN_TRACE)
1394 #if defined(ALL_STATS) || defined(FEEDBACK)
1397 #if defined(FEEDBACK)
1398 int old_nprocs
= *_suif_aligned_my_nprocs
;
1400 #if defined(STATS) || defined(GEN_TRACE)
1401 SUIF_HI_RES_TIMER_DATA_TYPE partime_hi_res
;
1402 double partime_low_res
;
1405 SUIF_START_TIMER(partime_hi_res
, partime_low_res
);
/* first use: create the per-task tables, table_size entries each */
1410 #if defined(ALL_STATS) || defined(FEEDBACK)
1411 if (task_func_table
== NULL
)
1414 task_func_table
= (void **)(malloc(table_size
* sizeof(void *)));
1415 task_name_table
= (char **)(malloc(table_size
* sizeof(char *)));
1416 #if defined(ALL_STATS)
1417 stat_time_table
= (double *)(malloc(table_size
* sizeof(double)));
1418 stat_doalls_table
= (int *)(malloc(table_size
* sizeof(int)));
1419 stat_iters_table
= (double *)(malloc(table_size
* sizeof(double)));
1420 #if defined(SEQUENTIAL)
1421 stat_barriers_table
= (int *)(malloc(table_size
* sizeof(int)));
1422 stat_sync_neighbors_table
=
1423 (int *)(malloc(table_size
* sizeof(int)));
1424 stat_locks_table
= (int *)(malloc(table_size
* sizeof(int)));
1425 stat_reductions_table
= (int *)(malloc(table_size
* sizeof(int)));
1428 #if defined(FEEDBACK)
1429 limit_nproc_table
= (int *)(malloc(table_size
* sizeof(int)));
1431 task_func_table
[0] = NULL
;
/* scan the NULL-terminated table until the entry for f is reached */
1433 while (task_func_table
[func_num
] != f
)
1435 if (task_func_table
[func_num
] == NULL
)
/* index table_size - 1 reached: allocate tables of twice the size, copy, swap */
1437 if (func_num
== table_size
- 1)
1439 void **new_func_table
;
1440 char **new_name_table
;
1441 #if defined(ALL_STATS)
1442 double *new_time_table
;
1443 int *new_doalls_table
;
1444 double *new_iters_table
;
1445 #if defined(SEQUENTIAL)
1446 int *new_barriers_table
;
1447 int *new_sync_neighbors_table
;
1448 int *new_locks_table
;
1449 int *new_reductions_table
;
1452 #if defined(FEEDBACK)
1453 int *new_nproc_table
;
1456 (void **)(malloc(table_size
* 2 * sizeof(void *)));
1458 (char **)(malloc(table_size
* 2 * sizeof(char *)));
1459 #if defined(ALL_STATS)
1461 (double *)(malloc(table_size
* 2 * sizeof(double)));
1463 (int *)(malloc(table_size
* 2 * sizeof(int)));
1465 (double *)(malloc(table_size
* 2 * sizeof(double)));
1466 #if defined(SEQUENTIAL)
1467 new_barriers_table
=
1468 (int *)(malloc(table_size
* 2 * sizeof(int)));
1469 new_sync_neighbors_table
=
1470 (int *)(malloc(table_size
* 2 * sizeof(int)));
1472 (int *)(malloc(table_size
* 2 * sizeof(int)));
1473 new_reductions_table
=
1474 (int *)(malloc(table_size
* 2 * sizeof(int)));
1477 #if defined(FEEDBACK)
1479 (int *)(malloc(table_size
* 2 * sizeof(int)));
1481 memcpy(new_func_table
, task_func_table
,
1482 table_size
* sizeof(void *));
1483 memcpy(new_name_table
, task_name_table
,
1484 table_size
* sizeof(char *));
1485 #if defined(ALL_STATS)
1486 memcpy(new_time_table
, stat_time_table
,
1487 table_size
* sizeof(double));
1488 memcpy(new_doalls_table
, stat_doalls_table
,
1489 table_size
* sizeof(int));
1490 memcpy(new_iters_table
, stat_iters_table
,
1491 table_size
* sizeof(double));
1492 #if defined(SEQUENTIAL)
1493 memcpy(new_barriers_table
, stat_barriers_table
,
1494 table_size
* sizeof(int));
1495 memcpy(new_sync_neighbors_table
, stat_sync_neighbors_table
,
1496 table_size
* sizeof(int));
1497 memcpy(new_locks_table
, stat_locks_table
,
1498 table_size
* sizeof(int));
1499 memcpy(new_reductions_table
, stat_reductions_table
,
1500 table_size
* sizeof(int));
1503 #if defined(FEEDBACK)
1504 memcpy(new_nproc_table
, limit_nproc_table
,
1505 table_size
* sizeof(int));
1507 free(task_func_table
);
1508 free(task_name_table
);
1509 #if defined(ALL_STATS)
1510 free(stat_time_table
);
1511 free(stat_doalls_table
);
1512 free(stat_iters_table
);
1513 #if defined(SEQUENTIAL)
1514 free(stat_barriers_table
);
1515 free(stat_sync_neighbors_table
);
1516 free(stat_locks_table
);
1517 free(stat_reductions_table
);
1520 #if defined(FEEDBACK)
1521 free(limit_nproc_table
);
1523 task_func_table
= new_func_table
;
1524 task_name_table
= new_name_table
;
1525 #if defined(ALL_STATS)
1526 stat_time_table
= new_time_table
;
1527 stat_doalls_table
= new_doalls_table
;
1528 stat_iters_table
= new_iters_table
;
1529 #if defined(SEQUENTIAL)
1530 stat_barriers_table
= new_barriers_table
;
1531 stat_sync_neighbors_table
= new_sync_neighbors_table
;
1532 stat_locks_table
= new_locks_table
;
1533 stat_reductions_table
= new_reductions_table
;
1536 #if defined(FEEDBACK)
1537 limit_nproc_table
= new_nproc_table
;
/* register f in slot func_num with zeroed statistics */
1541 task_func_table
[func_num
] = f
;
1542 task_name_table
[func_num
] = current_task_name
;
1543 #if defined(ALL_STATS)
1544 stat_time_table
[func_num
] = 0.0;
1545 stat_doalls_table
[func_num
] = 0;
1546 stat_iters_table
[func_num
] = 0.0;
1547 #if defined(SEQUENTIAL)
1548 stat_barriers_table
[func_num
] = 0;
1549 stat_sync_neighbors_table
[func_num
] = 0;
1550 stat_locks_table
[func_num
] = 0;
1551 stat_reductions_table
[func_num
] = 0;
1554 #if defined(FEEDBACK)
1555 limit_nproc_table
[func_num
] = 0;
/* a feedback_name_table entry equal to current_task_name supplies the limit */
1557 unsigned long entry_num
;
1560 while (feedback_name_table
[entry_num
] != NULL
)
1562 if (strcmp(feedback_name_table
[entry_num
],
1563 current_task_name
) == 0)
1565 limit_nproc_table
[func_num
] =
1566 feedback_nproc_limit_table
[entry_num
];
1573 task_func_table
[func_num
+ 1] = NULL
;
1578 current_task_num
= func_num
;
1579 #if defined(ALL_STATS)
1580 stat_doalls_table
[current_task_num
]++;
1585 #if defined(FEEDBACK)
1586 if ((limit_nproc_table
[current_task_num
] != 0) &&
1587 (limit_nproc_table
[current_task_num
] < *_suif_aligned_my_nprocs
))
1589 *_suif_aligned_my_nprocs
= limit_nproc_table
[current_task_num
];
1593 MSlaveBarrier_Release(&_suif_aligned_vars
->barrier
);
1597 #if defined(SEQUENTIAL) && defined(STATS)
1599 #if defined(ALL_STATS)
1600 stat_barriers_table
[current_task_num
]++;
1604 MSlaveBarrier_Wait(&_suif_aligned_vars
->barrier
,
1605 _suif_aligned_vars
->_nproc
);
1607 #if defined(STATS) || defined(GEN_TRACE)
1608 SUIF_END_TIMER(diff_time
, partime_hi_res
, partime_low_res
);
1610 # if defined(GEN_TRACE)
1611 gen_trace(current_task_num
, current_task_num_iters
, diff_time
);
1616 /* time doesn't count as parallel if we have a feedback
1617 * table where the iteration count should shut it off
1619 if ((!_dynamic_feedback_on
) ||
1620 ((suif_dynamic_feedback_table
[current_task_num
].iter_threshhold
<=
1621 current_task_num_iters
) &&
1622 (suif_dynamic_feedback_table
[current_task_num
].max_parallelism
!=
1624 partime
= partime
+ diff_time
;
1626 #if defined(ALL_STATS)
1627 stat_time_table
[current_task_num
] += diff_time
;
1628 stat_iters_table
[current_task_num
] += current_task_num_iters
;
1631 #if defined(FEEDBACK)
1632 *_suif_aligned_my_nprocs
= old_nprocs
;
1634 #if defined(STATS) || defined(FEEDBACK)
1639 exit(0); /* some worker encountered STOP, stop also */
1642 (*_suif_aligned_doall_level
)--;
/* suif_limited_doall() - parallel region with a cap on the processor count.
 *
 * f:           task function for the region.
 * nproc_limit: cap applied to *_suif_aligned_my_nprocs.
 *
 * When nproc_limit is below the current *_suif_aligned_my_nprocs, that
 * value is lowered to nproc_limit and afterwards set back to the saved
 * old_nprocs.
 * NOTE(review): presumably f is run (via suif_doall) between the two
 * adjustments - confirm.
 */
1646 void suif_limited_doall(task_f f
, int nproc_limit
)
1648 int old_nprocs
= *_suif_aligned_my_nprocs
;
1649 if (nproc_limit
< old_nprocs
)
1650 *_suif_aligned_my_nprocs
= nproc_limit
;
1652 if (nproc_limit
< old_nprocs
)
1653 *_suif_aligned_my_nprocs
= old_nprocs
;
/* suif_named_doall() - parallel region for a task identified by name/number.
 *
 * f:              task function.
 * task_func_name: stored in current_task_name when
 *                 (STATS && ALL_STATS) or GEN_TRACE is defined.
 * task_func_num:  stored in current_task_num and passed to
 *                 suif_dynamic_feedback_get_nprocs().
 * num_iters:      iteration count, also passed to the feedback query.
 * (is_iters_const is used below; presumably a further parameter - confirm.)
 *
 * The processor count returned by suif_dynamic_feedback_get_nprocs() is
 * written to *_suif_aligned_my_nprocs when it differs from the current
 * value, and the old value is restored at the end.  The region is handled
 * differently depending on whether new_nprocs is greater than 1.
 */
1656 void suif_named_doall(task_f f
, char *task_func_name
,
1657 int task_func_num
, int num_iters
,
1663 #if (defined(STATS) && defined(ALL_STATS)) || defined(GEN_TRACE)
1664 current_task_name
= task_func_name
;
1665 current_task_iters_const
= is_iters_const
;
1666 current_task_num_iters
= num_iters
;
1668 current_task_num
= task_func_num
;
1669 old_nprocs
= *_suif_aligned_my_nprocs
;
1670 new_nprocs
= suif_dynamic_feedback_get_nprocs(task_func_num
,
1671 num_iters
, old_nprocs
);
1672 if (new_nprocs
!= old_nprocs
) {
1673 *_suif_aligned_my_nprocs
= new_nprocs
;
1677 if (new_nprocs
> 1) {
1683 if (new_nprocs
!= old_nprocs
) {
1684 *_suif_aligned_my_nprocs
= old_nprocs
;
1689 /*----------------------------------------------------------------------*/
1690 /* suif_worker() - worker threads spin until work provided via suif_doall() */
/* Thread entry routine of the runtime.
 *
 * myid is 0 when THREADS_EQUAL(_master_pid, GETUNIQUEID) holds; any other
 * thread takes FINC(_sysfop) + 1.  (myid + _proc_start) % _maxproc is then
 * handed to PROCASSIGN and counter 0 of myid is cleared.
 *
 * The rest works on the shared MSlaveBarrier:
 *  - MSlaveBarrier_Wait(), _suif_start(_argc, _argv, _envp), clearing
 *    *_suif_aligned_task_f and MSlaveBarrier_Release();
 *  - MSlaveBarrier_SlaveFirst(), a test for a null *_suif_aligned_task_f,
 *    a call of the stored task function with myid, and
 *    MSlaveBarrier_SlaveEnter().
 * NOTE(review): presumably the first group is the master path and a null
 * task function tells the workers to stop - confirm.
 */
1692 static void suif_worker(void)
1694 int myid
; /* id of worker thread */
1696 myid
= (THREADS_EQUAL(_master_pid
, GETUNIQUEID
)) ? 0 : FINC(_sysfop
) + 1;
1699 PROCASSIGN(((myid
+ _suif_aligned_vars
->_proc_start
) %
1700 _suif_aligned_vars
->_maxproc
))
1702 suif_counter_init(myid
, 0);
1707 #if defined(SEQUENTIAL) && defined(STATS)
1709 #if defined(ALL_STATS)
1710 stat_barriers_table
[current_task_num
]++;
1714 MSlaveBarrier_Wait(&_suif_aligned_vars
->barrier
,
1715 _suif_aligned_vars
->_nproc
);
1716 _suif_start(_argc
, _argv
, _envp
);
1717 *_suif_aligned_task_f
= 0;
1719 MSlaveBarrier_Release(&_suif_aligned_vars
->barrier
);
1723 MSlaveBarrier_SlaveFirst(&_suif_aligned_vars
->barrier
,myid
);
1726 if (!(*_suif_aligned_task_f
)) {
1729 (**_suif_aligned_task_f
)(myid
);
1732 MSlaveBarrier_SlaveEnter(&_suif_aligned_vars
->barrier
,myid
);
1737 /*----------------------------------------------------------------------*/
1738 /* suif_exit_log() - Log exit for other threads, so they don't keep waiting */
1739 /* Called when Fortran STOP statement is encountered */
/* Inside a parallel region (*_suif_aligned_doall_level > 0) the caller
 * waits on the shared barrier.  The visible code also clears
 * *_suif_aligned_task_f and releases the barrier, and the thread whose
 * suif_get_my_id() is 0 calls suif_wait_for_end().
 * NOTE(review): presumably the clear/release pair belongs to the branch
 * taken outside a parallel region - confirm.
 */
1741 void suif_exit_log(void)
1743 if ((*_suif_aligned_doall_level
) > 0) { /* inside parallel region */
1744 MSlaveBarrier_Wait(&_suif_aligned_vars
->barrier
,
1745 _suif_aligned_vars
->_nproc
);
1748 *_suif_aligned_task_f
= 0;
1749 MSlaveBarrier_Release(&_suif_aligned_vars
->barrier
);
1752 if (!suif_get_my_id())
1753 suif_wait_for_end();
1755 #if defined(GEN_TRACE)
1761 /*----------------------------------------------------------------------*/
1764 /* _barrier() - barrier used by runtime system */
/* Barrier on _sysbar1 for _suif_aligned_vars->_nproc participants.
 * With SEQUENTIAL, STATS and ALL_STATS defined, the barrier count of the
 * current task is incremented first.
 */
1766 static void _barrier(void)
1768 #if defined(SEQUENTIAL) && defined(STATS)
1770 #if defined(ALL_STATS)
1771 stat_barriers_table
[current_task_num
]++;
1775 BARRIER(_sysbar1
, _suif_aligned_vars
->_nproc
)
/* Same as _barrier() but on the separate barrier object _sysbar2, again for
 * _suif_aligned_vars->_nproc participants; the per-task barrier statistic
 * is updated under the same preprocessor conditions.
 */
1778 static void _barrier2(void) /* this barrier used at end of parallel task */
1780 #if defined(SEQUENTIAL) && defined(STATS)
1782 #if defined(ALL_STATS)
1783 stat_barriers_table
[current_task_num
]++;
1787 BARRIER(_sysbar2
, _suif_aligned_vars
->_nproc
)
1790 /*----------------------------------------------------------------------*/
1791 /* suif_global_barrier() - block until all threads enter barrier */
/* id: barrier identifier supplied by the caller.
 *
 * Eight barrier objects, _bar_global0 .. _bar_global7, are used, each for
 * _suif_aligned_vars->_nproc participants.
 * NOTE(review): presumably id selects which of the eight is entered -
 * confirm how out-of-range ids are mapped.
 * With SEQUENTIAL, STATS and ALL_STATS defined, the barrier count of the
 * current task is incremented first.
 */
1793 void suif_global_barrier(int id
)
1795 #if defined(SEQUENTIAL) && defined(STATS)
1797 #if defined(ALL_STATS)
1798 stat_barriers_table
[current_task_num
]++;
1804 BARRIER(_bar_global0
, _suif_aligned_vars
->_nproc
)
1807 BARRIER(_bar_global1
, _suif_aligned_vars
->_nproc
)
1810 BARRIER(_bar_global2
, _suif_aligned_vars
->_nproc
)
1813 BARRIER(_bar_global3
, _suif_aligned_vars
->_nproc
)
1816 BARRIER(_bar_global4
, _suif_aligned_vars
->_nproc
)
1819 BARRIER(_bar_global5
, _suif_aligned_vars
->_nproc
)
1822 BARRIER(_bar_global6
, _suif_aligned_vars
->_nproc
)
1825 BARRIER(_bar_global7
, _suif_aligned_vars
->_nproc
)
/* glbar_() - by-reference entry point for suif_global_barrier().
 *
 * id: pointer to the barrier id; the pointed-to value is passed on.
 */
void glbar_(int *id)
{
    const int barrier_id = *id;

    suif_global_barrier(barrier_id);
}
1836 /*----------------------------------------------------------------------*/
1837 /* suif_barrier() - block until "nproc" processors enter barrier "id" */
/* id:    selects one of the barrier objects _bar1 .. _bar8 (valid ids 1..8).
 * nproc: number of participants handed to BARRIER().
 *
 * Any other id only produces "Barrier[<id>] not supported" on stderr; no
 * barrier is entered in that case.  With SEQUENTIAL, STATS and ALL_STATS
 * defined, the barrier count of the current task is incremented first.
 */
1839 void suif_barrier(int id
, int nproc
)
1841 #if defined(SEQUENTIAL) && defined(STATS)
1843 #if defined(ALL_STATS)
1844 stat_barriers_table
[current_task_num
]++;
1848 switch (id
) /* block until nproc threads reach barrier */
1850 case 1: BARRIER(_bar1
, nproc
) break;
1851 case 2: BARRIER(_bar2
, nproc
) break;
1852 case 3: BARRIER(_bar3
, nproc
) break;
1853 case 4: BARRIER(_bar4
, nproc
) break;
1854 case 5: BARRIER(_bar5
, nproc
) break;
1855 case 6: BARRIER(_bar6
, nproc
) break;
1856 case 7: BARRIER(_bar7
, nproc
) break;
1857 case 8: BARRIER(_bar8
, nproc
) break;
1858 default: fprintf(stderr
, "Barrier[%d] not supported\n", id
); break;
1863 /*----------------------------------------------------------------------*/
1864 /* suif_sync_neighbor() - block until neighbor threads enter barrier */
/* id: barrier identifier, forwarded unchanged to suif_global_barrier().
 *
 * No neighbour-only synchronisation is implemented: the call is a full
 * global barrier.  With SEQUENTIAL and STATS defined, num_sync_neighbors is
 * counted, and with ALL_STATS also the per-task table entry.
 */
1866 void suif_sync_neighbor(int id
)
1868 #if defined(SEQUENTIAL) && defined(STATS)
1869 num_sync_neighbors
++;
1870 #if defined(ALL_STATS)
1871 stat_sync_neighbors_table
[current_task_num
]++;
1875 suif_global_barrier(id
); /* just use normal barrier for now */
1880 suif_sync_neighbor(*id
);
1885 /*----------------------------------------------------------------------*/
1888 /* suif_lock() - block until able to acquire lock "id" */
/* id: lock number; 1..8 map to the lock objects _lock1 .. _lock8.
 *
 * Any other id only produces "Lock[<id>] not supported" on stderr and no
 * lock is taken.  With SEQUENTIAL, STATS and ALL_STATS defined, the lock
 * count of the current task is incremented first.
 */
1890 void suif_lock(int id
)
1892 #if defined(SEQUENTIAL) && defined(STATS)
1894 #if defined(ALL_STATS)
1895 stat_locks_table
[current_task_num
]++;
1901 case 1: LOCK(_lock1
) break;
1902 case 2: LOCK(_lock2
) break;
1903 case 3: LOCK(_lock3
) break;
1904 case 4: LOCK(_lock4
) break;
1905 case 5: LOCK(_lock5
) break;
1906 case 6: LOCK(_lock6
) break;
1907 case 7: LOCK(_lock7
) break;
1908 case 8: LOCK(_lock8
) break;
1909 default: fprintf(stderr
, "Lock[%d] not supported\n", id
); break;
1913 void slock_(int *id
)
1918 /*----------------------------------------------------------------------*/
1919 /* suif_reduction_lock() - block until able to acquire lock "id" */
/* Mask applied to reduction lock ids.  Because it is used with "&", it
 * only acts as a modulo when NUM_RLOCK is a power of two.
 * NOTE(review): confirm NUM_RLOCK is a power of two where it is defined.
 */
1920 #define MAX_RLOCK_ID (NUM_RLOCK-1)
/* id: reduction lock id; (id & MAX_RLOCK_ID) is the slot of
 * _lock_reduction that is acquired through ALOCK(), so different ids can
 * share one lock.  With SEQUENTIAL, STATS and ALL_STATS defined, the lock
 * count of the current task is incremented first.
 */
1922 void suif_reduction_lock(int id
)
1924 #if defined(SEQUENTIAL) && defined(STATS)
1926 #if defined(ALL_STATS)
1927 stat_locks_table
[current_task_num
]++;
1931 ALOCK(_lock_reduction
,(id
&MAX_RLOCK_ID
))
/* rlock_() - by-reference entry point for suif_reduction_lock().
 *
 * id: pointer to the reduction lock id; the pointed-to value is passed on.
 */
void rlock_(int *id)
{
    const int lock_id = *id;

    suif_reduction_lock(lock_id);
}
1941 /*----------------------------------------------------------------------*/
1942 /* suif_unlock() - release lock "id" */
/* id: lock number; 1..8 map to the lock objects _lock1 .. _lock8, the same
 * objects suif_lock() acquires.
 *
 * Any other id only produces "Lock[<id>] not supported" on stderr and
 * nothing is released.
 */
1944 void suif_unlock(int id
)
1948 case 1: UNLOCK(_lock1
) break;
1949 case 2: UNLOCK(_lock2
) break;
1950 case 3: UNLOCK(_lock3
) break;
1951 case 4: UNLOCK(_lock4
) break;
1952 case 5: UNLOCK(_lock5
) break;
1953 case 6: UNLOCK(_lock6
) break;
1954 case 7: UNLOCK(_lock7
) break;
1955 case 8: UNLOCK(_lock8
) break;
1956 default: fprintf(stderr
, "Lock[%d] not supported\n", id
); break;
1961 void sulock_(int *id
)
1967 /*----------------------------------------------------------------------*/
1968 /* suif_reduction_unlock() - release lock "id" */
1970 void suif_reduction_unlock(int id
)
1972 AUNLOCK(_lock_reduction
,(id
&MAX_RLOCK_ID
))
/* rulock_() - by-reference entry point for suif_reduction_unlock().
 *
 * id: pointer to the reduction lock id; the pointed-to value is passed on.
 */
void rulock_(int *id)
{
    const int lock_id = *id;

    suif_reduction_unlock(lock_id);
}
1981 /*----------------------------------------------------------------------*/
1982 /* fetch & increment counters */
1984 /* suif_finc_init() - initialize fetch&inc counter "id" to 0 */
/* id: counter number; 1..8 map to the fetch&increment objects
 * _fop1 .. _fop8, which are reset with FOPINIT(..., 0).
 *
 * Any other id only produces "Getsub[<id>] not supported" on stderr.
 */
1986 void suif_finc_init(int id
)
1990 case 1: FOPINIT(_fop1
,0); break;
1991 case 2: FOPINIT(_fop2
,0); break;
1992 case 3: FOPINIT(_fop3
,0); break;
1993 case 4: FOPINIT(_fop4
,0); break;
1994 case 5: FOPINIT(_fop5
,0); break;
1995 case 6: FOPINIT(_fop6
,0); break;
1996 case 7: FOPINIT(_fop7
,0); break;
1997 case 8: FOPINIT(_fop8
,0); break;
1998 default: fprintf(stderr
, "Getsub[%d] not supported\n", id
); break;
2004 /*----------------------------------------------------------------------*/
2005 /* suif_finc() - fetch & increment counter "id" */
/* id: counter number; 1..8 map to _fop1 .. _fop8.
 *
 * Returns the result of FINC() on the selected counter.  For any other id,
 * "Getsub[<id>] not supported" is printed on stderr and -1 is returned.
 */
2007 int suif_finc(int id
)
2011 case 1: return FINC(_fop1
);
2012 case 2: return FINC(_fop2
);
2013 case 3: return FINC(_fop3
);
2014 case 4: return FINC(_fop4
);
2015 case 5: return FINC(_fop5
);
2016 case 6: return FINC(_fop6
);
2017 case 7: return FINC(_fop7
);
2018 case 8: return FINC(_fop8
);
2019 default: fprintf(stderr
, "Getsub[%d] not supported\n", id
); return -1;
2025 /*----------------------------------------------------------------------*/
2028 /* suif_counter_init_all() - clear all counters to 0 */
2030 void suif_counter_init_all(void)
2032 memset((void *) _mylocks
, 0, sizeof(_mylocks
));
2037 /*----------------------------------------------------------------------*/
2038 /* suif_counter_init_range() - zero counters from 0 to "id" for all procs */
/* id: highest counter index to reset.  A value outside
 *     0 .. DEFAULT_MAXCOUNTER-1 is replaced by DEFAULT_MAXCOUNTER-1, i.e.
 *     the whole counter range is covered.
 *
 * Iterates over all DEFAULT_MAXPROC processors and, for each, over counter
 * indices 0..id inclusive.
 * NOTE(review): presumably each visited _mylocks entry is set to 0 -
 * confirm.
 */
2040 void suif_counter_init_range(int id
)
2044 if ((id
< 0) || (id
>= DEFAULT_MAXCOUNTER
))
2045 id
= DEFAULT_MAXCOUNTER
-1;
2047 for (i
= 0; i
< DEFAULT_MAXPROC
; i
++) {
2048 for (j
= 0; j
<= id
; j
++) {
/* cinitr_() - by-reference entry point for suif_counter_init_range().
 *
 * id: pointer to the highest counter index; the pointed-to value is
 *     passed on.
 */
void cinitr_(int *id)
{
    const int last_counter = *id;

    suif_counter_init_range(last_counter);
}
2061 /*----------------------------------------------------------------------*/
2062 /* suif_counter_init() - set "id" counter for processor "proc" to 0 */
2064 void suif_counter_init(int proc
, int id
)
2066 if ((proc
< 0) || (proc
>= DEFAULT_MAXPROC
))
2067 fprintf(stderr
, "Counter: proc %d out of range \n", proc
);
2068 if ((id
< 0) || (id
>= DEFAULT_MAXCOUNTER
))
2069 fprintf(stderr
, "Counter: id %d out of range \n", id
);
2071 _mylocks
[proc
][id
] = 0;
2075 /*----------------------------------------------------------------------*/
2076 /* suif_counter_incr() - increment "id" counter for processor "proc" by 1 */
/* proc: processor index, expected in 0 .. DEFAULT_MAXPROC-1.
 * id:   counter index, expected in 0 .. DEFAULT_MAXCOUNTER-1.
 *
 * Adds 1 to _mylocks[proc][id].
 * NOTE(review): an out-of-range proc or id is only reported on stderr; the
 * increment below still appears to use the bad index - confirm and guard.
 */
2078 void suif_counter_incr(int proc
, int id
)
2080 if ((proc
< 0) || (proc
>= DEFAULT_MAXPROC
))
2081 fprintf(stderr
, "Counter: proc %d out of range \n", proc
);
2082 if ((id
< 0) || (id
>= DEFAULT_MAXCOUNTER
))
2083 fprintf(stderr
, "Counter: id %d out of range \n", id
);
2087 _mylocks
[proc
][id
]++;
/* cincr_() - by-reference entry point for suif_counter_incr().
 *
 * proc: pointer to the processor index.
 * id:   pointer to the counter index.
 */
void cincr_(int *proc, int *id)
{
    const int proc_index = *proc;
    const int counter_index = *id;

    suif_counter_incr(proc_index, counter_index);
}
2098 /*----------------------------------------------------------------------*/
2099 /* suif_counter_set() - set "id" counter for processor "proc" to "val" */
2101 void suif_counter_set(int proc
, int id
, int val
)
2103 if ((proc
< 0) || (proc
>= DEFAULT_MAXPROC
))
2104 fprintf(stderr
, "Counter: proc %d out of range \n", proc
);
2105 if ((id
< 0) || (id
>= DEFAULT_MAXCOUNTER
))
2106 fprintf(stderr
, "Counter: id %d out of range \n", id
);
2108 _mylocks
[proc
][id
] = val
;
/* cset_() - by-reference entry point for suif_counter_set().
 *
 * proc: pointer to the processor index.
 * id:   pointer to the counter index.
 * val:  pointer to the value to store.
 */
void cset_(int *proc, int *id, int *val)
{
    const int proc_index = *proc;
    const int counter_index = *id;
    const int new_value = *val;

    suif_counter_set(proc_index, counter_index, new_value);
}
2120 /*----------------------------------------------------------------------*/
2121 /* suif_counter_set_range() - set "0..id-1" counter for processors "0..proc-1" to "val" */
2123 void suif_counter_set_range(int proc
, int id
, int val
)
2127 if ((proc
< 0) || (proc
>= DEFAULT_MAXPROC
))
2128 fprintf(stderr
, "Counter: proc %d out of range \n", proc
);
2129 if ((id
< 0) || (id
>= DEFAULT_MAXCOUNTER
))
2130 fprintf(stderr
, "Counter: id %d out of range \n", id
);
2132 for (i
= 0; i
< proc
; i
++) {
2133 for (j
= 0; j
< id
; j
++) {
2134 _mylocks
[i
][j
] = val
;
/* csetr_() - by-reference entry point for suif_counter_set_range().
 *
 * proc: pointer to the processor count.
 * id:   pointer to the counter count.
 * val:  pointer to the value to store.
 */
void csetr_(int *proc, int *id, int *val)
{
    const int proc_count = *proc;
    const int counter_count = *id;
    const int new_value = *val;

    suif_counter_set_range(proc_count, counter_count, new_value);
}
2146 /*----------------------------------------------------------------------*/
2147 /* suif_counter_wait() - block until counter achieves value "val" or
/* proc: processor index, expected in 0 .. DEFAULT_MAXPROC-1.
 * id:   counter index, expected in 0 .. DEFAULT_MAXCOUNTER-1.
 * val:  value the counter has to reach.
 *
 * In a SEQUENTIAL build only "Counters not supported in SEQUENTIAL mode" is
 * printed.  The spin loop busy-waits while _mylocks[proc][id] < val.
 * NOTE(review): an out-of-range proc or id is only reported on stderr and
 * the loop still appears to use the bad index - confirm and guard.
 * NOTE(review): the spin only terminates if _mylocks is re-read on every
 * iteration (e.g. declared volatile) - confirm its declaration.
 */
2150 void suif_counter_wait(int proc
, int id
, int val
)
2152 if ((proc
< 0) || (proc
>= DEFAULT_MAXPROC
))
2153 fprintf(stderr
, "Counter: proc %d out of range \n", proc
);
2154 if ((id
< 0) || (id
>= DEFAULT_MAXCOUNTER
))
2155 fprintf(stderr
, "Counter: id %d out of range \n", id
);
2157 #if defined(SEQUENTIAL)
2158 fprintf(stderr
, "Counters not supported in SEQUENTIAL mode \n");
2162 while (_mylocks
[proc
][id
] < val
) ; /* spin */
/* cwait_() - by-reference entry point for suif_counter_wait().
 *
 * proc: pointer to the processor index.
 * id:   pointer to the counter index.
 * val:  pointer to the value to wait for.
 */
void cwait_(int *proc, int *id, int *val)
{
    const int proc_index = *proc;
    const int counter_index = *id;
    const int target = *val;

    suif_counter_wait(proc_index, counter_index, target);
}
2174 /*----------------------------------------------------------------------*/
2175 /* suif_speculate_begin() */
2177 void suif_speculate_begin()
2187 void suif_par_begin()
2196 /*----------------------------------------------------------------------*/
2197 /* suif_infinite_loop() */
2199 void suif_infinite_loop()
2205 /*----------------------------------------------------------------------*/
2206 /* suif_speculate_commit() */
2208 void suif_speculate_commit()
2218 void suif_par_commit()
2228 /*----------------------------------------------------------------------*/
2229 /* suif_speculate_terminate() */
2231 void suif_speculate_terminate()
2241 void suif_par_terminate()
2251 /* dummy routine for simulator */
2252 void suif_start_packing(task_f f
, char *name
) { }
2254 /*----------------------------------------------------------------------*/
2257 /* suif_clock() - return user time in seconds */
/* Start stamps of the accurate timer: written through SUIF_START_TIMER()
 * in suif_start_timer() and passed to SUIF_END_TIMER() in suif_end_timer().
 */
2259 static SUIF_HI_RES_TIMER_DATA_TYPE hi_res_start_time
;
2260 static double low_res_start_time
;
2262 double suif_clock(void)
/* Reads the high-resolution clock into t through the SUIF_HI_RES_CLOCK()
 * macro.
 * NOTE(review): presumably t is the returned time in seconds - confirm.
 */
2270 double suif_hi_res_clock(void)
2274 SUIF_HI_RES_CLOCK(t
);
2278 void suif_start_timer(void)
2280 SUIF_START_TIMER(hi_res_start_time
, low_res_start_time
);
/* Finishes the measurement begun by suif_start_timer(): result and the two
 * saved start stamps are handed to SUIF_END_TIMER().
 * NOTE(review): presumably result, the elapsed time, is returned - confirm.
 */
2283 double suif_end_timer(void)
2286 SUIF_END_TIMER(result
, hi_res_start_time
, low_res_start_time
);
2291 /*----------------------------------------------------------------------*/
2292 /* reductions & scans */
/* statistics() - bookkeeping hook with three preprocessor-selected forms:
 *  - SEQUENTIAL, STATS and ALL_STATS: counts num_reductions and the
 *    stat_reductions_table entry of the current task;
 *  - a form that counts num_reductions only;
 *  - an empty form.
 * NOTE(review): presumably expanded inside the routines generated by
 * REDUCTION_ROUTINES() - confirm against that macro.
 */
2294 #if defined(SEQUENTIAL) && defined(STATS)
2295 #if defined(ALL_STATS)
2296 #define statistics() \
2297 { num_reductions++; stat_reductions_table[current_task_num]++; }
2299 #define statistics() { num_reductions++; }
2302 #define statistics()
2305 /* reduce routines */
/* One REDUCTION_ROUTINES() expansion per arithmetic type.  The arguments
 * used here are the C type, an identifier-safe spelling of the type, and a
 * short suffix (sc, uc, ss, us, si, ui, sl, ul, f, d, ld).
 * NOTE(review): presumably the last two are pasted into the generated
 * routine names - confirm against the macro definition.
 */
2307 REDUCTION_ROUTINES(signed char, signed_char
, sc
)
2308 REDUCTION_ROUTINES(unsigned char, unsigned_char
, uc
)
2309 REDUCTION_ROUTINES(short, short, ss
)
2310 REDUCTION_ROUTINES(unsigned short, unsigned_short
, us
)
2311 REDUCTION_ROUTINES(int, int, si
)
2312 REDUCTION_ROUTINES(unsigned int, unsigned_int
, ui
)
2313 REDUCTION_ROUTINES(long, long, sl
)
2314 REDUCTION_ROUTINES(unsigned long, unsigned_long
, ul
)
2315 REDUCTION_ROUTINES(float, float, f
)
2316 REDUCTION_ROUTINES(double, double, d
)
2317 REDUCTION_ROUTINES(long double, long_double
, ld
)
2319 /* Initializations: one REDUCTION_INITS() per type, same (type, name,
 * suffix) triple as above, followed by constants.  The first three look
 * like the start values for sum (0), product (1) and maximum (smallest
 * value of the type) - TODO confirm against the macro definition.
 */
2320 REDUCTION_INITS(signed char, signed_char
, sc
, 0, 1, SCHAR_MIN
,
2322 REDUCTION_INITS(unsigned char, unsigned_char
, uc
, 0, 1, 0,
2324 REDUCTION_INITS(short, short, ss
, 0, 1, SHRT_MIN
,
2326 REDUCTION_INITS(unsigned short, unsigned_short
, us
, 0, 1, 0,
2328 REDUCTION_INITS(int, int, si
, 0, 1, INT_MIN
,
2330 REDUCTION_INITS(unsigned int, unsigned_int
, ui
, 0u, 1u, 0u,
2332 REDUCTION_INITS(long, long, sl
, 0l, 1l, LONG_MIN
,
2334 REDUCTION_INITS(unsigned long, unsigned_long
, ul
, 0ul, 1ul, 0ul,
2336 REDUCTION_INITS(float, float, f
, 0.0f
, 1.0f
, -FLT_MAX
,
2338 REDUCTION_INITS(double, double, d
, 0.0, 1.0, -DBL_MAX
,
2340 REDUCTION_INITS(long double, long_double
, ld
, 0.0l, 1.0l, -LDBL_MAX
,
2343 INITGEN(signed char, signed_char
, sc
)
2344 INITGEN(unsigned char, unsigned_char
, uc
)
2345 INITGEN(short, short, ss
)
2346 INITGEN(unsigned short, unsigned_short
, us
)
2347 INITGEN(int, int, si
)
2348 INITGEN(unsigned int, unsigned_int
, ui
)
2349 INITGEN(long, long, sl
)
2350 INITGEN(unsigned long, unsigned_long
, ul
)
2351 INITGEN(float, float, f
)
2352 INITGEN(double, double, d
)
2353 INITGEN(long double, long_double
, ld
)