add $(EXEEXT) to executable targets during installation for MinGW
[suif.git] / src / baseparsuif / runtime / runtime.c
blobfccb8b01212e10a0f3f857d7ac55d9d598281e72
1 /* file "runtime.c" */
3 /* Copyright (c) 1994 Stanford University
5 All rights reserved.
7 This software is provided under the terms described in
8 the "suif_copyright.h" file. */
10 #include <suif_copyright.h>
12 /*----------------------------------------------------------------------*
14 * runtime.c Run-time routines for output of SUIF shared-memory compiler
16 * GLOBAL DEFINITIONS
17 * ------------------
18 * int suif_get_my_id() return logical processor id
19 * int suif_num_total_ids() return total # of processors
21 * void suif_global_barrier(id) block until all procs arrive
22 * void suif_barrier(id,n) block until n procs arrive
24 * void suif_sync_neighbor(id) block until neighbors arrive
26 * void suif_lock(id) block until lock acquired
27 * void suif_unlock(id) release lock
29 * double suif_clock() return time in seconds
30 * double suif_hi_res_clock() return time in seconds (better
31 * resolution than suif_clock(),
32 * but not guaranteed not to
33 * overflow)
34 * void suif_start_timer() start accurate timer
35 * double suif_end_timer() finish accurate timer, return
36 * time in seconds
38 * void suif_doall(func,ptr) parallel call
39 * int suif_doall_level() return level of parallelism
41 * void suif_restore_state(pid, my_id) called by hardware simulator to restore
42 * void restart(pid) called by hardware simulator to restore
44 * void suif_finc_init(id) reset fetch and increment counter to 0
45 * int suif_finc(id) fetch value and increment counter
47 * void suif_counter_init_all() reset all counters to 0
48 * void suif_counter_init_range(id) reset counters in range 0..id to 0
49 * void suif_counter_init(proc,id) reset counter to 0
50 * void suif_counter_incr(proc,id) increment counter
51 * void suif_counter_set(proc,id,val) set counter to val
52 * void suif_counter_set_range(proc,id,val) set counters [0..proc-1][0..id-1] to val
53 * void suif_counter_wait(proc,id,val) block until counter >= value
55 * void suif_speculate_begin() begin speculation
56 * void suif_speculate_commit() commit speculation
57 * void suif_speculate_terminate() terminate speculation (also commit)
58 * void suif_par_begin() begin speculation
59 * void suif_par_commit() commit speculation
60 * void suif_par_terminate() terminate speculation (also commit)
61 * void suif_infinite_loop() infinite loop
62 * void *suif_malloc(size_t size) allocate data, cache-line aligned
64 * void reduce_OP_TYPE() reductions with OP on TYPE
66 * void suif_reset_stats() reset statistics
68 * void suif_exit_log() log exit for other threads
70 * EXTERNAL SYMBOLS REQUIRED
71 * -------------------------
72 * int _suif_start(argc,argv,envp) root of application program
73 * int _suif_nproc required # of procs if non-zero
75 * These routines are intended to support the output of the SUIF
76 * shared-memory compiler on a variety of architectures.
77 * Currently support is provided for SGI, DASH, KSR and generic pthreads.
79 * This system is modeled after the run-time routines provided by
80 * Martin Rinard. It uses the ANL macros provided by JP Singh,
81 * Truman Joe, and other members of the DASH group at Stanford.
82 * Names & calling conventions of the run-time system roughly match
83 * those of the p4 parallel programming system by Butler & Lusk
84 * at Argonne.
86 * The root of the SUIF application program is named _suif_start().
87 * Parallel execution is provided by calling suif_doall() with
88 * a pointer to the function to be executed in parallel.
90 * Example program:
92 * int _suif_nproc = 0; // does not require a fixed # of procs
93 * extern void *volatile _suif_aligned_args;
94 * extern void (**volatile _suif_aligned_task_f)(int);
97 * int _suif_start(argc, argv, envp)
98 * int argc;
99 * char **argv, **envp;
101 * struct my_args args;
103 * ... // work to be performed sequentially
104 * *_suif_aligned_task_f = foo;
105 * *_suif_aligned_args = ...; // arguments to task funcs
106 * suif_doall(foo); // foo is executed in parallel
107 * ... // work to be performed sequentially
110 * void foo(myid)
111 * int myid; // id of worker thread, from 0...P-1
113 * ... // work to be performed in parallel
116 * ENVIRONMENTAL VARIABLES
117 * -----------------------
118 * PL_NUM_THREADS // number of threads to execute
119 * PL_PROC_START // physical proc id of 1st thread
120 * PL_MAXPROC // total # of procs (max physical id+1)
121 * Do not execute loops iff:
122 * PL_LIMITS_ITER // number of iterations is less than ..
123 * PL_LIMITS_LOADBALANCE // % of load imbalance is less than ..
124 * PL_LIMITS_WORK // amount of work in the loop body *
125 * # of iterations is less than ..
127 * Environmental variables are used to specify the number and mapping
128 * of threads. The system creates a number of threads equal to
129 * PL_NUM_THREADS, then maps them to physical processors starting
130 * at PL_PROC_START, wrapping at PL_MAXPROC back to processor 0.
132 * Threads are assigned logical ids from 0 to PL_NUM_THREADS-1.
133 * The original master thread is guaranteed to be thread 0. The
134 * run-time system supports only a single level of parallelism.
135 * Nested calls to suif_doall() result in a single thread with id
136 * equal to -1.
138 * If the SUIF compiler produces code that requires a fixed number of
139 * processors, the global variable _suif_nproc is set to the number.
140 * If a variable number of processors is allowed, it is zeroed.
142 * Lock, barrier, and fetch&inc counter routines require an id.
143 * Currently eight of each are supported. Counter routines require
144 * a processor id and a counter id. Currently DEFAULT_MAXCOUNTER are
145 * supported for each processor.
147 * Locks, barriers, and fetch&inc counters provide mutual exclusion
148 * on updates; they can be safely updated by multiple processors.
149 * In comparison, normal counters do not provide mutual exclusion.
150 * For safety, each processor should thus only write to its own
151 * counter, though this restriction is not enforced.
153 *----------------------------------------------------------------------*/
155 MAIN_ENV
157 #include "runtime.h"
158 #include "runtime_names.h"
159 #include <setjmp.h>
161 /*----------------------------------------------------------------------
162 * Declarations
163 *----------------------------------------------------------------------
 */
167 /* Create padded version of variables to avoid false sharing */
/* Thread-configuration record.  main() fills the three ints from the
   defaults and the PL_NUM_THREADS / PL_PROC_START / PL_MAXPROC
   environment variables; _pad1 rounds them up to a cache-line multiple
   so that `barrier` does not share their cache line. */
169 typedef struct suif_aligned_vars {
170 int _nproc; /* number of threads (also forced by SEQUENTIAL or _suif_nproc in main()) */
171 int _proc_start; /* value of PL_PROC_START, or DEFAULT_PROC_START */
172 int _maxproc; /* value of PL_MAXPROC, or DEFAULT_MAXPROC */
173 char _pad1[CACHE_LINE_SIZE - ((3 * sizeof(int)) % CACHE_LINE_SIZE)]; /* filler up to the next cache-line boundary */
175 MasterSlaveBarrier barrier; /* NOTE(review): not referenced in the visible part of this file */
176 } suif_aligned_vars;
178 char _aligned_vars_space[sizeof(suif_aligned_vars) + (8 * CACHE_LINE_SIZE)];
179 suif_aligned_vars *_suif_aligned_vars = 0;
181 /* _aligned_args_space holds doall_level, task function pointer and task
182 arguments. doall_level is set in runtime, task function pointer and
183 task arguments are set in generated code
 */
185 char _aligned_args_space[(2 * sizeof(void *)) + MAX_ARGS_SIZE +
186 (8 * CACHE_LINE_SIZE)];
188 /* Variables used in SUIF compiled code */
/* Each pointer below is NULL until main() points it at a slot inside the
   cache-line-aligned region carved out of _aligned_args_space. */
190 int *volatile _suif_aligned_my_nprocs = NULL;
191 int *volatile _suif_aligned_my_nprocs1 = NULL;
192 int *volatile _suif_aligned_my_nprocs2 = NULL;
194 int *volatile _suif_aligned_doall_level = NULL;
195 task_f *volatile _suif_aligned_task_f = NULL;
196 void *volatile _suif_aligned_args = NULL;
/* Per-processor tables sized by DEFAULT_MAXPROC: restart() records into
   restart_map[] and suif_restore_state() longjmp()s through jmpbuf[].
   NOTE(review): _thread_map is not written in the visible part of the file. */
198 int _thread_map[DEFAULT_MAXPROC];
199 int restart_map[DEFAULT_MAXPROC];
200 jmp_buf jmpbuf[DEFAULT_MAXPROC];
202 /* OBSOLETE: The following three variables are provided for
203 * compatibility with previous releases of this package. Eventually,
204 * they will be removed entirely. Note that code that uses these
205 * variables instead of the pointers to the aligned versions will not
206 * get the benefit of suif_limited_doall() or feedback to cut the
207 * degree of parallelism -- the code will run on all processors in
208 * that case. */
210 int _my_nprocs;
211 int _my_nprocs1;
212 int _my_nprocs2;
214 /* Runtime structure used for feedback to
215 * limit parallelism
 */
/* One entry per doall loop; filled from the dynamic feedback file by
   suif_init_dynamic_feedback() and consulted by
   suif_dynamic_feedback_get_nprocs(). */
217 typedef struct suif_dynamic_feedback_struct {
218 int iter_threshhold; /* do not parallelize below this number of
219 iterations */
220 int max_parallelism; /* never use more than this number of processors */
221 } suif_dynamic_feedback;
224 /* External vars */
226 extern int _suif_start(int argc, char **argv, char **envp);
227 extern int _suif_nproc;
229 /* Private vars */
/* Copies of main()'s arguments, saved after suif_initenv() runs. */
231 static int _argc;
232 static char **_argv;
233 static char **_envp;
/* Work thresholds: main() loads the DEFAULT_LIMITS_* values and then the
   PL_LIMITS_* environment overrides.  suif_check_work() reads
   _limits_iter, _limits_work and _limits_work_small_iter. */
235 static int _limits_iter;
236 static int _limits_ldb;
237 static int _limits_work;
238 static int _limits_body;
239 static int _limits_work_small_iter;
/* Set when PL_DYNAMIC_FEEDBACK=<file> is present in the environment; the
   name points into the environment string itself. */
240 static int _dynamic_feedback_on;
241 static char *_dynamic_feedback_file_name;
/* Identifier obtained from GETUNIQUEID in main() before workers start. */
243 static PIDTYPE _master_pid;
245 static volatile int _main_stop; /* cleared by suif_initenv() */
246 static volatile int _mylocks[DEFAULT_MAXPROC][DEFAULT_MAXCOUNTER];
248 static void suif_initenv(void);
249 static void suif_init_dynamic_feedback(void);
250 static void suif_create(void (*)(void));
251 static void suif_worker(void);
252 static void _barrier(void);
253 static void _barrier2(void);
254 static void *cache_line_align(void *addr);
255 static double safe_divide(double a, double b);
258 #if (defined(STATS) && defined(ALL_STATS)) || defined(FEEDBACK) || \
259 defined(GEN_TRACE)
260 static void **task_func_table = NULL;
261 static char **task_name_table = NULL;
262 static char *current_task_name = NULL;
263 static int table_size = 0;
264 #endif
265 static int current_task_num = 0;
/*
268 * These are used for integrating feedback
269 * to limit parallelism
 */
271 #define DYNAMIC_FEEDBACK_TABLE_SIZE 512
273 static suif_dynamic_feedback *suif_dynamic_feedback_table;
274 static suif_dynamic_feedback
275 static_dynamic_feedback_table[DYNAMIC_FEEDBACK_TABLE_SIZE];
276 static int suif_dynamic_feedback_doalls = 0;
278 #if (defined(STATS) && defined(ALL_STATS)) || defined(GEN_TRACE)
279 static int current_task_iters_const = 0;
280 #endif
281 #if defined(STATS) || defined(ALL_STATS) || defined(GEN_TRACE)
282 static int current_task_num_iters = 0;
283 #endif
285 #if defined(STATS)
286 static SUIF_HI_RES_TIMER_DATA_TYPE alltime_hi_res;
287 static double alltime_low_res;
288 static double partime;
289 static int num_doalls;
290 #if defined(SEQUENTIAL)
291 static int num_barriers;
292 static int num_sync_neighbors;
293 static int num_locks;
294 static int num_reductions;
295 #endif
296 #if defined(ALL_STATS)
297 static double *stat_time_table = NULL;
298 static int *stat_doalls_table = NULL;
299 static double *stat_iters_table = NULL;
300 #if defined(SEQUENTIAL)
301 static int *stat_barriers_table = NULL;
302 static int *stat_sync_neighbors_table = NULL;
303 static int *stat_locks_table = NULL;
304 static int *stat_reductions_table = NULL;
305 #endif
306 #endif
307 #endif
310 #if defined(GEN_TRACE)
/* One trace record as stored by gen_trace() and written raw to the dump
   file by dump_trace(). */
311 typedef struct trace_data_struct {
312 int func_num; /* first argument passed to gen_trace() */
313 int num_iters; /* second argument passed to gen_trace() */
314 double time; /* third argument passed to gen_trace() */
315 } trace_data;
316 static trace_data *trace_space;
317 static int num_trace_dumps = 0;
318 static int num_traces = 0;
319 static int max_traces = 1024*1024; /*Take 1 meg at a time */
320 static void gen_trace(int, int, double);
321 static void suif_inittrace(void);
322 static void dump_trace(void);
323 #if defined(STATS)
324 static double trace_dump_time = 0.0;
325 #endif
326 #endif
328 #if defined(FEEDBACK)
329 extern char *feedback_name_table[];
330 extern int feedback_nproc_limit_table[];
331 static int *limit_nproc_table = NULL;
332 #endif
334 /* system barriers & locks */
336 BARDEC(_sysbar1)
337 BARDEC(_sysbar2)
338 FOPDEC(_sysfop)
340 /* user barriers & locks */
342 BARDEC(_bar1)
343 BARDEC(_bar2)
344 BARDEC(_bar3)
345 BARDEC(_bar4)
346 BARDEC(_bar5)
347 BARDEC(_bar6)
348 BARDEC(_bar7)
349 BARDEC(_bar8)
351 BARDEC(_bar_global0)
352 BARDEC(_bar_global1)
353 BARDEC(_bar_global2)
354 BARDEC(_bar_global3)
355 BARDEC(_bar_global4)
356 BARDEC(_bar_global5)
357 BARDEC(_bar_global6)
358 BARDEC(_bar_global7)
360 FOPDEC(_fop1)
361 FOPDEC(_fop2)
362 FOPDEC(_fop3)
363 FOPDEC(_fop4)
364 FOPDEC(_fop5)
365 FOPDEC(_fop6)
366 FOPDEC(_fop7)
367 FOPDEC(_fop8)
369 LOCKDEC(_lock1)
370 LOCKDEC(_lock2)
371 LOCKDEC(_lock3)
372 LOCKDEC(_lock4)
373 LOCKDEC(_lock5)
374 LOCKDEC(_lock6)
375 LOCKDEC(_lock7)
376 LOCKDEC(_lock8)
378 /* reduction barriers & locks */
380 #define NUM_RLOCK 64
382 ALOCKDEC(_lock_reduction,NUM_RLOCK)
384 /* extra function called by main when compiled with gcc */
385 #ifdef __GNUC__
386 void __main() { }
387 #endif
390 /*----------------------------------------------------------------------*/
391 /* main routine */
393 /* main() - read parameters, do initializations, start threads, then */
394 /* pass control to the application by calling _suif_start(). */
396 int main(int argc, char **argv, char **envp)
398 char **ptr;
399 int i;
401 /* initialize suif_aligned_{vars,args} to be aligned on cache-lines */
403 char *args;
405 _suif_aligned_vars =
406 (suif_aligned_vars *) cache_line_align(&_aligned_vars_space[0]);
408 args = (char *) cache_line_align(&_aligned_args_space[0]);
410 _suif_aligned_doall_level = (int *) args;
411 _suif_aligned_task_f = (task_f *) (args + sizeof(void *));
412 _suif_aligned_my_nprocs = (int *) (args + (2 * sizeof(void *)));
413 _suif_aligned_my_nprocs1 = (int *) (args + (3 * sizeof(void *)));
414 _suif_aligned_my_nprocs2 = (int *) (args + (4 * sizeof(void *)));
415 _suif_aligned_args = (void *) (args + (6 * sizeof(void *)));
418 #if defined(STATS)
419 partime = 0.0;
420 num_doalls = 0 ;
421 #if defined(SEQUENTIAL)
422 num_barriers = 0 ;
423 num_sync_neighbors = 0 ;
424 num_locks = 0 ;
425 num_reductions = 0 ;
426 #endif
427 #endif
429 /* get some parameters from environment variables */
431 _suif_aligned_vars->_nproc = DEFAULT_NUM_THREADS;
432 _suif_aligned_vars->_proc_start = DEFAULT_PROC_START;
433 _suif_aligned_vars->_maxproc = DEFAULT_MAXPROC;
434 _limits_iter = DEFAULT_LIMITS_ITER;
435 _limits_ldb = DEFAULT_LIMITS_LOADBALANCE;
436 _limits_work = DEFAULT_LIMITS_WORK;
437 _limits_body = DEFAULT_LIMITS_BODY;
438 _limits_work_small_iter = DEFAULT_LIMITS_WORK_SMALL_ITER;
439 _dynamic_feedback_on = 0;
441 for (ptr=envp; *ptr; ptr++) {
442 if (!strncmp("PL_NUM_THREADS=", *ptr, 15)) {
443 _suif_aligned_vars->_nproc = atoi(*ptr+15);
445 /* synchronization in simos_sync.h can only handle
446 nproc < CACHE_LINE_SIZE */
447 if ((_suif_aligned_vars->_nproc < 1) ||
448 (_suif_aligned_vars->_nproc >= CACHE_LINE_SIZE)) {
449 fprintf(stderr, "Illegal value for PL_NUM_THREADS\n");
450 _suif_aligned_vars->_nproc = DEFAULT_NUM_THREADS;
453 if (!strncmp("PL_PROC_START=", *ptr, 14)) {
454 _suif_aligned_vars->_proc_start = atoi(*ptr+14);
456 if (!strncmp("PL_MAXPROC=", *ptr, 11)) {
457 if ((_suif_aligned_vars->_maxproc = atoi(*ptr+11)) < 1) {
458 fprintf(stderr, "Illegal value for PL_MAXPROC\n");
459 _suif_aligned_vars->_maxproc = DEFAULT_MAXPROC;
462 if (!strncmp("PL_LIMITS_ITER=", *ptr, 15)) {
463 _limits_iter = atoi(*ptr+15);
465 if (!strncmp("PL_LIMITS_LOADBALANCE=", *ptr, 22)) {
466 _limits_ldb = atoi(*ptr+22);
468 if (!strncmp("PL_LIMITS_WORK=", *ptr, 15)) {
469 _limits_work = atoi(*ptr+15);
471 if (!strncmp("PL_LIMITS_BODY=", *ptr, 15)) {
472 _limits_body = atoi(*ptr+15);
474 if (!strncmp("PL_LIMITS_WORK_SMALL_ITER=", *ptr, 26)) {
475 _limits_work_small_iter = atoi(*ptr+26);
477 if (!strncmp("PL_DYNAMIC_FEEDBACK=", *ptr, 20)) {
478 _dynamic_feedback_on = 1;
479 _dynamic_feedback_file_name = &((*ptr)[20]);
483 /* initializations */
485 #if defined(SEQUENTIAL)
486 _suif_aligned_vars->_nproc = 1;
487 #endif
489 /* if _suif_nproc is nonzero, use compiler predetermined # of procs */
490 if (_suif_nproc != 0)
491 _suif_aligned_vars->_nproc = _suif_nproc;
493 suif_initenv();
494 _argc = argc;
495 _argv = argv;
496 _envp = envp;
499 /* read in the dynamic feedback file for suppressing paralllelism */
500 if (_dynamic_feedback_on) suif_init_dynamic_feedback();
501 #if defined(GEN_TRACE)
502 suif_inittrace();
503 #endif
505 #if defined(STATS)
506 SUIF_INIT_HI_RES_CLOCK
507 SUIF_START_TIMER(alltime_hi_res, alltime_low_res);
508 #endif
510 /* get a unique identifier to identify the master thread */
511 _master_pid = GETUNIQUEID;
513 /* spawn worker threads, begin computation */
515 for(i=1; i < _suif_aligned_vars->_nproc; i++) {
516 suif_create(suif_worker);
519 if (_suif_aligned_vars->_nproc > 1) {
520 SETSCHED()
523 suif_worker();
525 suif_wait_for_end();
526 #if defined(GEN_TRACE)
527 dump_trace();
528 #endif
529 return 0;
/* safe_divide() - return a / b, or 0.0 when the divisor compares equal
 * to zero (this includes -0.0).  Used by the statistics report so that
 * an empty run prints 0 instead of inf/nan.
 *
 * Declared static to agree with the forward declaration earlier in the
 * file. */
static double safe_divide(double a, double b)
{
    if (b == 0.0) {
        return 0.0;
    }
    return a / b;
}
537 void *cache_line_align(void *addr)
539 ptrdiff_t ptr = (ptrdiff_t) addr;
540 ptr = (ptr + (ptrdiff_t) (CACHE_LINE_SIZE - 1)) &
541 (~((ptrdiff_t) (CACHE_LINE_SIZE - 1)));
542 return (void *) ptr;
546 void *suif_malloc(size_t size)
548 int num_bytes = size + (2*CACHE_LINE_SIZE);
549 void *ptr = malloc(num_bytes);
551 return (cache_line_align(ptr));
555 /*----------------------------------------------------------------------*/
556 /* read in the runtime dynamic feedback file if it exists
557 * Looks for a file named by the PL_DYNAMIC_FEEDBACK environment variable.
558 * Creates a table indexed by doall loop numbers
559 * for fast runtime access to determine number of
560 * processors to run on
562 static void suif_init_dynamic_feedback()
564 FILE *fp;
565 char line[512]; /* reasonable limit to a line size */
566 char name[512]; /* reasonable limit for a string */
567 char *fname = _dynamic_feedback_file_name;
568 int in_region = 0; /* flag to determine when the first line is read */
569 int num_doalls = 0;
571 fp = fopen(fname, "r");
572 if (!fp) {
573 fprintf(stderr, "Unable to open dynamic feedback file `%s', ignoring\n",
574 fname);
575 _dynamic_feedback_on = 0;
576 return;
579 /* read in lines until we run out */
580 while (fgets(line, 512, fp) != NULL) {
581 /* ignore comments */
582 if (line[0] == '#') { continue; }
584 /* Use scanf for simplicity */
585 if (!in_region) {
586 int i, rv;
587 if ((rv = sscanf(line, "DOALLS %d\n", &num_doalls)) == 1) {
588 if (num_doalls < 0) {
589 fprintf(stderr, "Error in dynamic feedback file `%s', ignoring\n",
590 fname);
591 _dynamic_feedback_on = 0;
592 fclose(fp);
593 return;
596 if (num_doalls > DYNAMIC_FEEDBACK_TABLE_SIZE)
598 suif_dynamic_feedback_table =
599 (suif_dynamic_feedback *)malloc(sizeof(suif_dynamic_feedback) *
600 num_doalls);
601 if (suif_dynamic_feedback_table == NULL)
602 perror("malloc failed");
604 else
606 suif_dynamic_feedback_table = static_dynamic_feedback_table;
609 for (i = 0; i< num_doalls; i++) {
610 /* do not supress by default */
611 suif_dynamic_feedback_table[i].iter_threshhold = 0;
612 suif_dynamic_feedback_table[i].max_parallelism = 1000;
614 in_region = 1;
615 continue;
619 if (in_region) {
620 int doall_index, iter_threshhold, max_parallelism;
621 int rv;
623 if ((rv = sscanf(line, "ENTRY %s %d %d %d\n", name, &doall_index,
624 &iter_threshhold, &max_parallelism)) == 4) {
625 /* fprintf(stderr,"ENTRY %s %d %d %d\n", name, doall_index,
626 iter_threshhold, max_parallelism); */
627 if ((doall_index < 0) || (doall_index > num_doalls)) {
628 fprintf(stderr,
629 "IGNORED: %s: doall_index(=%d) > num_doalls(=%d)\nline=%s\n",
630 fname, doall_index, num_doalls, line);
631 } else {
632 suif_dynamic_feedback_table[doall_index].iter_threshhold =
633 iter_threshhold;
634 suif_dynamic_feedback_table[doall_index].max_parallelism =
635 max_parallelism;
637 continue;
640 fprintf(stderr, "IGNORED: %s: inregion=%d, line=%s\n",
641 fname, in_region, line);
644 suif_dynamic_feedback_doalls = num_doalls;
645 fclose(fp);
649 /* use the feedback data to determine the number of processors
650 * to run on With any luck, it will be inlined
652 static int suif_dynamic_feedback_get_nprocs(int task_func_num,
653 int num_iters, int num_procs) {
654 int max_parallelism;
655 if (!_dynamic_feedback_on) return(num_procs);
657 /* blindly assert that task_func_num is ALWAYS >0 and < num_doalls */
658 if ((task_func_num < 0) || (task_func_num >= suif_dynamic_feedback_doalls)) {
659 return(num_procs);
661 /* if we are below the threshhold, run on one processor */
662 if (suif_dynamic_feedback_table[task_func_num].iter_threshhold > num_iters) {
663 return(1);
665 /* use the min(max_parallelism, num_procs, num_iters); */
666 max_parallelism = suif_dynamic_feedback_table[task_func_num].max_parallelism;
667 max_parallelism = ((max_parallelism <= num_procs)
668 ? max_parallelism : num_procs);
669 max_parallelism = ((max_parallelism <= num_iters)
670 ? max_parallelism : num_iters);
671 return(max_parallelism);
674 #if defined(GEN_TRACE)
676 * init generate trace
677 * allocate
679 static void suif_inittrace() {
680 trace_space = (trace_data *)malloc(sizeof(trace_data) * max_traces);
681 /* for the challenge */
684 /* ignmore errors when dumping trace */
685 static void dump_trace(void) {
686 FILE *fp;
687 char *fname = "RTdumpfile";
688 int i;
689 if (num_traces == 0) return;
690 if (num_trace_dumps == 0) {
691 fp = fopen(fname, "w");
692 } else {
693 fp = fopen(fname, "a");
695 fwrite ((void *) trace_space, sizeof(trace_data), num_traces, fp);
696 fclose(fp);
697 num_traces = 0;
698 num_trace_dumps++;
701 static void gen_trace(int func_num, int num_iters, double time) {
702 trace_space[num_traces].func_num = func_num;
703 trace_space[num_traces].num_iters = num_iters;
704 trace_space[num_traces].time = time;
705 num_traces++;
706 if (num_traces == max_traces) {
707 #if defined(STATS)
708 SUIF_HI_RES_TIMER_DATA_TYPE hi_res_trace_start;
709 double low_res_trace_start;
710 double test_time;
712 SUIF_START_TIMER(hi_res_trace_start, low_res_trace_start);
713 #endif
714 dump_trace();
715 #if defined(STATS)
716 SUIF_END_TIMER(test_time, hi_res_trace_start, low_res_trace_start);
717 trace_dump_time += test_time;
718 #endif
722 #endif
724 /*----------------------------------------------------------------------*/
725 /* process management */
727 /* suif_initenv() - initialize environment & synchronization constructs */
/* Runs MAIN_INITENV_LITE, initialises every barrier, fetch-and-op object
   and lock declared at file scope, resets the counters, and seeds the
   shared doall-level / processor-count state.  main() calls this once,
   after _nproc has been settled and before any worker is created. */
729 static void suif_initenv(void)
731 MAIN_INITENV_LITE(,)
733 /* system barriers & locks */
735 BARINIT(_sysbar1)
736 BARINIT(_sysbar2)
737 FOPINIT(_sysfop,0)
739 /* user barriers & locks */
741 BARINIT(_bar1)
742 BARINIT(_bar2)
743 BARINIT(_bar3)
744 BARINIT(_bar4)
745 BARINIT(_bar5)
746 BARINIT(_bar6)
747 BARINIT(_bar7)
748 BARINIT(_bar8)
750 BARINIT(_bar_global0)
751 BARINIT(_bar_global1)
752 BARINIT(_bar_global2)
753 BARINIT(_bar_global3)
754 BARINIT(_bar_global4)
755 BARINIT(_bar_global5)
756 BARINIT(_bar_global6)
757 BARINIT(_bar_global7)
759 FOPINIT(_fop1,0)
760 FOPINIT(_fop2,0)
761 FOPINIT(_fop3,0)
762 FOPINIT(_fop4,0)
763 FOPINIT(_fop5,0)
764 FOPINIT(_fop6,0)
765 FOPINIT(_fop7,0)
766 FOPINIT(_fop8,0)
768 LOCKINIT(_lock1)
769 LOCKINIT(_lock2)
770 LOCKINIT(_lock3)
771 LOCKINIT(_lock4)
772 LOCKINIT(_lock5)
773 LOCKINIT(_lock6)
774 LOCKINIT(_lock7)
775 LOCKINIT(_lock8)
777 /* reduction locks */
778 ALOCKINIT(_lock_reduction,NUM_RLOCK)
780 suif_counter_init_all();
/* not inside a doall yet; publish the thread count in the aligned slot
   and in the obsolete plain global */
782 _main_stop = 0;
783 *_suif_aligned_doall_level = 0;
784 *_suif_aligned_my_nprocs = _suif_aligned_vars->_nproc;
785 _my_nprocs = *_suif_aligned_my_nprocs;
/* suif_assign_nprocs() derives the two factors from *_suif_aligned_my_nprocs;
   mirror them into the obsolete globals as well */
787 suif_assign_nprocs();
788 _my_nprocs1 = *_suif_aligned_my_nprocs1;
789 _my_nprocs2 = *_suif_aligned_my_nprocs2;
792 /*----------------------------------------------------------------------*/
793 /* suif_create() - creates lightweight thread to execute function f */
795 static void suif_create(void (*f)(void))
797 CREATE_LITE(f)
/*----------------------------------------------------------------------*/
/* suif_get_my_id() - return the calling thread's logical id
 * (0 .. PL_NUM_THREADS-1), as produced by the platform's GETID macro. */
int suif_get_my_id(void)
{
    int id;

    GETID(id);
    return id;
}
814 /*----------------------------------------------------------------------*/
815 /* suif_num_total_ids() - return total number of thread */
817 int suif_num_total_ids(void)
819 return _suif_aligned_vars->_nproc; /* total number of threads */
/* numids_() - same value as suif_num_total_ids(), under a second name
 * with a trailing underscore. */
int numids_(void)
{
    return suif_num_total_ids();
}
830 /*----------------------------------------------------------------------*/
831 /* suif_assign_ids() - initializes _my_ids */
833 void suif_assign_ids(int myid, int *myid1, int *myid2)
835 int my_nprocs1 = *_suif_aligned_my_nprocs1;
836 *myid1 = myid % my_nprocs1;
837 *myid2 = myid / my_nprocs1;
/* setids_() - pointer-argument wrapper around suif_assign_ids(); the id
 * is read through `myid` before the two outputs are written. */
void setids_(int *myid, int *myid1, int *myid2)
{
    const int linear_id = *myid;

    suif_assign_ids(linear_id, myid1, myid2);
}
847 /*----------------------------------------------------------------------*/
848 /* suif_assign_nprocs() - initializes _my_nprocs */
850 void suif_assign_nprocs(void)
852 int my_nprocs1, my_nprocs2;
853 int nproc = *_suif_aligned_my_nprocs;
855 if (nproc < 4)
856 my_nprocs2 = 1;
857 else if (nproc < 9)
858 my_nprocs2 = 2;
859 else if ((nproc == 9) ||
860 (nproc == 15) ||
861 (nproc == 18))
862 my_nprocs2 = 3;
863 else if ((nproc < 25) ||
864 (nproc == 32))
865 my_nprocs2 = 4;
866 else if ((nproc == 25) ||
867 (nproc == 35) ||
868 (nproc == 40) ||
869 (nproc == 50))
870 my_nprocs2 = 5;
871 else if (nproc < 49)
872 my_nprocs2 = 6;
873 else if (nproc == 49)
874 my_nprocs2 = 7;
875 else
876 my_nprocs2 = 8;
878 my_nprocs1 =
879 nproc/my_nprocs2;
881 /* only modify the global variable if needed */
882 if (my_nprocs1 != *_suif_aligned_my_nprocs1) {
883 *_suif_aligned_my_nprocs1 = my_nprocs1;
885 if (my_nprocs2 != *_suif_aligned_my_nprocs2) {
886 *_suif_aligned_my_nprocs2 = my_nprocs2;
/* setnp_() - alias for suif_assign_nprocs() under a name with a trailing
 * underscore. */
void setnp_(void)
{
    suif_assign_nprocs();
}
897 /*----------------------------------------------------------------------*/
898 /* suif_restore_state() - called by hardware simulator to restore registers */
900 void suif_restore_state(int pid, int my_id /* _thread_map[pid] */)
902 longjmp(jmpbuf[pid], my_id);
906 /*----------------------------------------------------------------------*/
907 /* restart() - called by hardware simulator to restore registers */
909 int restart(int pid)
911 restart_map[pid] = pid;
912 return 0;
916 /*----------------------------------------------------------------------*/
917 /* suif_doall_level() - return depth of nesting in doall loops */
919 int suif_doall_level(void)
921 /* (*_suif_aligned_doall_level) begins at 0 and is incremented */
922 /* after each nested call to suif_doall() */
924 return (*_suif_aligned_doall_level);
/* doalev_() - same value as suif_doall_level(), under a second name with
 * a trailing underscore. */
int doalev_(void)
{
    return suif_doall_level();
}
936 /*----------------------------------------------------------------------*/
937 /* suif_check_work() - return whether the amount of work in the given */
938 /* parameters is greater than the threshold set by the environment */
939 /* variables */
/* checkw_() - pointer-argument wrapper around suif_check_work(). */
int checkw_(int *lb, int *ub, double *work, int *cflow)
{
    const int lower = *lb;
    const int upper = *ub;
    const double per_iter = *work;
    const int flow = *cflow;

    return suif_check_work(lower, upper, per_iter, flow);
}
948 int suif_check_work(int lb, int ub, double work, int cflow)
950 int i = ub-lb+1;
951 double total_work;
952 if (i < _suif_aligned_vars->_nproc) return 0;
954 total_work = i * work;
955 if(total_work < (double)_limits_work) return 0;
956 if(i < _limits_iter &&
957 total_work < (double)_limits_work_small_iter) return 0;
959 return 1;
964 /*----------------------------------------------------------------------*/
965 /* suif_wait_for_end() - clean up routines */
/* End-of-run report.  When STATS is defined this stops the overall timer
   and prints a summary to stderr: sequential time (alltime - partime),
   parallel time, average task time and coverage, plus invocation counts
   in SEQUENTIAL builds.  With ALL_STATS it also prints per-task tables,
   walking task_func_table until its first NULL entry.  Without STATS the
   function body is empty. */
967 void suif_wait_for_end(void)
969 #if defined(STATS)
970 double alltime;
972 SUIF_END_TIMER(alltime, alltime_hi_res, alltime_low_res);
973 #if defined(GEN_TRACE)
/* time spent writing trace dumps is not charged to the program */
974 alltime = alltime - trace_dump_time;
975 #endif /* GEN_TRACE */
977 fprintf(stderr, " ***************************************************************************\n");
978 fprintf(stderr, " Dynamic Feedback %s\n",
979 (_dynamic_feedback_on) ? "ON" : "OFF");
981 #if defined(GEN_TRACE)
982 fprintf(stderr, " Trace output GENERATED. Dump time removed from time\n");
983 #endif /* GEN_TRACE */
985 fprintf(stderr, " Sequential Time = %11.6f sec, \
986 Doall Invocations = %9d \n", alltime - partime, num_doalls );
987 #if defined(SEQUENTIAL)
989 double avg_task_time = safe_divide(partime, num_doalls);
991 fprintf(stderr, " Parallel Time = %11.6f sec, \
992 Barrier Invocations = %9d \n", partime, num_barriers );
993 fprintf(stderr, " Avg Task Time = %11.6f sec, \
994 Sync Neighbors = %9d \n",
995 avg_task_time, num_sync_neighbors );
996 fprintf(stderr, " Coverage = %9.2f %%, \
997 Reductions = %9d \n",
998 safe_divide( partime * 100.0, alltime), num_reductions );
1000 #else
1001 fprintf(stderr, " Parallel Time = %11.6f sec\n", partime);
1002 fprintf(stderr, " Avg Task Time = %11.6f sec\n",
1003 safe_divide(partime, num_doalls));
1004 fprintf(stderr, " Coverage = %9.2f %%\n",
1005 safe_divide(partime * 100.0, alltime));
1006 #endif /* SEQUENTIAL */
1007 fprintf(stderr, " ***************************************************************************\n");
1009 #if defined(ALL_STATS)
/* per-task tables: a NULL name prints as "??" and a leading '&' is skipped */
1011 int func_num;
1013 fprintf(stderr, "\n ***************************************************************************\n");
1014 for (func_num = 0; task_func_table[func_num] != NULL; ++func_num)
1016 char *current_name = task_name_table[func_num];
1017 current_name = ((current_name == NULL) ? "??" : current_name);
1018 if (*current_name == '&')
1019 ++current_name;
1020 fprintf(stderr, " ``%s'' Time = %11.6f sec\n", current_name,
1021 stat_time_table[func_num]);
1022 fprintf(stderr, " ``%s'' Time/iter = %11.6f sec\n", current_name,
1023 safe_divide(stat_time_table[func_num],
1024 stat_iters_table[func_num] ));
1026 fprintf(stderr, " ***************************************************************************\n");
1028 fprintf(stderr, "\n ***************************************************************************\n");
1029 for (func_num = 0; task_func_table[func_num] != NULL; ++func_num)
1031 char *current_name = task_name_table[func_num];
1032 current_name = ((current_name == NULL) ? "??" : current_name);
1033 if (*current_name == '&')
1034 ++current_name;
1035 fprintf(stderr, " ``%s'' Doalls = %d\n", current_name,
1036 stat_doalls_table[func_num]);
1038 fprintf(stderr, " ***************************************************************************\n");
1040 #if defined(SEQUENTIAL)
1041 fprintf(stderr, "\n ***************************************************************************\n");
1042 for (func_num = 0; task_func_table[func_num] != NULL; ++func_num)
1044 char *current_name = task_name_table[func_num];
1045 current_name = ((current_name == NULL) ? "??" : current_name);
1046 if (*current_name == '&')
1047 ++current_name;
1048 fprintf(stderr, " ``%s'' Barriers = %d\n", current_name,
1049 stat_barriers_table[func_num]);
1051 fprintf(stderr, " ***************************************************************************\n");
1053 fprintf(stderr, "\n ***************************************************************************\n");
1054 for (func_num = 0; task_func_table[func_num] != NULL; ++func_num)
1056 char *current_name = task_name_table[func_num];
1057 current_name = ((current_name == NULL) ? "??" : current_name);
1058 if (*current_name == '&')
1059 ++current_name;
1060 fprintf(stderr, " ``%s'' Sync Neighbors = %d\n", current_name,
1061 stat_sync_neighbors_table[func_num]);
1063 fprintf(stderr, " ***************************************************************************\n");
1065 fprintf(stderr, "\n ***************************************************************************\n");
1066 for (func_num = 0; task_func_table[func_num] != NULL; ++func_num)
1068 char *current_name = task_name_table[func_num];
1069 current_name = ((current_name == NULL) ? "??" : current_name);
1070 if (*current_name == '&')
1071 ++current_name;
1072 fprintf(stderr, " ``%s'' Locks = %d\n", current_name,
1073 stat_locks_table[func_num]);
1075 fprintf(stderr, " ***************************************************************************\n");
1077 fprintf(stderr, "\n ***************************************************************************\n");
1078 for (func_num = 0; task_func_table[func_num] != NULL; ++func_num)
1080 char *current_name = task_name_table[func_num];
1081 current_name = ((current_name == NULL) ? "??" : current_name);
1082 if (*current_name == '&')
1083 ++current_name;
1084 fprintf(stderr, " ``%s'' Reductions = %d\n", current_name,
1085 stat_reductions_table[func_num]);
1087 fprintf(stderr, " ***************************************************************************\n");
1088 #endif /* SEQUENTIAL */
1090 #endif /* ALL_STATS */
1091 #endif /* STATS */
/*----------------------------------------------------------------------*/
/* suif_reset_stats() - restart the overall timer and zero the event
 * counters.  Compiles to an empty function unless STATS is defined; the
 * barrier, neighbor-sync, lock and reduction counters are only reset in
 * SEQUENTIAL builds. */

void suif_reset_stats(void)
{
#if defined(STATS)
    SUIF_START_TIMER(alltime_hi_res, alltime_low_res);
    num_doalls = 0;
#  if defined(SEQUENTIAL)
    num_reductions = 0;
    num_locks = 0;
    num_sync_neighbors = 0;
    num_barriers = 0;
#  endif /* SEQUENTIAL */
#endif /* STATS */
}
1115 /*----------------------------------------------------------------------*/
1116 /* serial execution */
1118 /* suif_serial() - start parallel region, run only on master */
1120 void suif_serial(task_f f)
1122 (*_suif_aligned_doall_level)++;
1124 if ((*_suif_aligned_doall_level) > 1) /* only 1 level of parallelism */
1125 (*f)(-1);
1127 else
1129 #if defined(STATS) || defined(FEEDBACK) || defined(GEN_TRACE)
1131 #if defined(ALL_STATS) || defined(FEEDBACK)
1132 int func_num = 0;
1133 #endif
1134 #if defined(FEEDBACK)
1135 int old_nprocs = *_suif_aligned_my_nprocs;
1136 #endif
1137 #if defined(STATS) || defined(GEN_TRACE)
1138 SUIF_HI_RES_TIMER_DATA_TYPE partime_hi_res;
1139 double partime_low_res;
1140 double diff_time;
1142 SUIF_START_TIMER(partime_hi_res, partime_low_res);
1143 #endif
1144 #if defined(STATS)
1145 num_doalls++;
1146 #endif
1147 #if defined(ALL_STATS) || defined(FEEDBACK)
1148 if (task_func_table == NULL)
1150 table_size = 20;
1151 task_func_table = (void **)(malloc(table_size * sizeof(void *)));
1152 task_name_table = (char **)(malloc(table_size * sizeof(char *)));
1153 #if defined(ALL_STATS)
1154 stat_time_table = (double *)(malloc(table_size * sizeof(double)));
1155 stat_doalls_table = (int *)(malloc(table_size * sizeof(int)));
1156 stat_iters_table = (double *)(malloc(table_size * sizeof(double)));
1157 #if defined(SEQUENTIAL)
1158 stat_barriers_table = (int *)(malloc(table_size * sizeof(int)));
1159 stat_sync_neighbors_table =
1160 (int *)(malloc(table_size * sizeof(int)));
1161 stat_locks_table = (int *)(malloc(table_size * sizeof(int)));
1162 stat_reductions_table = (int *)(malloc(table_size * sizeof(int)));
1163 #endif
1164 #endif
1165 #if defined(FEEDBACK)
1166 limit_nproc_table = (int *)(malloc(table_size * sizeof(int)));
1167 #endif
1168 task_func_table[0] = NULL;
1170 while (task_func_table[func_num] != f)
1172 if (task_func_table[func_num] == NULL)
1174 if (func_num == table_size - 1)
1176 void **new_func_table;
1177 char **new_name_table;
1178 #if defined(ALL_STATS)
1179 double *new_time_table;
1180 int *new_doalls_table;
1181 double *new_iters_table;
1182 #if defined(SEQUENTIAL)
1183 int *new_barriers_table;
1184 int *new_sync_neighbors_table;
1185 int *new_locks_table;
1186 int *new_reductions_table;
1187 #endif
1188 #endif
1189 #if defined(FEEDBACK)
1190 int *new_nproc_table;
1191 #endif
1192 new_func_table =
1193 (void **)(malloc(table_size * 2 * sizeof(void *)));
1194 new_name_table =
1195 (char **)(malloc(table_size * 2 * sizeof(char *)));
1196 #if defined(ALL_STATS)
1197 new_time_table =
1198 (double *)(malloc(table_size * 2 * sizeof(double)));
1199 new_doalls_table =
1200 (int *)(malloc(table_size * 2 * sizeof(int)));
1201 new_iters_table =
1202 (double *)(malloc(table_size * 2 * sizeof(double)));
1203 #if defined(SEQUENTIAL)
1204 new_barriers_table =
1205 (int *)(malloc(table_size * 2 * sizeof(int)));
1206 new_sync_neighbors_table =
1207 (int *)(malloc(table_size * 2 * sizeof(int)));
1208 new_locks_table =
1209 (int *)(malloc(table_size * 2 * sizeof(int)));
1210 new_reductions_table =
1211 (int *)(malloc(table_size * 2 * sizeof(int)));
1212 #endif
1213 #endif
1214 #if defined(FEEDBACK)
1215 new_nproc_table =
1216 (int *)(malloc(table_size * 2 * sizeof(int)));
1217 #endif
1218 memcpy(new_func_table, task_func_table,
1219 table_size * sizeof(void *));
1220 memcpy(new_name_table, task_name_table,
1221 table_size * sizeof(char *));
1222 #if defined(ALL_STATS)
1223 memcpy(new_time_table, stat_time_table,
1224 table_size * sizeof(double));
1225 memcpy(new_doalls_table, stat_doalls_table,
1226 table_size * sizeof(int));
1227 memcpy(new_iters_table, stat_iters_table,
1228 table_size * sizeof(double));
1229 #if defined(SEQUENTIAL)
1230 memcpy(new_barriers_table, stat_barriers_table,
1231 table_size * sizeof(int));
1232 memcpy(new_sync_neighbors_table, stat_sync_neighbors_table,
1233 table_size * sizeof(int));
1234 memcpy(new_locks_table, stat_locks_table,
1235 table_size * sizeof(int));
1236 memcpy(new_reductions_table, stat_reductions_table,
1237 table_size * sizeof(int));
1238 #endif
1239 #endif
1240 #if defined(FEEDBACK)
1241 memcpy(new_nproc_table, limit_nproc_table,
1242 table_size * sizeof(int));
1243 #endif
1244 free(task_func_table);
1245 free(task_name_table);
1246 #if defined(ALL_STATS)
1247 free(stat_time_table);
1248 free(stat_doalls_table);
1249 free(stat_iters_table);
1250 #if defined(SEQUENTIAL)
1251 free(stat_barriers_table);
1252 free(stat_sync_neighbors_table);
1253 free(stat_locks_table);
1254 free(stat_reductions_table);
1255 #endif
1256 #endif
1257 #if defined(FEEDBACK)
1258 free(limit_nproc_table);
1259 #endif
1260 task_func_table = new_func_table;
1261 task_name_table = new_name_table;
1262 #if defined(ALL_STATS)
1263 stat_time_table = new_time_table;
1264 stat_doalls_table = new_doalls_table;
1265 stat_iters_table = new_iters_table;
1266 #if defined(SEQUENTIAL)
1267 stat_barriers_table = new_barriers_table;
1268 stat_sync_neighbors_table = new_sync_neighbors_table;
1269 stat_locks_table = new_locks_table;
1270 stat_reductions_table = new_reductions_table;
1271 #endif
1272 #endif
1273 #if defined(FEEDBACK)
1274 limit_nproc_table = new_nproc_table;
1275 #endif
1276 table_size *= 2;
1278 task_func_table[func_num] = f;
1279 task_name_table[func_num] = current_task_name;
1280 #if defined(ALL_STATS)
1281 stat_time_table[func_num] = 0.0;
1282 stat_doalls_table[func_num] = 0;
1283 stat_iters_table[func_num] = 0.0;
1284 #if defined(SEQUENTIAL)
1285 stat_barriers_table[func_num] = 0;
1286 stat_sync_neighbors_table[func_num] = 0;
1287 stat_locks_table[func_num] = 0;
1288 stat_reductions_table[func_num] = 0;
1289 #endif
1290 #endif
1291 #if defined(FEEDBACK)
1292 limit_nproc_table[func_num] = 0;
1294 unsigned long entry_num;
1296 entry_num = 0;
1297 while (feedback_name_table[entry_num] != NULL)
1299 if (strcmp(feedback_name_table[entry_num],
1300 current_task_name) == 0)
1302 limit_nproc_table[func_num] =
1303 feedback_nproc_limit_table[entry_num];
1304 break;
1306 ++entry_num;
1309 #endif
1310 task_func_table[func_num + 1] = NULL;
1311 break;
1313 ++func_num;
1315 current_task_num = func_num;
1316 #if defined(ALL_STATS)
1317 stat_doalls_table[current_task_num]++;
1318 #endif
1319 #endif
1320 #endif
1322 #if defined(FEEDBACK)
1323 if ((limit_nproc_table[current_task_num] != 0) &&
1324 (limit_nproc_table[current_task_num] < *_suif_aligned_my_nprocs))
1326 *_suif_aligned_my_nprocs = limit_nproc_table[current_task_num];
1328 #endif
1330 (*f)(0);
1332 #if defined(SEQUENTIAL) && defined(STATS)
1333 num_barriers++;
1334 #if defined(ALL_STATS)
1335 stat_barriers_table[current_task_num]++;
1336 #endif
1337 #endif
1339 #if defined(STATS) || defined(GEN_TRACE)
1340 SUIF_END_TIMER(diff_time, partime_hi_res, partime_low_res);
1342 # if defined(GEN_TRACE)
1343 gen_trace(current_task_num, current_task_num_iters, diff_time);
1344 # endif
1345 #endif
1347 #if defined(STATS)
1348 /* time doesn't count as parallel if we have a feedback
1349 * table where the iteration count should shut it off
1351 if ((!_dynamic_feedback_on) ||
1352 ((suif_dynamic_feedback_table[current_task_num].iter_threshhold <=
1353 current_task_num_iters) &&
1354 (suif_dynamic_feedback_table[current_task_num].max_parallelism != 1)))
1356 partime = partime + diff_time;
1358 #endif
1359 #if defined(ALL_STATS)
1360 stat_time_table[current_task_num] += diff_time;
1361 stat_iters_table[current_task_num] += current_task_num_iters;
1362 #endif
1363 #if defined(FEEDBACK)
1364 *_suif_aligned_my_nprocs = old_nprocs;
1365 #endif
1366 #if defined(STATS) || defined(FEEDBACK)
1368 #endif
1370 if (_main_stop)
1371 exit(0); /* some worker encountered STOP, stop also */
1374 (*_suif_aligned_doall_level)--;
1378 /*----------------------------------------------------------------------*/
1379 /* parallel execution */
1381 /* suif_doall() - start parallel region */
1383 void suif_doall(task_f f)
1385 (*_suif_aligned_doall_level)++;
1387 if ((*_suif_aligned_doall_level) > 1) /* only 1 level of parallelism */
1388 (*f)(-1);
1390 else
1392 #if defined(STATS) || defined(FEEDBACK) || defined(GEN_TRACE)
1394 #if defined(ALL_STATS) || defined(FEEDBACK)
1395 int func_num = 0;
1396 #endif
1397 #if defined(FEEDBACK)
1398 int old_nprocs = *_suif_aligned_my_nprocs;
1399 #endif
1400 #if defined(STATS) || defined(GEN_TRACE)
1401 SUIF_HI_RES_TIMER_DATA_TYPE partime_hi_res;
1402 double partime_low_res;
1403 double diff_time;
1405 SUIF_START_TIMER(partime_hi_res, partime_low_res);
1406 #endif
1407 #if defined(STATS)
1408 num_doalls++;
1409 #endif
1410 #if defined(ALL_STATS) || defined(FEEDBACK)
1411 if (task_func_table == NULL)
1413 table_size = 20;
1414 task_func_table = (void **)(malloc(table_size * sizeof(void *)));
1415 task_name_table = (char **)(malloc(table_size * sizeof(char *)));
1416 #if defined(ALL_STATS)
1417 stat_time_table = (double *)(malloc(table_size * sizeof(double)));
1418 stat_doalls_table = (int *)(malloc(table_size * sizeof(int)));
1419 stat_iters_table = (double *)(malloc(table_size * sizeof(double)));
1420 #if defined(SEQUENTIAL)
1421 stat_barriers_table = (int *)(malloc(table_size * sizeof(int)));
1422 stat_sync_neighbors_table =
1423 (int *)(malloc(table_size * sizeof(int)));
1424 stat_locks_table = (int *)(malloc(table_size * sizeof(int)));
1425 stat_reductions_table = (int *)(malloc(table_size * sizeof(int)));
1426 #endif
1427 #endif
1428 #if defined(FEEDBACK)
1429 limit_nproc_table = (int *)(malloc(table_size * sizeof(int)));
1430 #endif
1431 task_func_table[0] = NULL;
1433 while (task_func_table[func_num] != f)
1435 if (task_func_table[func_num] == NULL)
1437 if (func_num == table_size - 1)
1439 void **new_func_table;
1440 char **new_name_table;
1441 #if defined(ALL_STATS)
1442 double *new_time_table;
1443 int *new_doalls_table;
1444 double *new_iters_table;
1445 #if defined(SEQUENTIAL)
1446 int *new_barriers_table;
1447 int *new_sync_neighbors_table;
1448 int *new_locks_table;
1449 int *new_reductions_table;
1450 #endif
1451 #endif
1452 #if defined(FEEDBACK)
1453 int *new_nproc_table;
1454 #endif
1455 new_func_table =
1456 (void **)(malloc(table_size * 2 * sizeof(void *)));
1457 new_name_table =
1458 (char **)(malloc(table_size * 2 * sizeof(char *)));
1459 #if defined(ALL_STATS)
1460 new_time_table =
1461 (double *)(malloc(table_size * 2 * sizeof(double)));
1462 new_doalls_table =
1463 (int *)(malloc(table_size * 2 * sizeof(int)));
1464 new_iters_table =
1465 (double *)(malloc(table_size * 2 * sizeof(double)));
1466 #if defined(SEQUENTIAL)
1467 new_barriers_table =
1468 (int *)(malloc(table_size * 2 * sizeof(int)));
1469 new_sync_neighbors_table =
1470 (int *)(malloc(table_size * 2 * sizeof(int)));
1471 new_locks_table =
1472 (int *)(malloc(table_size * 2 * sizeof(int)));
1473 new_reductions_table =
1474 (int *)(malloc(table_size * 2 * sizeof(int)));
1475 #endif
1476 #endif
1477 #if defined(FEEDBACK)
1478 new_nproc_table =
1479 (int *)(malloc(table_size * 2 * sizeof(int)));
1480 #endif
1481 memcpy(new_func_table, task_func_table,
1482 table_size * sizeof(void *));
1483 memcpy(new_name_table, task_name_table,
1484 table_size * sizeof(char *));
1485 #if defined(ALL_STATS)
1486 memcpy(new_time_table, stat_time_table,
1487 table_size * sizeof(double));
1488 memcpy(new_doalls_table, stat_doalls_table,
1489 table_size * sizeof(int));
1490 memcpy(new_iters_table, stat_iters_table,
1491 table_size * sizeof(double));
1492 #if defined(SEQUENTIAL)
1493 memcpy(new_barriers_table, stat_barriers_table,
1494 table_size * sizeof(int));
1495 memcpy(new_sync_neighbors_table, stat_sync_neighbors_table,
1496 table_size * sizeof(int));
1497 memcpy(new_locks_table, stat_locks_table,
1498 table_size * sizeof(int));
1499 memcpy(new_reductions_table, stat_reductions_table,
1500 table_size * sizeof(int));
1501 #endif
1502 #endif
1503 #if defined(FEEDBACK)
1504 memcpy(new_nproc_table, limit_nproc_table,
1505 table_size * sizeof(int));
1506 #endif
1507 free(task_func_table);
1508 free(task_name_table);
1509 #if defined(ALL_STATS)
1510 free(stat_time_table);
1511 free(stat_doalls_table);
1512 free(stat_iters_table);
1513 #if defined(SEQUENTIAL)
1514 free(stat_barriers_table);
1515 free(stat_sync_neighbors_table);
1516 free(stat_locks_table);
1517 free(stat_reductions_table);
1518 #endif
1519 #endif
1520 #if defined(FEEDBACK)
1521 free(limit_nproc_table);
1522 #endif
1523 task_func_table = new_func_table;
1524 task_name_table = new_name_table;
1525 #if defined(ALL_STATS)
1526 stat_time_table = new_time_table;
1527 stat_doalls_table = new_doalls_table;
1528 stat_iters_table = new_iters_table;
1529 #if defined(SEQUENTIAL)
1530 stat_barriers_table = new_barriers_table;
1531 stat_sync_neighbors_table = new_sync_neighbors_table;
1532 stat_locks_table = new_locks_table;
1533 stat_reductions_table = new_reductions_table;
1534 #endif
1535 #endif
1536 #if defined(FEEDBACK)
1537 limit_nproc_table = new_nproc_table;
1538 #endif
1539 table_size *= 2;
1541 task_func_table[func_num] = f;
1542 task_name_table[func_num] = current_task_name;
1543 #if defined(ALL_STATS)
1544 stat_time_table[func_num] = 0.0;
1545 stat_doalls_table[func_num] = 0;
1546 stat_iters_table[func_num] = 0.0;
1547 #if defined(SEQUENTIAL)
1548 stat_barriers_table[func_num] = 0;
1549 stat_sync_neighbors_table[func_num] = 0;
1550 stat_locks_table[func_num] = 0;
1551 stat_reductions_table[func_num] = 0;
1552 #endif
1553 #endif
1554 #if defined(FEEDBACK)
1555 limit_nproc_table[func_num] = 0;
1557 unsigned long entry_num;
1559 entry_num = 0;
1560 while (feedback_name_table[entry_num] != NULL)
1562 if (strcmp(feedback_name_table[entry_num],
1563 current_task_name) == 0)
1565 limit_nproc_table[func_num] =
1566 feedback_nproc_limit_table[entry_num];
1567 break;
1569 ++entry_num;
1572 #endif
1573 task_func_table[func_num + 1] = NULL;
1574 break;
1576 ++func_num;
1578 current_task_num = func_num;
1579 #if defined(ALL_STATS)
1580 stat_doalls_table[current_task_num]++;
1581 #endif
1582 #endif
1583 #endif
1585 #if defined(FEEDBACK)
1586 if ((limit_nproc_table[current_task_num] != 0) &&
1587 (limit_nproc_table[current_task_num] < *_suif_aligned_my_nprocs))
1589 *_suif_aligned_my_nprocs = limit_nproc_table[current_task_num];
1591 #endif
1593 MSlaveBarrier_Release(&_suif_aligned_vars->barrier);
1595 (*f)(0);
1597 #if defined(SEQUENTIAL) && defined(STATS)
1598 num_barriers++;
1599 #if defined(ALL_STATS)
1600 stat_barriers_table[current_task_num]++;
1601 #endif
1602 #endif
1604 MSlaveBarrier_Wait(&_suif_aligned_vars->barrier,
1605 _suif_aligned_vars->_nproc);
1607 #if defined(STATS) || defined(GEN_TRACE)
1608 SUIF_END_TIMER(diff_time, partime_hi_res, partime_low_res);
1610 # if defined(GEN_TRACE)
1611 gen_trace(current_task_num, current_task_num_iters, diff_time);
1612 # endif
1613 #endif
1615 #if defined(STATS)
1616 /* time doesn't count as parallel if we have a feedback
1617 * table where the iteration count should shut it off
1619 if ((!_dynamic_feedback_on) ||
1620 ((suif_dynamic_feedback_table[current_task_num].iter_threshhold <=
1621 current_task_num_iters) &&
1622 (suif_dynamic_feedback_table[current_task_num].max_parallelism !=
1623 1))) {
1624 partime = partime + diff_time;
1626 #if defined(ALL_STATS)
1627 stat_time_table[current_task_num] += diff_time;
1628 stat_iters_table[current_task_num] += current_task_num_iters;
1629 #endif
1630 #endif
1631 #if defined(FEEDBACK)
1632 *_suif_aligned_my_nprocs = old_nprocs;
1633 #endif
1634 #if defined(STATS) || defined(FEEDBACK)
1636 #endif
1638 if (_main_stop)
1639 exit(0); /* some worker encountered STOP, stop also */
1642 (*_suif_aligned_doall_level)--;
1646 void suif_limited_doall(task_f f, int nproc_limit)
1648 int old_nprocs = *_suif_aligned_my_nprocs;
1649 if (nproc_limit < old_nprocs)
1650 *_suif_aligned_my_nprocs = nproc_limit;
1651 suif_doall(f);
1652 if (nproc_limit < old_nprocs)
1653 *_suif_aligned_my_nprocs = old_nprocs;
1656 void suif_named_doall(task_f f, char *task_func_name,
1657 int task_func_num, int num_iters,
1658 int is_iters_const)
1660 int old_nprocs;
1661 int new_nprocs;
1663 #if (defined(STATS) && defined(ALL_STATS)) || defined(GEN_TRACE)
1664 current_task_name = task_func_name;
1665 current_task_iters_const = is_iters_const;
1666 current_task_num_iters = num_iters;
1667 #endif
1668 current_task_num = task_func_num;
1669 old_nprocs = *_suif_aligned_my_nprocs;
1670 new_nprocs = suif_dynamic_feedback_get_nprocs(task_func_num,
1671 num_iters, old_nprocs);
1672 if (new_nprocs != old_nprocs) {
1673 *_suif_aligned_my_nprocs = new_nprocs;
1674 setnp_();
1677 if (new_nprocs > 1) {
1678 suif_doall(f);
1679 } else {
1680 suif_serial(f);
1683 if (new_nprocs != old_nprocs) {
1684 *_suif_aligned_my_nprocs = old_nprocs;
1689 /*----------------------------------------------------------------------*/
1690 /* suif_worker() - worker threads spin until work provided via suif_doall() */
1692 static void suif_worker(void)
1694 int myid; /* id of worker thread */
1696 myid = (THREADS_EQUAL(_master_pid, GETUNIQUEID)) ? 0 : FINC(_sysfop) + 1;
1697 INITID(myid)
1699 PROCASSIGN(((myid + _suif_aligned_vars->_proc_start) %
1700 _suif_aligned_vars->_maxproc))
1702 suif_counter_init(myid, 0);
1704 if (myid == 0)
1707 #if defined(SEQUENTIAL) && defined(STATS)
1708 num_barriers++;
1709 #if defined(ALL_STATS)
1710 stat_barriers_table[current_task_num]++;
1711 #endif
1712 #endif
1714 MSlaveBarrier_Wait(&_suif_aligned_vars->barrier,
1715 _suif_aligned_vars->_nproc);
1716 _suif_start(_argc, _argv, _envp);
1717 *_suif_aligned_task_f = 0;
1719 MSlaveBarrier_Release(&_suif_aligned_vars->barrier);
1721 else
1723 MSlaveBarrier_SlaveFirst(&_suif_aligned_vars->barrier,myid);
1724 while(1) {
1726 if (!(*_suif_aligned_task_f)) {
1727 return;
1728 } else {
1729 (**_suif_aligned_task_f)(myid);
1732 MSlaveBarrier_SlaveEnter(&_suif_aligned_vars->barrier,myid);
1737 /*----------------------------------------------------------------------*/
1738 /* suif_exit_log() - Log exit for other threads, so they don't keep waiting */
1739 /* Called when Fortran STOP statement is encountered */
1741 void suif_exit_log(void)
1743 if ((*_suif_aligned_doall_level) > 0) { /* inside parallel region */
1744 MSlaveBarrier_Wait(&_suif_aligned_vars->barrier,
1745 _suif_aligned_vars->_nproc);
1748 *_suif_aligned_task_f = 0;
1749 MSlaveBarrier_Release(&_suif_aligned_vars->barrier);
1751 #if defined(STATS)
1752 if (!suif_get_my_id())
1753 suif_wait_for_end();
1754 #endif
1755 #if defined(GEN_TRACE)
1756 dump_trace();
1757 #endif
1761 /*----------------------------------------------------------------------*/
1762 /* barriers */
1764 /* _barrier() - barrier used by runtime system */
1766 static void _barrier(void)
1768 #if defined(SEQUENTIAL) && defined(STATS)
1769 num_barriers++;
1770 #if defined(ALL_STATS)
1771 stat_barriers_table[current_task_num]++;
1772 #endif
1773 #endif
1775 BARRIER(_sysbar1, _suif_aligned_vars->_nproc)
1778 static void _barrier2(void) /* this barrier used at end of parallel task */
1780 #if defined(SEQUENTIAL) && defined(STATS)
1781 num_barriers++;
1782 #if defined(ALL_STATS)
1783 stat_barriers_table[current_task_num]++;
1784 #endif
1785 #endif
1787 BARRIER(_sysbar2, _suif_aligned_vars->_nproc)
1790 /*----------------------------------------------------------------------*/
1791 /* suif_global_barrier() - block until all threads enter barrier */
1793 void suif_global_barrier(int id)
1795 #if defined(SEQUENTIAL) && defined(STATS)
1796 num_barriers++;
1797 #if defined(ALL_STATS)
1798 stat_barriers_table[current_task_num]++;
1799 #endif
1800 #endif
1802 switch (id % 8) {
1803 case 0:
1804 BARRIER(_bar_global0, _suif_aligned_vars->_nproc)
1805 break;
1806 case 1:
1807 BARRIER(_bar_global1, _suif_aligned_vars->_nproc)
1808 break;
1809 case 2:
1810 BARRIER(_bar_global2, _suif_aligned_vars->_nproc)
1811 break;
1812 case 3:
1813 BARRIER(_bar_global3, _suif_aligned_vars->_nproc)
1814 break;
1815 case 4:
1816 BARRIER(_bar_global4, _suif_aligned_vars->_nproc)
1817 break;
1818 case 5:
1819 BARRIER(_bar_global5, _suif_aligned_vars->_nproc)
1820 break;
1821 case 6:
1822 BARRIER(_bar_global6, _suif_aligned_vars->_nproc)
1823 break;
1824 case 7:
1825 BARRIER(_bar_global7, _suif_aligned_vars->_nproc)
1826 break;
/* glbar_() - by-reference wrapper around suif_global_barrier()
 * (presumably the Fortran-callable entry point) */
void glbar_(int *id)
{
    const int barrier_id = *id;

    suif_global_barrier(barrier_id);
}
1836 /*----------------------------------------------------------------------*/
1837 /* suif_barrier() - block until "nproc" processor enter barrier "id" */
1839 void suif_barrier(int id, int nproc)
1841 #if defined(SEQUENTIAL) && defined(STATS)
1842 num_barriers++;
1843 #if defined(ALL_STATS)
1844 stat_barriers_table[current_task_num]++;
1845 #endif
1846 #endif
1848 switch (id) /* block until nproc threads reach barrier */
1850 case 1: BARRIER(_bar1, nproc) break;
1851 case 2: BARRIER(_bar2, nproc) break;
1852 case 3: BARRIER(_bar3, nproc) break;
1853 case 4: BARRIER(_bar4, nproc) break;
1854 case 5: BARRIER(_bar5, nproc) break;
1855 case 6: BARRIER(_bar6, nproc) break;
1856 case 7: BARRIER(_bar7, nproc) break;
1857 case 8: BARRIER(_bar8, nproc) break;
1858 default: fprintf(stderr, "Barrier[%d] not supported\n", id); break;
/*----------------------------------------------------------------------*/
/* suif_sync_neighbor() - block until neighbor threads enter barrier
 *
 * Counts the event when statistics are compiled in, then falls back on
 * the global barrier with the same id. */

void suif_sync_neighbor(int id)
{
#if defined(SEQUENTIAL) && defined(STATS)
    ++num_sync_neighbors;
#  if defined(ALL_STATS)
    ++stat_sync_neighbors_table[current_task_num];
#  endif
#endif

    /* just use normal barrier for now */
    suif_global_barrier(id);
}
/* sync_() - by-reference wrapper around suif_sync_neighbor()
 * (presumably the Fortran-callable entry point) */
void sync_(int *id)
{
    const int barrier_id = *id;

    suif_sync_neighbor(barrier_id);
}
1885 /*----------------------------------------------------------------------*/
1886 /* locks */
1888 /* suif_lock() - block until able to acquire lock "id" */
1890 void suif_lock(int id)
1892 #if defined(SEQUENTIAL) && defined(STATS)
1893 num_locks++;
1894 #if defined(ALL_STATS)
1895 stat_locks_table[current_task_num]++;
1896 #endif
1897 #endif
1899 switch (id)
1901 case 1: LOCK(_lock1) break;
1902 case 2: LOCK(_lock2) break;
1903 case 3: LOCK(_lock3) break;
1904 case 4: LOCK(_lock4) break;
1905 case 5: LOCK(_lock5) break;
1906 case 6: LOCK(_lock6) break;
1907 case 7: LOCK(_lock7) break;
1908 case 8: LOCK(_lock8) break;
1909 default: fprintf(stderr, "Lock[%d] not supported\n", id); break;
/* slock_() - by-reference wrapper around suif_lock()
 * (presumably the Fortran-callable entry point) */
void slock_(int *id)
{
    const int lock_id = *id;

    suif_lock(lock_id);
}
1918 /*----------------------------------------------------------------------*/
1919 /* suif_reduction_lock() - block until able to acquire lock "id" */
1920 #define MAX_RLOCK_ID (NUM_RLOCK-1)
1922 void suif_reduction_lock(int id)
1924 #if defined(SEQUENTIAL) && defined(STATS)
1925 num_locks++;
1926 #if defined(ALL_STATS)
1927 stat_locks_table[current_task_num]++;
1928 #endif
1929 #endif
1931 ALOCK(_lock_reduction,(id&MAX_RLOCK_ID))
/* rlock_() - by-reference wrapper around suif_reduction_lock()
 * (presumably the Fortran-callable entry point) */
void rlock_(int *id)
{
    const int lock_id = *id;

    suif_reduction_lock(lock_id);
}
1941 /*----------------------------------------------------------------------*/
1942 /* suif_unlock() - release lock "id" */
1944 void suif_unlock(int id)
1946 switch (id)
1948 case 1: UNLOCK(_lock1) break;
1949 case 2: UNLOCK(_lock2) break;
1950 case 3: UNLOCK(_lock3) break;
1951 case 4: UNLOCK(_lock4) break;
1952 case 5: UNLOCK(_lock5) break;
1953 case 6: UNLOCK(_lock6) break;
1954 case 7: UNLOCK(_lock7) break;
1955 case 8: UNLOCK(_lock8) break;
1956 default: fprintf(stderr, "Lock[%d] not supported\n", id); break;
/* sulock_() - by-reference wrapper around suif_unlock()
 * (presumably the Fortran-callable entry point) */
void sulock_(int *id)
{
    const int lock_id = *id;

    suif_unlock(lock_id);
}
1967 /*----------------------------------------------------------------------*/
1968 /* suif_reduction_unlock() - release lock "id" */
1970 void suif_reduction_unlock(int id)
1972 AUNLOCK(_lock_reduction,(id&MAX_RLOCK_ID))
/* rulock_() - by-reference wrapper around suif_reduction_unlock()
 * (presumably the Fortran-callable entry point) */
void rulock_(int *id)
{
    const int lock_id = *id;

    suif_reduction_unlock(lock_id);
}
1981 /*----------------------------------------------------------------------*/
1982 /* fetch & increment counters */
1984 /* suif_finc_init() - initialize fetch&inc counter "id" to 0 */
1986 void suif_finc_init(int id)
1988 switch (id)
1990 case 1: FOPINIT(_fop1,0); break;
1991 case 2: FOPINIT(_fop2,0); break;
1992 case 3: FOPINIT(_fop3,0); break;
1993 case 4: FOPINIT(_fop4,0); break;
1994 case 5: FOPINIT(_fop5,0); break;
1995 case 6: FOPINIT(_fop6,0); break;
1996 case 7: FOPINIT(_fop7,0); break;
1997 case 8: FOPINIT(_fop8,0); break;
1998 default: fprintf(stderr, "Getsub[%d] not supported\n", id); break;
2004 /*----------------------------------------------------------------------*/
2005 /* suif_finc() - fetch & increment counter "id" */
2007 int suif_finc(int id)
2009 switch (id)
2011 case 1: return FINC(_fop1);
2012 case 2: return FINC(_fop2);
2013 case 3: return FINC(_fop3);
2014 case 4: return FINC(_fop4);
2015 case 5: return FINC(_fop5);
2016 case 6: return FINC(_fop6);
2017 case 7: return FINC(_fop7);
2018 case 8: return FINC(_fop8);
2019 default: fprintf(stderr, "Getsub[%d] not supported\n", id); return -1;
2025 /*----------------------------------------------------------------------*/
2026 /* counters */
2028 /* suif_counter_init_all() - clear all counters to 0 */
2030 void suif_counter_init_all(void)
2032 memset((void *) _mylocks, 0, sizeof(_mylocks));
2037 /*----------------------------------------------------------------------*/
2038 /* suif_counter_init_range() - zero counters from 0 to "id" for all procs */
2040 void suif_counter_init_range(int id)
2042 int i, j;
2044 if ((id < 0) || (id >= DEFAULT_MAXCOUNTER))
2045 id = DEFAULT_MAXCOUNTER-1;
2047 for (i = 0; i < DEFAULT_MAXPROC; i++) {
2048 for (j = 0; j <= id; j++) {
2049 _mylocks[i][j] = 0;
/* cinitr_() - by-reference wrapper around suif_counter_init_range()
 * (presumably the Fortran-callable entry point) */
void cinitr_(int *id)
{
    const int last_counter = *id;

    suif_counter_init_range(last_counter);
}
2061 /*----------------------------------------------------------------------*/
2062 /* suif_counter_init() - set "id" counter for processor "proc" to 0 */
2064 void suif_counter_init(int proc, int id)
2066 if ((proc < 0) || (proc >= DEFAULT_MAXPROC))
2067 fprintf(stderr, "Counter: proc %d out of range \n", proc);
2068 if ((id < 0) || (id >= DEFAULT_MAXCOUNTER))
2069 fprintf(stderr, "Counter: id %d out of range \n", id);
2071 _mylocks[proc][id] = 0;
2075 /*----------------------------------------------------------------------*/
2076 /* suif_counter_incr() - increment "id" counter for processor "proc" by 1 */
2078 void suif_counter_incr(int proc, int id)
2080 if ((proc < 0) || (proc >= DEFAULT_MAXPROC))
2081 fprintf(stderr, "Counter: proc %d out of range \n", proc);
2082 if ((id < 0) || (id >= DEFAULT_MAXCOUNTER))
2083 fprintf(stderr, "Counter: id %d out of range \n", id);
2085 MEMORY_BARRIER
2087 _mylocks[proc][id]++;
2089 MEMORY_BARRIER
/* cincr_() - by-reference wrapper around suif_counter_incr()
 * (presumably the Fortran-callable entry point) */
void cincr_(int *proc, int *id)
{
    const int which_proc = *proc;
    const int which_counter = *id;

    suif_counter_incr(which_proc, which_counter);
}
2098 /*----------------------------------------------------------------------*/
2099 /* suif_counter_set() - set "id" counter for processor "proc" to "val" */
2101 void suif_counter_set(int proc, int id, int val)
2103 if ((proc < 0) || (proc >= DEFAULT_MAXPROC))
2104 fprintf(stderr, "Counter: proc %d out of range \n", proc);
2105 if ((id < 0) || (id >= DEFAULT_MAXCOUNTER))
2106 fprintf(stderr, "Counter: id %d out of range \n", id);
2108 _mylocks[proc][id] = val;
2110 MEMORY_BARRIER
/* cset_() - by-reference wrapper around suif_counter_set()
 * (presumably the Fortran-callable entry point) */
void cset_(int *proc, int *id, int *val)
{
    const int which_proc = *proc;
    const int which_counter = *id;
    const int new_value = *val;

    suif_counter_set(which_proc, which_counter, new_value);
}
2120 /*----------------------------------------------------------------------*/
2121 /* suif_counter_set_range() - set "0..id-1" counter for processors "0..proc-1" to "val" */
2123 void suif_counter_set_range(int proc, int id, int val)
2125 int i, j;
2127 if ((proc < 0) || (proc >= DEFAULT_MAXPROC))
2128 fprintf(stderr, "Counter: proc %d out of range \n", proc);
2129 if ((id < 0) || (id >= DEFAULT_MAXCOUNTER))
2130 fprintf(stderr, "Counter: id %d out of range \n", id);
2132 for (i = 0; i < proc; i++) {
2133 for (j = 0; j < id; j++) {
2134 _mylocks[i][j] = val;
/* csetr_() - by-reference wrapper around suif_counter_set_range()
 * (presumably the Fortran-callable entry point) */
void csetr_(int *proc, int *id, int *val)
{
    const int proc_bound = *proc;
    const int counter_bound = *id;
    const int new_value = *val;

    suif_counter_set_range(proc_bound, counter_bound, new_value);
}
2146 /*----------------------------------------------------------------------*/
2147 /* suif_counter_wait() - block until counter achieves value "val" or
2148 greater */
2150 void suif_counter_wait(int proc, int id, int val)
2152 if ((proc < 0) || (proc >= DEFAULT_MAXPROC))
2153 fprintf(stderr, "Counter: proc %d out of range \n", proc);
2154 if ((id < 0) || (id >= DEFAULT_MAXCOUNTER))
2155 fprintf(stderr, "Counter: id %d out of range \n", id);
2157 #if defined(SEQUENTIAL)
2158 fprintf(stderr, "Counters not supported in SEQUENTIAL mode \n");
2159 return;
2160 #endif
2162 while (_mylocks[proc][id] < val) ; /* spin */
2164 MEMORY_BARRIER
/* cwait_() - by-reference wrapper around suif_counter_wait()
 * (presumably the Fortran-callable entry point) */
void cwait_(int *proc, int *id, int *val)
{
    const int which_proc = *proc;
    const int which_counter = *id;
    const int target = *val;

    suif_counter_wait(which_proc, which_counter, target);
}
/*----------------------------------------------------------------------*/
/* suif_speculate_begin() and friends - begin hooks.  All four are empty
 * in this runtime.  They are defined with (void) so that they carry a
 * real prototype, like the other no-argument routines in this file. */

void suif_speculate_begin(void)
{
}

void sbegin_(void)
{
}

void suif_par_begin(void)
{
}

void pbegin_(void)
{
}
/*----------------------------------------------------------------------*/
/* suif_infinite_loop() - spin forever; never returns.  Defined with
 * (void) so that it carries a real prototype, like the other no-argument
 * routines in this file. */

void suif_infinite_loop(void)
{
    for (;;) {
        /* spin */
    }
}
/*----------------------------------------------------------------------*/
/* suif_speculate_commit() and friends - commit hooks.  All four are empty
 * in this runtime.  They are defined with (void) so that they carry a
 * real prototype, like the other no-argument routines in this file. */

void suif_speculate_commit(void)
{
}

void commit_(void)
{
}

void suif_par_commit(void)
{
}

void pcommi_(void)
{
}
/*----------------------------------------------------------------------*/
/* suif_speculate_terminate() and friends - terminate hooks.  All four are
 * empty in this runtime.  They are defined with (void) so that they carry
 * a real prototype, like the other no-argument routines in this file. */

void suif_speculate_terminate(void)
{
}

void termin_(void)
{
}

void suif_par_terminate(void)
{
}

void ptermi_(void)
{
}
2251 /* dummy routine for simulator */
2252 void suif_start_packing(task_f f, char *name) { }
2254 /*----------------------------------------------------------------------*/
2255 /* timers */
2257 /* suif_clock() - return user time in seconds */
2259 static SUIF_HI_RES_TIMER_DATA_TYPE hi_res_start_time;
2260 static double low_res_start_time;
2262 double suif_clock(void)
2264 double t;
2266 SECONDCLOCK(t)
2267 return t;
/* suif_hi_res_clock() - return the reading produced by the
 * SUIF_HI_RES_CLOCK macro */
double suif_hi_res_clock(void)
{
    double t;

    SUIF_HI_RES_CLOCK(t);

    return t;
}
2278 void suif_start_timer(void)
2280 SUIF_START_TIMER(hi_res_start_time, low_res_start_time);
2283 double suif_end_timer(void)
2285 double result;
2286 SUIF_END_TIMER(result, hi_res_start_time, low_res_start_time);
2287 return result;
2291 /*----------------------------------------------------------------------*/
2292 /* reductions & scans */
/* statistics() - reduction bookkeeping hook.  Counts a reduction in
 * SEQUENTIAL+STATS builds (and per task with ALL_STATS); expands to
 * nothing otherwise. */
#if defined(SEQUENTIAL) && defined(STATS) && defined(ALL_STATS)
#define statistics() \
    { num_reductions++; stat_reductions_table[current_task_num]++; }
#elif defined(SEQUENTIAL) && defined(STATS)
#define statistics() { num_reductions++; }
#else
#define statistics()
#endif
2305 /* reduce routines */
2307 REDUCTION_ROUTINES(signed char, signed_char, sc)
2308 REDUCTION_ROUTINES(unsigned char, unsigned_char, uc)
2309 REDUCTION_ROUTINES(short, short, ss)
2310 REDUCTION_ROUTINES(unsigned short, unsigned_short, us)
2311 REDUCTION_ROUTINES(int, int, si)
2312 REDUCTION_ROUTINES(unsigned int, unsigned_int, ui)
2313 REDUCTION_ROUTINES(long, long, sl)
2314 REDUCTION_ROUTINES(unsigned long, unsigned_long, ul)
2315 REDUCTION_ROUTINES(float, float, f)
2316 REDUCTION_ROUTINES(double, double, d)
2317 REDUCTION_ROUTINES(long double, long_double, ld)
2319 /* Initializations */
2320 REDUCTION_INITS(signed char, signed_char, sc, 0, 1, SCHAR_MIN,
2321 SCHAR_MAX)
2322 REDUCTION_INITS(unsigned char, unsigned_char, uc, 0, 1, 0,
2323 UCHAR_MAX)
2324 REDUCTION_INITS(short, short, ss, 0, 1, SHRT_MIN,
2325 SHRT_MAX)
2326 REDUCTION_INITS(unsigned short, unsigned_short, us, 0, 1, 0,
2327 USHRT_MAX)
2328 REDUCTION_INITS(int, int, si, 0, 1, INT_MIN,
2329 INT_MAX)
2330 REDUCTION_INITS(unsigned int, unsigned_int, ui, 0u, 1u, 0u,
2331 UINT_MAX)
2332 REDUCTION_INITS(long, long, sl, 0l, 1l, LONG_MIN,
2333 LONG_MAX)
2334 REDUCTION_INITS(unsigned long, unsigned_long, ul, 0ul, 1ul, 0ul,
2335 ULONG_MAX)
2336 REDUCTION_INITS(float, float, f, 0.0f, 1.0f, -FLT_MAX,
2337 FLT_MAX)
2338 REDUCTION_INITS(double, double, d, 0.0, 1.0, -DBL_MAX,
2339 DBL_MAX)
2340 REDUCTION_INITS(long double, long_double, ld, 0.0l, 1.0l, -LDBL_MAX,
2341 LDBL_MAX)
2343 INITGEN(signed char, signed_char, sc)
2344 INITGEN(unsigned char, unsigned_char, uc)
2345 INITGEN(short, short, ss)
2346 INITGEN(unsigned short, unsigned_short, us)
2347 INITGEN(int, int, si)
2348 INITGEN(unsigned int, unsigned_int, ui)
2349 INITGEN(long, long, sl)
2350 INITGEN(unsigned long, unsigned_long, ul)
2351 INITGEN(float, float, f)
2352 INITGEN(double, double, d)
2353 INITGEN(long double, long_double, ld)
2355 /* eof */