/* Source: gcc/modulo-sched.c from official-gcc.git
   (web-viewer listing, blob 282cb80a05a30fc058e00ea823e1bc345a83de58).
   NOTE(review): the extraction embedded original line numbers into each
   line and dropped blank and brace-only lines; compare against upstream
   before compiling.  */
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
91 emitting the control part, and a kernel which is to iterate that
92 constant number of iterations less than the original loop. So the
93 control part should be a set of insns clearly identified and having
94 its own iv, not otherwise used in the loop (at-least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
97 where (1) the control part comprises of all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
102 /* This page defines partial-schedule structures and functions for
103 modulo scheduling. */
/* NOTE(review): the struct bodies below lost their '{' / '};' lines in
   extraction — restore them against upstream modulo-sched.c.  */
105 typedef struct partial_schedule *partial_schedule_ptr;
106 typedef struct ps_insn *ps_insn_ptr;
108 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
111 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
114 /* Perform signed modulo, always returning a non-negative value. */
115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
117 /* The number of different iterations the nodes in ps span, assuming
118 the stage boundaries are placed efficiently. */
/* I.e. ceil ((max_cycle - min_cycle + 1) / ii), computed with the usual
   add-(divisor-1)-then-divide idiom.  */
119 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
120 + 1 + (ps)->ii - 1) / (ps)->ii)
122 /* A single instruction in the partial schedule. */
123 struct ps_insn
125 /* The corresponding DDG_NODE. */
126 ddg_node_ptr node;
128 /* The (absolute) cycle in which the PS instruction is scheduled.
129 Same as SCHED_TIME (node). */
130 int cycle;
132 /* The next/prev PS_INSN in the same row. */
133 ps_insn_ptr next_in_row,
134 prev_in_row;
136 /* The number of nodes in the same row that come after this node. */
137 int row_rest_count;
140 /* Holds the partial schedule as an array of II rows. Each entry of the
141 array points to a linked list of PS_INSNs, which represents the
142 instructions that are scheduled for that row. */
143 struct partial_schedule
145 int ii; /* Number of rows in the partial schedule. */
146 int history; /* Threshold for conflict checking using DFA. */
148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
149 ps_insn_ptr *rows;
151 /* The earliest absolute cycle of an insn in the partial schedule. */
152 int min_cycle;
154 /* The latest absolute cycle of an insn in the partial schedule. */
155 int max_cycle;
157 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
160 /* We use this to record all the register replacements we do in
161 the kernel so we can undo SMS if it is not profitable. */
162 struct undo_replace_buff_elem
164 rtx insn;
165 rtx orig_reg;
166 rtx new_reg;
167 struct undo_replace_buff_elem *next;
172 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
173 static void free_partial_schedule (partial_schedule_ptr);
174 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
175 void print_partial_schedule (partial_schedule_ptr, FILE *);
176 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
177 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
178 ddg_node_ptr node, int cycle,
179 sbitmap must_precede,
180 sbitmap must_follow);
181 static void rotate_partial_schedule (partial_schedule_ptr, int);
182 void set_row_column_for_ps (partial_schedule_ptr);
183 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
184 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
187 /* This page defines constants and structures for the modulo scheduling
188 driver. */
190 /* As in haifa-sched.c: */
191 /* issue_rate is the number of insns that can be scheduled in the same
192 machine cycle. It can be defined in the config/mach/mach.h file,
193 otherwise we set it to 1. */
/* Number of insns issuable per machine cycle; set in sms_schedule from
   the target hook, defaulting to 1.  */
195 static int issue_rate;
197 static int sms_order_nodes (ddg_ptr, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr);
199 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr, rtx);
201 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
202 rtx, rtx);
203 static void duplicate_insns_of_cycles (partial_schedule_ptr,
204 int, int, int, rtx);
/* Accessors for the per-node scheduling data hung off each ddg node's
   aux.info pointer (see set_node_sched_params).  X is a ddg_node_ptr.  */
206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
208 #define SCHED_FIRST_REG_MOVE(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
210 #define SCHED_NREG_MOVES(x) \
211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
/* NOTE(review): the struct's '{' line was lost in extraction.  */
217 typedef struct node_sched_params
219 int asap; /* A lower-bound on the absolute scheduling cycle. */
220 int time; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
226 rtx first_reg_move;
228 /* The number of register-move instructions added, immediately preceding
229 first_reg_move. */
230 int nreg_moves;
232 int row; /* Holds time % ii. */
233 int stage; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
237 int column;
238 } *node_sched_params_ptr;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
244 static const char *
245 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
247 static char tmp[80];
249 sprintf (tmp, "i%4d", INSN_UID (insn));
250 return tmp;
253 static void
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
255 regset cond_exec ATTRIBUTE_UNUSED,
256 regset used ATTRIBUTE_UNUSED,
257 regset set ATTRIBUTE_UNUSED)
/* The scheduler-instance description handed to the haifa scheduler
   infrastructure.  Only sms_print_insn and
   compute_jump_reg_dependencies are provided; every other hook is
   unused by SMS.  NOTE(review): positional initializer — field order
   must match struct sched_info in sched-int.h; the '{' / '};' lines
   were lost in extraction.  */
261 static struct sched_info sms_sched_info =
263 NULL,
264 NULL,
265 NULL,
266 NULL,
267 NULL,
268 sms_print_insn,
269 NULL,
270 compute_jump_reg_dependencies,
271 NULL, NULL,
272 NULL, NULL,
273 0, 0, 0,
275 NULL, NULL, NULL, NULL, NULL,
280 /* Given HEAD and TAIL which are the first and last insns in a loop;
281 return the register which controls the loop. Return zero if it has
282 more than one occurrence in the loop besides the control part or the
283 do-loop pattern is not of the form we expect. */
/* NOTE(review): body braces were lost in extraction; restore against
   upstream before compiling.  */
284 static rtx
285 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
287 #ifdef HAVE_doloop_end
288 rtx reg, condition, insn, first_insn_not_to_check;
290 if (!JUMP_P (tail))
291 return NULL_RTX;
293 /* TODO: Free SMS's dependence on doloop_condition_get. */
294 condition = doloop_condition_get (tail);
295 if (! condition)
296 return NULL_RTX;
/* Operand 0 of the condition carries the count register — either
   directly, or (presumably for decrement-and-branch patterns) as the
   first operand of a PLUS; anything else means doloop_condition_get
   returned a shape we do not expect.  */
298 if (REG_P (XEXP (condition, 0)))
299 reg = XEXP (condition, 0);
300 else if (GET_CODE (XEXP (condition, 0)) == PLUS
301 && REG_P (XEXP (XEXP (condition, 0), 0)))
302 reg = XEXP (XEXP (condition, 0), 0);
303 else
304 gcc_unreachable ();
306 /* Check that the COUNT_REG has no other occurrences in the loop
307 until the decrement. We assume the control part consists of
308 either a single (parallel) branch-on-count or a (non-parallel)
309 branch immediately preceded by a single (decrement) insn. */
310 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
311 : PREV_INSN (tail));
/* Any mention of REG in the loop body disqualifies the loop: the
   control part would not be cleanly separable.  */
313 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
314 if (reg_mentioned_p (reg, insn))
316 if (dump_file)
318 fprintf (dump_file, "SMS count_reg found ");
319 print_rtl_single (dump_file, reg);
320 fprintf (dump_file, " outside control in insn:\n");
321 print_rtl_single (dump_file, insn);
324 return NULL_RTX;
327 return reg;
328 #else
329 return NULL_RTX;
330 #endif
333 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
334 that the number of iterations is a compile-time constant. If so,
335 return the rtx that sets COUNT_REG to a constant, and set COUNT to
336 this constant. Otherwise return 0. */
/* NOTE(review): body braces were lost in extraction.  */
337 static rtx
338 const_iteration_count (rtx count_reg, basic_block pre_header,
339 HOST_WIDEST_INT * count)
341 rtx insn;
342 rtx head, tail;
344 if (! pre_header)
345 return NULL_RTX;
347 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
/* Walk the pre-header backwards so the LAST definition of COUNT_REG
   before the loop is the one examined.  */
349 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
350 if (INSN_P (insn) && single_set (insn) &&
351 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
353 rtx pat = single_set (insn);
355 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
357 *count = INTVAL (SET_SRC (pat));
358 return insn;
/* The nearest def is not a constant — give up rather than keep
   scanning past it.  */
361 return NULL_RTX;
364 return NULL_RTX;
367 /* A very simple resource-based lower bound on the initiation interval.
368 ??? Improve the accuracy of this bound by considering the
369 utilization of various units. */
370 static int
371 res_MII (ddg_ptr g)
373 if (targetm.sched.sms_res_mii)
374 return targetm.sched.sms_res_mii (g);
376 return (g->num_nodes / issue_rate);
380 /* Points to the array that contains the sched data for each node. */
381 static node_sched_params_ptr node_sched_params;
383 /* Allocate sched_params for each node and initialize it. Assumes that
384 the aux field of each node contain the asap bound (computed earlier),
385 and copies it into the sched_params field. */
386 static void
387 set_node_sched_params (ddg_ptr g)
389 int i;
391 /* Allocate for each node in the DDG a place to hold the "sched_data". */
392 /* Initialize ASAP/ALAP/HIGHT to zero. */
393 node_sched_params = (node_sched_params_ptr)
394 xcalloc (g->num_nodes,
395 sizeof (struct node_sched_params));
397 /* Set the pointer of the general data of the node to point to the
398 appropriate sched_params structure. */
399 for (i = 0; i < g->num_nodes; i++)
401 /* Watch out for aliasing problems? */
402 node_sched_params[i].asap = g->nodes[i].aux.count;
403 g->nodes[i].aux.info = &node_sched_params[i];
/* Dump to FILE the scheduling parameters (asap, time, reg-moves) of
   the first NUM_NODES nodes of G.  No-op when FILE is NULL.
   NOTE(review): body braces were lost in extraction.  */
407 static void
408 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
410 int i;
412 if (! file)
413 return;
414 for (i = 0; i < num_nodes; i++)
416 node_sched_params_ptr nsp = &node_sched_params[i];
417 rtx reg_move = nsp->first_reg_move;
418 int j;
420 fprintf (file, "Node = %d; INSN = %d\n", i,
421 (INSN_UID (g->nodes[i].insn)));
422 fprintf (file, " asap = %d:\n", nsp->asap);
423 fprintf (file, " time = %d:\n", nsp->time);
424 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
/* The reg-moves were inserted immediately before first_reg_move, so
   walk them via PREV_INSN.  */
425 for (j = 0; j < nsp->nreg_moves; j++)
427 fprintf (file, " reg_move = ");
428 print_rtl_single (file, reg_move);
429 reg_move = PREV_INSN (reg_move);
435 Breaking intra-loop register anti-dependences:
436 Each intra-loop register anti-dependence implies a cross-iteration true
437 dependence of distance 1. Therefore, we can remove such false dependencies
438 and figure out if the partial schedule broke them by checking if (for a
439 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
440 if so generate a register move. The number of such moves is equal to:
441 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
442 nreg_moves = ----------------------------------- + 1 - { dependence.
443 ii { 1 if not.
/* Perform modulo variable expansion for PS: for every node whose
   defined register is live across more than one stage, insert enough
   register-copy insns and redirect the affected uses to the copies.
   Returns a list of the replacements performed so SMS can be undone
   if unprofitable.  If RESCAN, notify df about the modified insns.
   NOTE(review): body braces were lost in extraction.  */
445 static struct undo_replace_buff_elem *
446 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
448 ddg_ptr g = ps->g;
449 int ii = ps->ii;
450 int i;
451 struct undo_replace_buff_elem *reg_move_replaces = NULL;
453 for (i = 0; i < g->num_nodes; i++)
455 ddg_node_ptr u = &g->nodes[i];
456 ddg_edge_ptr e;
457 int nreg_moves = 0, i_reg_move;
458 sbitmap *uses_of_defs;
459 rtx last_reg_move;
460 rtx prev_reg, old_reg;
462 /* Compute the number of reg_moves needed for u, by looking at life
463 ranges started at u (excluding self-loops). */
464 for (e = u->out; e; e = e->next_out)
465 if (e->type == TRUE_DEP && e->dest != e->src)
467 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
/* A distance-1 dependence reads the value defined one iteration
   earlier, hence the extra ii in the span.  */
469 if (e->distance == 1)
470 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
472 /* If dest precedes src in the schedule of the kernel, then dest
473 will read before src writes and we can save one reg_copy. */
474 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
475 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
476 nreg_moves4e--;
478 nreg_moves = MAX (nreg_moves, nreg_moves4e);
481 if (nreg_moves == 0)
482 continue;
484 /* Every use of the register defined by node may require a different
485 copy of this register, depending on the time the use is scheduled.
486 Set a bitmap vector, telling which nodes use each copy of this
487 register. */
488 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
489 sbitmap_vector_zero (uses_of_defs, nreg_moves);
/* Second pass over the same edges: dest_copy repeats the nreg_moves4e
   computation above to pick which copy (1-based) each use reads;
   copy 0 means the use reads the original register.  */
490 for (e = u->out; e; e = e->next_out)
491 if (e->type == TRUE_DEP && e->dest != e->src)
493 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
495 if (e->distance == 1)
496 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
498 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
499 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
500 dest_copy--;
502 if (dest_copy)
503 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid)
506 /* Now generate the reg_moves, attaching relevant uses to them. */
507 SCHED_NREG_MOVES (u) = nreg_moves;
508 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
509 last_reg_move = u->insn;
/* Chain of copies: new_reg <- prev_reg, each inserted before the
   previous move so PREV_INSN walks from first_reg_move through all
   of them.  */
511 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
513 unsigned int i_use = 0;
514 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
515 rtx reg_move = gen_move_insn (new_reg, prev_reg);
516 sbitmap_iterator sbi;
518 add_insn_before (reg_move, last_reg_move, NULL);
519 last_reg_move = reg_move;
521 if (!SCHED_FIRST_REG_MOVE (u))
522 SCHED_FIRST_REG_MOVE (u) = reg_move;
524 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
526 struct undo_replace_buff_elem *rep;
/* Record the replacement (prepended to the undo list), then apply
   it to the using insn.  */
528 rep = (struct undo_replace_buff_elem *)
529 xcalloc (1, sizeof (struct undo_replace_buff_elem));
530 rep->insn = g->nodes[i_use].insn;
531 rep->orig_reg = old_reg;
532 rep->new_reg = new_reg;
534 if (! reg_move_replaces)
535 reg_move_replaces = rep;
536 else
538 rep->next = reg_move_replaces;
539 reg_move_replaces = rep;
542 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
543 if (rescan)
544 df_insn_rescan (g->nodes[i_use].insn);
547 prev_reg = new_reg;
549 sbitmap_vector_free (uses_of_defs);
551 return reg_move_replaces;
554 /* Free memory allocated for the undo buffer. */
555 static void
556 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
559 while (reg_move_replaces)
561 struct undo_replace_buff_elem *rep = reg_move_replaces;
563 reg_move_replaces = reg_move_replaces->next;
564 free (rep);
568 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
569 of SCHED_ROW and SCHED_STAGE. */
/* Shift the SCHED_TIMEs of all nodes in PS so the earliest becomes 0,
   and derive SCHED_ROW (time % ii) and SCHED_STAGE (time / ii) from
   the normalized times.  NOTE(review): body braces were lost in
   extraction.  */
570 static void
571 normalize_sched_times (partial_schedule_ptr ps)
573 int row;
574 int amount = PS_MIN_CYCLE (ps);
575 int ii = ps->ii;
576 ps_insn_ptr crr_insn;
578 for (row = 0; row < ii; row++)
579 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
581 ddg_node_ptr u = crr_insn->node;
582 int normalized_time = SCHED_TIME (u) - amount;
584 if (dump_file)
585 fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
586 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
587 (u), ps->min_cycle);
/* Every scheduled time must lie within the recorded cycle bounds.  */
588 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
589 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
590 SCHED_TIME (u) = normalized_time;
591 SCHED_ROW (u) = normalized_time % ii;
592 SCHED_STAGE (u) = normalized_time / ii;
596 /* Set SCHED_COLUMN of each node according to its position in PS. */
597 static void
598 set_columns_for_ps (partial_schedule_ptr ps)
600 int row;
602 for (row = 0; row < ps->ii; row++)
604 ps_insn_ptr cur_insn = ps->rows[row];
605 int column = 0;
607 for (; cur_insn; cur_insn = cur_insn->next_in_row)
608 SCHED_COLUMN (cur_insn->node) = column++;
612 /* Permute the insns according to their order in PS, from row 0 to
613 row ii-1, and position them right before LAST. This schedules
614 the insns of the loop kernel. */
/* NOTE(review): body braces were lost in extraction.  */
615 static void
616 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
618 int ii = ps->ii;
619 int row;
620 ps_insn_ptr ps_ij;
/* Move each insn (with its preceding notes) to just before LAST, in
   row order then column order; skip an insn already in place.  */
622 for (row = 0; row < ii ; row++)
623 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
624 if (PREV_INSN (last) != ps_ij->node->insn)
625 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
626 PREV_INSN (last));
/* Emit (into the current sequence) copies of the insns of PS whose
   stage lies in [FROM_STAGE, TO_STAGE], together with the reg-moves
   each copy needs; FOR_PROLOG selects prolog vs. epilog counting.
   Insns mentioning COUNT_REG belong to the control part and are
   skipped.  NOTE(review): body braces were lost in extraction.  */
629 static void
630 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
631 int to_stage, int for_prolog, rtx count_reg)
633 int row;
634 ps_insn_ptr ps_ij;
636 for (row = 0; row < ps->ii; row++)
637 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
639 ddg_node_ptr u_node = ps_ij->node;
640 int j, i_reg_moves;
641 rtx reg_move = NULL_RTX;
643 /* Do not duplicate any insn which refers to count_reg as it
644 belongs to the control part.
645 TODO: This should be done by analyzing the control part of
646 the loop. */
647 if (reg_mentioned_p (count_reg, u_node->insn))
648 continue;
650 if (for_prolog)
652 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
653 number of reg_moves starting with the second occurrence of
654 u_node, which is generated if its SCHED_STAGE <= to_stage. */
655 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
656 i_reg_moves = MAX (i_reg_moves, 0);
657 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
659 /* The reg_moves start from the *first* reg_move backwards. */
660 if (i_reg_moves)
662 reg_move = SCHED_FIRST_REG_MOVE (u_node);
663 for (j = 1; j < i_reg_moves; j++)
664 reg_move = PREV_INSN (reg_move);
667 else /* It's for the epilog. */
669 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
670 starting to decrease one stage after u_node no longer occurs;
671 that is, generate all reg_moves until
672 SCHED_STAGE (u_node) == from_stage - 1. */
673 i_reg_moves = SCHED_NREG_MOVES (u_node)
674 - (from_stage - SCHED_STAGE (u_node) - 1);
675 i_reg_moves = MAX (i_reg_moves, 0);
676 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
678 /* The reg_moves start from the *last* reg_move forwards. */
679 if (i_reg_moves)
681 reg_move = SCHED_FIRST_REG_MOVE (u_node);
682 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
683 reg_move = PREV_INSN (reg_move);
/* Emit the selected reg-move patterns, then the insn itself when its
   stage falls in the requested range.  */
687 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
688 emit_insn (copy_rtx (PATTERN (reg_move)));
689 if (SCHED_STAGE (u_node) >= from_stage
690 && SCHED_STAGE (u_node) <= to_stage)
691 duplicate_insn_chain (u_node->first_note, u_node->insn);
696 /* Generate the instructions (including reg_moves) for prolog & epilog. */
/* Emit the prolog of PS on LOOP's entry edge and the epilog on its
   exit edge; when the trip count is not the compile-time constant
   COUNT_INIT, also emit code decrementing COUNT_REG by the stage
   count.  NOTE(review): body braces were lost in extraction.  */
697 static void
698 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
699 rtx count_reg, rtx count_init)
701 int i;
702 int last_stage = PS_STAGE_COUNT (ps) - 1;
703 edge e;
705 /* Generate the prolog, inserting its insns on the loop-entry edge. */
706 start_sequence ();
708 if (!count_init)
710 /* Generate instructions at the beginning of the prolog to
711 adjust the loop count by STAGE_COUNT. If loop count is constant
712 (count_init), this constant is adjusted by STAGE_COUNT in
713 generate_prolog_epilog function. */
714 rtx sub_reg = NULL_RTX;
716 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
717 count_reg, GEN_INT (last_stage),
718 count_reg, 1, OPTAB_DIRECT);
/* expand_simple_binop may not use the requested target; copy back
   into count_reg if it chose another register.  */
719 gcc_assert (REG_P (sub_reg));
720 if (REGNO (sub_reg) != REGNO (count_reg))
721 emit_move_insn (count_reg, sub_reg);
724 for (i = 0; i < last_stage; i++)
725 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
727 /* Put the prolog on the entry edge. */
728 e = loop_preheader_edge (loop);
729 split_edge_and_insert (e, get_insns ());
731 end_sequence ();
733 /* Generate the epilog, inserting its insns on the loop-exit edge. */
734 start_sequence ();
736 for (i = 0; i < last_stage; i++)
737 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
739 /* Put the epilogue on the exit edge. */
740 gcc_assert (single_exit (loop));
741 e = single_exit (loop);
742 split_edge_and_insert (e, get_insns ());
743 end_sequence ();
746 /* Return true if all the BBs of the loop are empty except the
747 loop header. */
748 static bool
749 loop_single_full_bb_p (struct loop *loop)
751 unsigned i;
752 basic_block *bbs = get_loop_body (loop);
754 for (i = 0; i < loop->num_nodes ; i++)
756 rtx head, tail;
757 bool empty_bb = true;
759 if (bbs[i] == loop->header)
760 continue;
762 /* Make sure that basic blocks other than the header
763 have only notes labels or jumps. */
764 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
765 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
767 if (NOTE_P (head) || LABEL_P (head)
768 || (INSN_P (head) && JUMP_P (head)))
769 continue;
770 empty_bb = false;
771 break;
774 if (! empty_bb)
776 free (bbs);
777 return false;
780 free (bbs);
781 return true;
784 /* A simple loop from SMS point of view; it is a loop that is composed of
785 either a single basic block or two BBs - a header and a latch. */
786 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
787 && (EDGE_COUNT (loop->latch->preds) == 1) \
788 && (EDGE_COUNT (loop->latch->succs) == 1))
790 /* Return true if the loop is in its canonical form and false if not.
791 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
/* NOTE(review): body braces were lost in extraction.  */
792 static bool
793 loop_canon_p (struct loop *loop)
/* Reject innermost-only violations: loops with children, and the
   (pseudo) root loop of the tree.  */
796 if (loop->inner || !loop_outer (loop))
797 return false;
799 if (!single_exit (loop))
801 if (dump_file)
803 rtx insn = BB_END (loop->header);
805 fprintf (dump_file, "SMS loop many exits ");
806 fprintf (dump_file, " %s %d (file, line)\n",
807 insn_file (insn), insn_line (insn));
809 return false;
812 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
814 if (dump_file)
816 rtx insn = BB_END (loop->header);
818 fprintf (dump_file, "SMS loop many BBs. ");
819 fprintf (dump_file, " %s %d (file, line)\n",
820 insn_file (insn), insn_line (insn));
822 return false;
825 return true;
828 /* If there are more than one entry for the loop,
829 make it one by splitting the first entry edge and
830 redirecting the others to the new BB. */
/* Canonicalize LOOP's CFG for SMS: split fall-through edges into the
   exit block, and ensure the latch has a single successor by splitting
   the latch->header back edge when necessary.
   NOTE(review): body braces were lost in extraction.  */
831 static void
832 canon_loop (struct loop *loop)
834 edge e;
835 edge_iterator i;
837 /* Avoid annoying special cases of edges going to exit
838 block. */
839 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
840 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
841 split_edge (e);
843 if (loop->latch == loop->header
844 || EDGE_COUNT (loop->latch->succs) > 1)
/* Find the back edge from the latch and split it, giving the loop a
   dedicated latch block.  */
846 FOR_EACH_EDGE (e, i, loop->header->preds)
847 if (e->src == loop->latch)
848 break;
849 split_edge (e);
853 /* Probability in % that the sms-ed loop rolls enough so that optimized
854 version may be entered. Just a guess. */
855 #define PROB_SMS_ENOUGH_ITERATIONS 80
857 /* Used to calculate the upper bound of ii. */
/* maxii = MAX (max_asap, MAXII_FACTOR * mii) — see sms_schedule.  */
858 #define MAXII_FACTOR 2
860 /* Main entry point, perform SMS scheduling on the loops of the function
861 that consist of single basic blocks. */
862 static void
863 sms_schedule (void)
865 rtx insn;
866 ddg_ptr *g_arr, g;
867 int * node_order;
868 int maxii, max_asap;
869 loop_iterator li;
870 partial_schedule_ptr ps;
871 basic_block bb = NULL;
872 struct loop *loop;
873 basic_block condition_bb = NULL;
874 edge latch_edge;
875 gcov_type trip_count = 0;
877 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
878 | LOOPS_HAVE_RECORDED_EXITS);
879 if (number_of_loops () <= 1)
881 loop_optimizer_finalize ();
882 return; /* There are no loops to schedule. */
885 /* Initialize issue_rate. */
886 if (targetm.sched.issue_rate)
888 int temp = reload_completed;
890 reload_completed = 1;
891 issue_rate = targetm.sched.issue_rate ();
892 reload_completed = temp;
894 else
895 issue_rate = 1;
897 /* Initialize the scheduler. */
898 current_sched_info = &sms_sched_info;
900 /* Init Data Flow analysis, to be used in interloop dep calculation. */
901 df_set_flags (DF_LR_RUN_DCE);
902 df_rd_add_problem ();
903 df_note_add_problem ();
904 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
905 df_analyze ();
906 regstat_compute_calls_crossed ();
907 sched_init ();
909 /* Allocate memory to hold the DDG array one entry for each loop.
910 We use loop->num as index into this array. */
911 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
913 /* Build DDGs for all the relevant loops and hold them in G_ARR
914 indexed by the loop index. */
915 FOR_EACH_LOOP (li, loop, 0)
917 rtx head, tail;
918 rtx count_reg;
920 /* For debugging. */
921 if (dbg_cnt (sms_sched_loop) == false)
923 if (dump_file)
924 fprintf (dump_file, "SMS reached max limit... \n");
926 break;
929 if (! loop_canon_p (loop))
930 continue;
932 if (! loop_single_full_bb_p (loop))
933 continue;
935 bb = loop->header;
937 get_ebb_head_tail (bb, bb, &head, &tail);
938 latch_edge = loop_latch_edge (loop);
939 gcc_assert (single_exit (loop));
940 if (single_exit (loop)->count)
941 trip_count = latch_edge->count / single_exit (loop)->count;
943 /* Perfrom SMS only on loops that their average count is above threshold. */
945 if ( latch_edge->count
946 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
948 if (dump_file)
950 fprintf (dump_file, " %s %d (file, line)\n",
951 insn_file (tail), insn_line (tail));
952 fprintf (dump_file, "SMS single-bb-loop\n");
953 if (profile_info && flag_branch_probabilities)
955 fprintf (dump_file, "SMS loop-count ");
956 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
957 (HOST_WIDEST_INT) bb->count);
958 fprintf (dump_file, "\n");
959 fprintf (dump_file, "SMS trip-count ");
960 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
961 (HOST_WIDEST_INT) trip_count);
962 fprintf (dump_file, "\n");
963 fprintf (dump_file, "SMS profile-sum-max ");
964 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
965 (HOST_WIDEST_INT) profile_info->sum_max);
966 fprintf (dump_file, "\n");
969 continue;
972 /* Make sure this is a doloop. */
973 if ( !(count_reg = doloop_register_get (head, tail)))
974 continue;
976 /* Don't handle BBs with calls or barriers, or !single_set insns,
977 or auto-increment insns (to avoid creating invalid reg-moves
978 for the auto-increment insns).
979 ??? Should handle auto-increment insns.
980 ??? Should handle insns defining subregs. */
981 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
983 rtx set;
985 if (CALL_P (insn)
986 || BARRIER_P (insn)
987 || (INSN_P (insn) && !JUMP_P (insn)
988 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
989 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
990 || (INSN_P (insn) && (set = single_set (insn))
991 && GET_CODE (SET_DEST (set)) == SUBREG))
992 break;
995 if (insn != NEXT_INSN (tail))
997 if (dump_file)
999 if (CALL_P (insn))
1000 fprintf (dump_file, "SMS loop-with-call\n");
1001 else if (BARRIER_P (insn))
1002 fprintf (dump_file, "SMS loop-with-barrier\n");
1003 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1004 fprintf (dump_file, "SMS reg inc\n");
1005 else if ((INSN_P (insn) && !JUMP_P (insn)
1006 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1007 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1008 else
1009 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1010 print_rtl_single (dump_file, insn);
1013 continue;
1016 if (! (g = create_ddg (bb, 0)))
1018 if (dump_file)
1019 fprintf (dump_file, "SMS doloop\n");
1020 continue;
1023 g_arr[loop->num] = g;
1026 /* We don't want to perform SMS on new loops - created by versioning. */
1027 FOR_EACH_LOOP (li, loop, 0)
1029 rtx head, tail;
1030 rtx count_reg, count_init;
1031 int mii, rec_mii;
1032 unsigned stage_count = 0;
1033 HOST_WIDEST_INT loop_count = 0;
1035 if (! (g = g_arr[loop->num]))
1036 continue;
1038 if (dump_file)
1039 print_ddg (dump_file, g);
1041 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1043 latch_edge = loop_latch_edge (loop);
1044 gcc_assert (single_exit (loop));
1045 if (single_exit (loop)->count)
1046 trip_count = latch_edge->count / single_exit (loop)->count;
1048 if (dump_file)
1050 fprintf (dump_file, " %s %d (file, line)\n",
1051 insn_file (tail), insn_line (tail));
1052 fprintf (dump_file, "SMS single-bb-loop\n");
1053 if (profile_info && flag_branch_probabilities)
1055 fprintf (dump_file, "SMS loop-count ");
1056 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1057 (HOST_WIDEST_INT) bb->count);
1058 fprintf (dump_file, "\n");
1059 fprintf (dump_file, "SMS profile-sum-max ");
1060 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1061 (HOST_WIDEST_INT) profile_info->sum_max);
1062 fprintf (dump_file, "\n");
1064 fprintf (dump_file, "SMS doloop\n");
1065 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1066 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1067 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
 1071       /* In case the loop has a doloop register, it gets special
 1072 	 handling.  */
1073 count_init = NULL_RTX;
1074 if ((count_reg = doloop_register_get (head, tail)))
1076 basic_block pre_header;
1078 pre_header = loop_preheader_edge (loop)->src;
1079 count_init = const_iteration_count (count_reg, pre_header,
1080 &loop_count);
1082 gcc_assert (count_reg);
1084 if (dump_file && count_init)
1086 fprintf (dump_file, "SMS const-doloop ");
1087 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1088 loop_count);
1089 fprintf (dump_file, "\n");
1092 node_order = XNEWVEC (int, g->num_nodes);
1094 mii = 1; /* Need to pass some estimate of mii. */
1095 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1096 mii = MAX (res_MII (g), rec_mii);
1097 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1099 if (dump_file)
1100 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1101 rec_mii, mii, maxii);
1103 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1104 ASAP. */
1105 set_node_sched_params (g);
1107 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1109 if (ps)
1110 stage_count = PS_STAGE_COUNT (ps);
1112 /* Stage count of 1 means that there is no interleaving between
1113 iterations, let the scheduling passes do the job. */
1114 if (stage_count < 1
1115 || (count_init && (loop_count <= stage_count))
1116 || (flag_branch_probabilities && (trip_count <= stage_count)))
1118 if (dump_file)
1120 fprintf (dump_file, "SMS failed... \n");
1121 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1122 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1123 fprintf (dump_file, ", trip-count=");
1124 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1125 fprintf (dump_file, ")\n");
1127 continue;
1129 else
1131 struct undo_replace_buff_elem *reg_move_replaces;
1133 if (dump_file)
1135 fprintf (dump_file,
1136 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1137 stage_count);
1138 print_partial_schedule (ps, dump_file);
1139 fprintf (dump_file,
1140 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1141 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1144 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1145 the closing_branch was scheduled and should appear in the last (ii-1)
1146 row. Otherwise, we are free to schedule the branch, and we let nodes
1147 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1148 row; this should reduce stage_count to minimum.
1149 TODO: Revisit the issue of scheduling the insns of the
1150 control part relative to the branch when the control part
1151 has more than one insn. */
1152 normalize_sched_times (ps);
1153 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1154 set_columns_for_ps (ps);
1156 canon_loop (loop);
 1158 	  /* In case the BCT count is not known, do loop-versioning.  */
1159 if (count_reg && ! count_init)
1161 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1162 GEN_INT(stage_count));
1163 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1164 * REG_BR_PROB_BASE) / 100;
1166 loop_version (loop, comp_rtx, &condition_bb,
1167 prob, prob, REG_BR_PROB_BASE - prob,
1168 true);
1171 /* Set new iteration count of loop kernel. */
1172 if (count_reg && count_init)
1173 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1174 - stage_count + 1);
1176 /* Now apply the scheduled kernel to the RTL of the loop. */
1177 permute_partial_schedule (ps, g->closing_branch->first_note);
1179 /* Mark this loop as software pipelined so the later
1180 scheduling passes doesn't touch it. */
1181 if (! flag_resched_modulo_sched)
1182 g->bb->flags |= BB_DISABLE_SCHEDULE;
1183 /* The life-info is not valid any more. */
1184 df_set_bb_dirty (g->bb);
1186 reg_move_replaces = generate_reg_moves (ps, true);
1187 if (dump_file)
1188 print_node_sched_params (dump_file, g->num_nodes, g);
1189 /* Generate prolog and epilog. */
1190 generate_prolog_epilog (ps, loop, count_reg, count_init);
1192 free_undo_replace_buff (reg_move_replaces);
1195 free_partial_schedule (ps);
1196 free (node_sched_params);
1197 free (node_order);
1198 free_ddg (g);
1201 regstat_free_calls_crossed ();
1202 free (g_arr);
1204 /* Release scheduler data, needed until now because of DFA. */
1205 sched_finish ();
1206 loop_optimizer_finalize ();
1209 /* The SMS scheduling algorithm itself
1210 -----------------------------------
1211 Input: 'O' an ordered list of insns of a loop.
1212 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1214 'Q' is the empty Set
1215 'PS' is the partial schedule; it holds the currently scheduled nodes with
1216 their cycle/slot.
1217 'PSP' previously scheduled predecessors.
1218 'PSS' previously scheduled successors.
1219 't(u)' the cycle where u is scheduled.
1220 'l(u)' is the latency of u.
1221 'd(v,u)' is the dependence distance from v to u.
1222 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1223 the node ordering phase.
1224 'check_hardware_resources_conflicts(u, PS, c)'
1225 run a trace around cycle/slot through DFA model
1226 to check resource conflicts involving instruction u
1227 at cycle c given the partial schedule PS.
1228 'add_to_partial_schedule_at_time(u, PS, c)'
1229 Add the node/instruction u to the partial schedule
1230 PS at time c.
1231 'calculate_register_pressure(PS)'
1232 Given a schedule of instructions, calculate the register
1233 pressure it implies. One implementation could be the
1234 maximum number of overlapping live ranges.
1235 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1236 registers available in the hardware.
1238 1. II = MII.
1239 2. PS = empty list
1240 3. for each node u in O in pre-computed order
1241 4. if (PSP(u) != Q && PSS(u) == Q) then
 1242    5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1243 6. start = Early_start; end = Early_start + II - 1; step = 1
1244 11. else if (PSP(u) == Q && PSS(u) != Q) then
 1245    12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1246 13. start = Late_start; end = Late_start - II + 1; step = -1
1247 14. else if (PSP(u) != Q && PSS(u) != Q) then
 1248    15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
 1249    16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1250 17. start = Early_start;
1251 18. end = min(Early_start + II - 1 , Late_start);
1252 19. step = 1
1253 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1254 21. start = ASAP(u); end = start + II - 1; step = 1
1255 22. endif
1257 23. success = false
1258 24. for (c = start ; c != end ; c += step)
1259 25. if check_hardware_resources_conflicts(u, PS, c) then
1260 26. add_to_partial_schedule_at_time(u, PS, c)
1261 27. success = true
1262 28. break
1263 29. endif
1264 30. endfor
1265 31. if (success == false) then
1266 32. II = II + 1
1267 33. if (II > maxII) then
1268 34. finish - failed to schedule
1269 35. endif
1270 36. goto 2.
1271 37. endif
1272 38. endfor
1273 39. if (calculate_register_pressure(PS) > maxRP) then
1274 40. goto 32.
1275 41. endif
1276 42. compute epilogue & prologue
1277 43. finish - succeeded to schedule
1280 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1281 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1282 set to 0 to save compile time. */
1283 #define DFA_HISTORY SMS_DFA_HISTORY
1285 /* A threshold for the number of repeated unsuccessful attempts to insert
1286 an empty row, before we flush the partial schedule and start over. */
1287 #define MAX_SPLIT_NUM 10
1288 /* Given the partial schedule PS, this function calculates and returns the
1289 cycles in which we can schedule the node with the given index I.
1290 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1291 noticed that there are several cases in which we fail to SMS the loop
1292 because the sched window of a node is empty due to tight data-deps. In
1293 such cases we want to unschedule some of the predecessors/successors
1294 until we get non-empty scheduling window. It returns -1 if the
1295 scheduling window is empty and zero otherwise. */
1297 static int
1298 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1299 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1301 int start, step, end;
1302 ddg_edge_ptr e;
1303 int u = nodes_order [i];
1304 ddg_node_ptr u_node = &ps->g->nodes[u];
1305 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1306 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1307 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1308 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1309 int psp_not_empty;
1310 int pss_not_empty;
1312 /* 1. compute sched window for u (start, end, step). */
1313 sbitmap_zero (psp);
1314 sbitmap_zero (pss);
1315 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1316 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1318 if (psp_not_empty && !pss_not_empty)
1320 int early_start = INT_MIN;
1322 end = INT_MAX;
1323 for (e = u_node->in; e != 0; e = e->next_in)
1325 ddg_node_ptr v_node = e->src;
1327 if (dump_file)
1329 fprintf (dump_file, "\nProcessing edge: ");
1330 print_ddg_edge (dump_file, e);
1331 fprintf (dump_file,
1332 "\nScheduling %d (%d) in psp_not_empty,"
1333 " checking p %d (%d): ", u_node->cuid,
1334 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1335 (v_node->insn));
1338 if (TEST_BIT (sched_nodes, v_node->cuid))
1340 int p_st = SCHED_TIME (v_node);
1342 early_start =
1343 MAX (early_start, p_st + e->latency - (e->distance * ii));
1345 if (dump_file)
1346 fprintf (dump_file, "pred st = %d; early_start = %d; ", p_st,
1347 early_start);
1349 if (e->data_type == MEM_DEP)
1350 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1352 else if (dump_file)
1353 fprintf (dump_file, "the node is not scheduled\n");
1355 start = early_start;
1356 end = MIN (end, early_start + ii);
1357 step = 1;
1359 if (dump_file)
1360 fprintf (dump_file,
1361 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1362 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1365 else if (!psp_not_empty && pss_not_empty)
1367 int late_start = INT_MAX;
1369 end = INT_MIN;
1370 for (e = u_node->out; e != 0; e = e->next_out)
1372 ddg_node_ptr v_node = e->dest;
1374 if (dump_file)
1376 fprintf (dump_file, "\nProcessing edge:");
1377 print_ddg_edge (dump_file, e);
1378 fprintf (dump_file,
1379 "\nScheduling %d (%d) in pss_not_empty,"
1380 " checking s %d (%d): ", u_node->cuid,
1381 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1382 (v_node->insn));
1385 if (TEST_BIT (sched_nodes, v_node->cuid))
1387 int s_st = SCHED_TIME (v_node);
1389 late_start = MIN (late_start,
1390 s_st - e->latency + (e->distance * ii));
1392 if (dump_file)
1393 fprintf (dump_file, "succ st = %d; late_start = %d;", s_st,
1394 late_start);
1396 if (e->data_type == MEM_DEP)
1397 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1398 if (dump_file)
1399 fprintf (dump_file, "end = %d\n", end);
1402 else if (dump_file)
1403 fprintf (dump_file, "the node is not scheduled\n");
1406 start = late_start;
1407 end = MAX (end, late_start - ii);
1408 step = -1;
1410 if (dump_file)
1411 fprintf (dump_file,
1412 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1413 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1417 else if (psp_not_empty && pss_not_empty)
1419 int early_start = INT_MIN;
1420 int late_start = INT_MAX;
1422 start = INT_MIN;
1423 end = INT_MAX;
1424 for (e = u_node->in; e != 0; e = e->next_in)
1426 ddg_node_ptr v_node = e->src;
1428 if (dump_file)
1430 fprintf (dump_file, "\nProcessing edge:");
1431 print_ddg_edge (dump_file, e);
1432 fprintf (dump_file,
1433 "\nScheduling %d (%d) in psp_pss_not_empty,"
1434 " checking p %d (%d): ", u_node->cuid, INSN_UID
1435 (u_node->insn), v_node->cuid, INSN_UID
1436 (v_node->insn));
1439 if (TEST_BIT (sched_nodes, v_node->cuid))
1441 int p_st = SCHED_TIME (v_node);
1443 early_start = MAX (early_start,
1444 p_st + e->latency
1445 - (e->distance * ii));
1447 if (dump_file)
1448 fprintf (dump_file, "pred st = %d; early_start = %d;", p_st,
1449 early_start);
1451 if (e->data_type == MEM_DEP)
1452 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1454 else if (dump_file)
1455 fprintf (dump_file, "the node is not scheduled\n");
1458 for (e = u_node->out; e != 0; e = e->next_out)
1460 ddg_node_ptr v_node = e->dest;
1462 if (dump_file)
1464 fprintf (dump_file, "\nProcessing edge:");
1465 print_ddg_edge (dump_file, e);
1466 fprintf (dump_file,
1467 "\nScheduling %d (%d) in psp_pss_not_empty,"
1468 " checking s %d (%d): ", u_node->cuid, INSN_UID
1469 (u_node->insn), v_node->cuid, INSN_UID
1470 (v_node->insn));
1473 if (TEST_BIT (sched_nodes, v_node->cuid))
1475 int s_st = SCHED_TIME (v_node);
1477 late_start = MIN (late_start,
1478 s_st - e->latency
1479 + (e->distance * ii));
1481 if (dump_file)
1482 fprintf (dump_file, "succ st = %d; late_start = %d;", s_st,
1483 late_start);
1485 if (e->data_type == MEM_DEP)
1486 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1488 else if (dump_file)
1489 fprintf (dump_file, "the node is not scheduled\n");
1492 start = MAX (start, early_start);
1493 end = MIN (end, MIN (early_start + ii, late_start + 1));
1494 step = 1;
1496 else /* psp is empty && pss is empty. */
1498 start = SCHED_ASAP (u_node);
1499 end = start + ii;
1500 step = 1;
1503 *start_p = start;
1504 *step_p = step;
1505 *end_p = end;
1506 sbitmap_free (psp);
1507 sbitmap_free (pss);
1509 if ((start >= end && step == 1) || (start <= end && step == -1))
1511 if (dump_file)
1512 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1513 start, end, step);
1514 return -1;
1517 return 0;
1520 /* This function implements the scheduling algorithm for SMS according to the
1521 above algorithm. */
1522 static partial_schedule_ptr
1523 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1525 int ii = mii;
1526 int i, c, success, num_splits = 0;
1527 int flush_and_start_over = true;
1528 int num_nodes = g->num_nodes;
1529 ddg_edge_ptr e;
1530 ps_insn_ptr psi;
1531 int start, end, step; /* Place together into one struct? */
1532 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1533 sbitmap must_precede = sbitmap_alloc (num_nodes);
1534 sbitmap must_follow = sbitmap_alloc (num_nodes);
1535 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1537 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1539 sbitmap_ones (tobe_scheduled);
1540 sbitmap_zero (sched_nodes);
1542 while (flush_and_start_over && (ii < maxii))
1545 if (dump_file)
1546 fprintf (dump_file, "Starting with ii=%d\n", ii);
1547 flush_and_start_over = false;
1548 sbitmap_zero (sched_nodes);
1550 for (i = 0; i < num_nodes; i++)
1552 int u = nodes_order[i];
1553 ddg_node_ptr u_node = &ps->g->nodes[u];
1554 rtx insn = u_node->insn;
1556 if (!INSN_P (insn))
1558 RESET_BIT (tobe_scheduled, u);
1559 continue;
1562 if (JUMP_P (insn)) /* Closing branch handled later. */
1564 RESET_BIT (tobe_scheduled, u);
1565 continue;
1568 if (TEST_BIT (sched_nodes, u))
1569 continue;
1571 /* Try to get non-empty scheduling window. */
1572 success = 0;
1573 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1574 &step, &end) == 0)
1576 if (dump_file)
1577 fprintf (dump_file, "\nTrying to schedule node %d \
1578 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1579 (g->nodes[u].insn)), start, end, step);
1580 /* Use must_follow & must_precede bitmaps to determine order
1581 of nodes within the cycle. */
1583 /* use must_follow & must_precede bitmaps to determine order
1584 of nodes within the cycle. */
1585 sbitmap_zero (must_precede);
1586 sbitmap_zero (must_follow);
1587 /* TODO: We can add an insn to the must_precede or must_follow
1588 bitmaps only if it has tight dependence to U and they
1589 both scheduled in the same row. The current check is less
1590 conservative and content with the fact that both U and the
1591 insn are scheduled in the same row. */
1592 for (e = u_node->in; e != 0; e = e->next_in)
1593 if (TEST_BIT (sched_nodes, e->src->cuid)
1594 && (SMODULO (SCHED_TIME (e->src), ii) ==
1595 SMODULO (start, ii)))
1596 SET_BIT (must_precede, e->src->cuid);
1598 for (e = u_node->out; e != 0; e = e->next_out)
1599 if (TEST_BIT (sched_nodes, e->dest->cuid)
1600 && (SMODULO (SCHED_TIME (e->dest), ii) ==
1601 SMODULO ((end - step), ii)))
1602 SET_BIT (must_follow, e->dest->cuid);
1604 gcc_assert ((step > 0 && start < end)
1605 || (step < 0 && start > end));
1607 for (c = start; c != end; c += step)
1609 verify_partial_schedule (ps, sched_nodes);
1611 psi = ps_add_node_check_conflicts (ps, u_node, c,
1612 must_precede,
1613 must_follow);
1615 if (psi)
1617 SCHED_TIME (u_node) = c;
1618 SET_BIT (sched_nodes, u);
1619 success = 1;
1620 num_splits = 0;
1621 if (dump_file)
1622 fprintf (dump_file, "Scheduled w/o split in %d\n", c);
1624 break;
1627 verify_partial_schedule (ps, sched_nodes);
1629 if (!success)
1631 int split_row;
1633 if (ii++ == maxii)
1634 break;
1636 if (num_splits >= MAX_SPLIT_NUM)
1638 num_splits = 0;
1639 flush_and_start_over = true;
1640 verify_partial_schedule (ps, sched_nodes);
1641 reset_partial_schedule (ps, ii);
1642 verify_partial_schedule (ps, sched_nodes);
1643 break;
1646 num_splits++;
1647 if (step == 1)
1648 split_row = compute_split_row (sched_nodes, start, end,
1649 ps->ii, u_node);
1650 else
1651 split_row = compute_split_row (sched_nodes, end, start,
1652 ps->ii, u_node);
1654 ps_insert_empty_row (ps, split_row, sched_nodes);
1655 i--; /* Go back and retry node i. */
1657 if (dump_file)
1658 fprintf (dump_file, "num_splits=%d\n", num_splits);
1661 /* ??? If (success), check register pressure estimates. */
1662 } /* Continue with next node. */
1663 } /* While flush_and_start_over. */
1664 if (ii >= maxii)
1666 free_partial_schedule (ps);
1667 ps = NULL;
1669 else
1670 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1672 sbitmap_free (sched_nodes);
1673 sbitmap_free (must_precede);
1674 sbitmap_free (must_follow);
1675 sbitmap_free (tobe_scheduled);
1677 return ps;
1680 /* This function inserts a new empty row into PS at the position
1681 according to SPLITROW, keeping all already scheduled instructions
1682 intact and updating their SCHED_TIME and cycle accordingly. */
1683 static void
1684 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1685 sbitmap sched_nodes)
1687 ps_insn_ptr crr_insn;
1688 ps_insn_ptr *rows_new;
1689 int ii = ps->ii;
1690 int new_ii = ii + 1;
1691 int row;
1693 verify_partial_schedule (ps, sched_nodes);
1695 /* We normalize sched_time and rotate ps to have only non-negative sched
1696 times, for simplicity of updating cycles after inserting new row. */
1697 split_row -= ps->min_cycle;
1698 split_row = SMODULO (split_row, ii);
1699 if (dump_file)
1700 fprintf (dump_file, "split_row=%d\n", split_row);
1702 normalize_sched_times (ps);
1703 rotate_partial_schedule (ps, ps->min_cycle);
1705 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1706 for (row = 0; row < split_row; row++)
1708 rows_new[row] = ps->rows[row];
1709 ps->rows[row] = NULL;
1710 for (crr_insn = rows_new[row];
1711 crr_insn; crr_insn = crr_insn->next_in_row)
1713 ddg_node_ptr u = crr_insn->node;
1714 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1716 SCHED_TIME (u) = new_time;
1717 crr_insn->cycle = new_time;
1718 SCHED_ROW (u) = new_time % new_ii;
1719 SCHED_STAGE (u) = new_time / new_ii;
1724 rows_new[split_row] = NULL;
1726 for (row = split_row; row < ii; row++)
1728 rows_new[row + 1] = ps->rows[row];
1729 ps->rows[row] = NULL;
1730 for (crr_insn = rows_new[row + 1];
1731 crr_insn; crr_insn = crr_insn->next_in_row)
1733 ddg_node_ptr u = crr_insn->node;
1734 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1736 SCHED_TIME (u) = new_time;
1737 crr_insn->cycle = new_time;
1738 SCHED_ROW (u) = new_time % new_ii;
1739 SCHED_STAGE (u) = new_time / new_ii;
1743 /* Updating ps. */
1744 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1745 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1746 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1747 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1748 free (ps->rows);
1749 ps->rows = rows_new;
1750 ps->ii = new_ii;
1751 gcc_assert (ps->min_cycle >= 0);
1753 verify_partial_schedule (ps, sched_nodes);
1755 if (dump_file)
1756 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1757 ps->max_cycle);
1760 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1761 UP which are the boundaries of it's scheduling window; compute using
1762 SCHED_NODES and II a row in the partial schedule that can be split
1763 which will separate a critical predecessor from a critical successor
1764 thereby expanding the window, and return it. */
1765 static int
1766 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1767 ddg_node_ptr u_node)
1769 ddg_edge_ptr e;
1770 int lower = INT_MIN, upper = INT_MAX;
1771 ddg_node_ptr crit_pred = NULL;
1772 ddg_node_ptr crit_succ = NULL;
1773 int crit_cycle;
1775 for (e = u_node->in; e != 0; e = e->next_in)
1777 ddg_node_ptr v_node = e->src;
1779 if (TEST_BIT (sched_nodes, v_node->cuid)
1780 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1781 if (SCHED_TIME (v_node) > lower)
1783 crit_pred = v_node;
1784 lower = SCHED_TIME (v_node);
1788 if (crit_pred != NULL)
1790 crit_cycle = SCHED_TIME (crit_pred) + 1;
1791 return SMODULO (crit_cycle, ii);
1794 for (e = u_node->out; e != 0; e = e->next_out)
1796 ddg_node_ptr v_node = e->dest;
1797 if (TEST_BIT (sched_nodes, v_node->cuid)
1798 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1799 if (SCHED_TIME (v_node) < upper)
1801 crit_succ = v_node;
1802 upper = SCHED_TIME (v_node);
1806 if (crit_succ != NULL)
1808 crit_cycle = SCHED_TIME (crit_succ);
1809 return SMODULO (crit_cycle, ii);
1812 if (dump_file)
1813 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
1815 return SMODULO ((low + up + 1) / 2, ii);
1818 static void
1819 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
1821 int row;
1822 ps_insn_ptr crr_insn;
1824 for (row = 0; row < ps->ii; row++)
1825 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
1827 ddg_node_ptr u = crr_insn->node;
1829 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
1830 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1831 popcount (sched_nodes) == number of insns in ps. */
1832 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
1833 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
1838 /* This page implements the algorithm for ordering the nodes of a DDG
1839 for modulo scheduling, activated through the
1840 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1842 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1843 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1844 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1845 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1846 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1847 #define DEPTH(x) (ASAP ((x)))
1849 typedef struct node_order_params * nopa;
1851 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1852 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1853 static nopa calculate_order_params (ddg_ptr, int, int *);
1854 static int find_max_asap (ddg_ptr, sbitmap);
1855 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1856 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1858 enum sms_direction {BOTTOMUP, TOPDOWN};
1860 struct node_order_params
1862 int asap;
1863 int alap;
1864 int height;
1867 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1868 static void
1869 check_nodes_order (int *node_order, int num_nodes)
1871 int i;
1872 sbitmap tmp = sbitmap_alloc (num_nodes);
1874 sbitmap_zero (tmp);
1876 if (dump_file)
1877 fprintf (dump_file, "SMS final nodes order: \n");
1879 for (i = 0; i < num_nodes; i++)
1881 int u = node_order[i];
1883 if (dump_file)
1884 fprintf (dump_file, "%d ", u);
1885 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1887 SET_BIT (tmp, u);
1890 if (dump_file)
1891 fprintf (dump_file, "\n");
1893 sbitmap_free (tmp);
1896 /* Order the nodes of G for scheduling and pass the result in
1897 NODE_ORDER. Also set aux.count of each node to ASAP.
1898 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
1899 static int
1900 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
1902 int i;
1903 int rec_mii = 0;
1904 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1906 nopa nops = calculate_order_params (g, mii, pmax_asap);
1908 if (dump_file)
1909 print_sccs (dump_file, sccs, g);
1911 order_nodes_of_sccs (sccs, node_order);
1913 if (sccs->num_sccs > 0)
1914 /* First SCC has the largest recurrence_length. */
1915 rec_mii = sccs->sccs[0]->recurrence_length;
1917 /* Save ASAP before destroying node_order_params. */
1918 for (i = 0; i < g->num_nodes; i++)
1920 ddg_node_ptr v = &g->nodes[i];
1921 v->aux.count = ASAP (v);
1924 free (nops);
1925 free_ddg_all_sccs (sccs);
1926 check_nodes_order (node_order, g->num_nodes);
1928 return rec_mii;
1931 static void
1932 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1934 int i, pos = 0;
1935 ddg_ptr g = all_sccs->ddg;
1936 int num_nodes = g->num_nodes;
1937 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1938 sbitmap on_path = sbitmap_alloc (num_nodes);
1939 sbitmap tmp = sbitmap_alloc (num_nodes);
1940 sbitmap ones = sbitmap_alloc (num_nodes);
1942 sbitmap_zero (prev_sccs);
1943 sbitmap_ones (ones);
1945 /* Perfrom the node ordering starting from the SCC with the highest recMII.
1946 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1947 for (i = 0; i < all_sccs->num_sccs; i++)
1949 ddg_scc_ptr scc = all_sccs->sccs[i];
1951 /* Add nodes on paths from previous SCCs to the current SCC. */
1952 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1953 sbitmap_a_or_b (tmp, scc->nodes, on_path);
1955 /* Add nodes on paths from the current SCC to previous SCCs. */
1956 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1957 sbitmap_a_or_b (tmp, tmp, on_path);
1959 /* Remove nodes of previous SCCs from current extended SCC. */
1960 sbitmap_difference (tmp, tmp, prev_sccs);
1962 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1963 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1966 /* Handle the remaining nodes that do not belong to any scc. Each call
1967 to order_nodes_in_scc handles a single connected component. */
1968 while (pos < g->num_nodes)
1970 sbitmap_difference (tmp, ones, prev_sccs);
1971 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1973 sbitmap_free (prev_sccs);
1974 sbitmap_free (on_path);
1975 sbitmap_free (tmp);
1976 sbitmap_free (ones);
1979 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
1980 static struct node_order_params *
1981 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
1983 int u;
1984 int max_asap;
1985 int num_nodes = g->num_nodes;
1986 ddg_edge_ptr e;
1987 /* Allocate a place to hold ordering params for each node in the DDG. */
1988 nopa node_order_params_arr;
1990 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1991 node_order_params_arr = (nopa) xcalloc (num_nodes,
1992 sizeof (struct node_order_params));
1994 /* Set the aux pointer of each node to point to its order_params structure. */
1995 for (u = 0; u < num_nodes; u++)
1996 g->nodes[u].aux.info = &node_order_params_arr[u];
1998 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1999 calculate ASAP, ALAP, mobility, distance, and height for each node
2000 in the dependence (direct acyclic) graph. */
2002 /* We assume that the nodes in the array are in topological order. */
2004 max_asap = 0;
2005 for (u = 0; u < num_nodes; u++)
2007 ddg_node_ptr u_node = &g->nodes[u];
2009 ASAP (u_node) = 0;
2010 for (e = u_node->in; e; e = e->next_in)
2011 if (e->distance == 0)
2012 ASAP (u_node) = MAX (ASAP (u_node),
2013 ASAP (e->src) + e->latency);
2014 max_asap = MAX (max_asap, ASAP (u_node));
2017 for (u = num_nodes - 1; u > -1; u--)
2019 ddg_node_ptr u_node = &g->nodes[u];
2021 ALAP (u_node) = max_asap;
2022 HEIGHT (u_node) = 0;
2023 for (e = u_node->out; e; e = e->next_out)
2024 if (e->distance == 0)
2026 ALAP (u_node) = MIN (ALAP (u_node),
2027 ALAP (e->dest) - e->latency);
2028 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2029 HEIGHT (e->dest) + e->latency);
2032 if (dump_file)
2034 fprintf (dump_file, "\nOrder params\n");
2035 for (u = 0; u < num_nodes; u++)
2037 ddg_node_ptr u_node = &g->nodes[u];
2039 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2040 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2044 *pmax_asap = max_asap;
2045 return node_order_params_arr;
2048 static int
2049 find_max_asap (ddg_ptr g, sbitmap nodes)
2051 unsigned int u = 0;
2052 int max_asap = -1;
2053 int result = -1;
2054 sbitmap_iterator sbi;
2056 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2058 ddg_node_ptr u_node = &g->nodes[u];
2060 if (max_asap < ASAP (u_node))
2062 max_asap = ASAP (u_node);
2063 result = u;
2066 return result;
2069 static int
2070 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2072 unsigned int u = 0;
2073 int max_hv = -1;
2074 int min_mob = INT_MAX;
2075 int result = -1;
2076 sbitmap_iterator sbi;
2078 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2080 ddg_node_ptr u_node = &g->nodes[u];
2082 if (max_hv < HEIGHT (u_node))
2084 max_hv = HEIGHT (u_node);
2085 min_mob = MOB (u_node);
2086 result = u;
2088 else if ((max_hv == HEIGHT (u_node))
2089 && (min_mob > MOB (u_node)))
2091 min_mob = MOB (u_node);
2092 result = u;
2095 return result;
2098 static int
2099 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2101 unsigned int u = 0;
2102 int max_dv = -1;
2103 int min_mob = INT_MAX;
2104 int result = -1;
2105 sbitmap_iterator sbi;
2107 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2109 ddg_node_ptr u_node = &g->nodes[u];
2111 if (max_dv < DEPTH (u_node))
2113 max_dv = DEPTH (u_node);
2114 min_mob = MOB (u_node);
2115 result = u;
2117 else if ((max_dv == DEPTH (u_node))
2118 && (min_mob > MOB (u_node)))
2120 min_mob = MOB (u_node);
2121 result = u;
2124 return result;
2127 /* Places the nodes of SCC into the NODE_ORDER array starting
2128 at position POS, according to the SMS ordering algorithm.
2129 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2130 the NODE_ORDER array, starting from position zero. */
2131 static int
2132 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2133 int * node_order, int pos)
2135 enum sms_direction dir;
2136 int num_nodes = g->num_nodes;
2137 sbitmap workset = sbitmap_alloc (num_nodes);
2138 sbitmap tmp = sbitmap_alloc (num_nodes);
2139 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2140 sbitmap predecessors = sbitmap_alloc (num_nodes);
2141 sbitmap successors = sbitmap_alloc (num_nodes);
2143 sbitmap_zero (predecessors);
2144 find_predecessors (predecessors, g, nodes_ordered);
2146 sbitmap_zero (successors);
2147 find_successors (successors, g, nodes_ordered);
2149 sbitmap_zero (tmp);
2150 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2152 sbitmap_copy (workset, tmp);
2153 dir = BOTTOMUP;
2155 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2157 sbitmap_copy (workset, tmp);
2158 dir = TOPDOWN;
2160 else
2162 int u;
2164 sbitmap_zero (workset);
2165 if ((u = find_max_asap (g, scc)) >= 0)
2166 SET_BIT (workset, u);
2167 dir = BOTTOMUP;
2170 sbitmap_zero (zero_bitmap);
2171 while (!sbitmap_equal (workset, zero_bitmap))
2173 int v;
2174 ddg_node_ptr v_node;
2175 sbitmap v_node_preds;
2176 sbitmap v_node_succs;
2178 if (dir == TOPDOWN)
2180 while (!sbitmap_equal (workset, zero_bitmap))
2182 v = find_max_hv_min_mob (g, workset);
2183 v_node = &g->nodes[v];
2184 node_order[pos++] = v;
2185 v_node_succs = NODE_SUCCESSORS (v_node);
2186 sbitmap_a_and_b (tmp, v_node_succs, scc);
2188 /* Don't consider the already ordered successors again. */
2189 sbitmap_difference (tmp, tmp, nodes_ordered);
2190 sbitmap_a_or_b (workset, workset, tmp);
2191 RESET_BIT (workset, v);
2192 SET_BIT (nodes_ordered, v);
2194 dir = BOTTOMUP;
2195 sbitmap_zero (predecessors);
2196 find_predecessors (predecessors, g, nodes_ordered);
2197 sbitmap_a_and_b (workset, predecessors, scc);
2199 else
2201 while (!sbitmap_equal (workset, zero_bitmap))
2203 v = find_max_dv_min_mob (g, workset);
2204 v_node = &g->nodes[v];
2205 node_order[pos++] = v;
2206 v_node_preds = NODE_PREDECESSORS (v_node);
2207 sbitmap_a_and_b (tmp, v_node_preds, scc);
2209 /* Don't consider the already ordered predecessors again. */
2210 sbitmap_difference (tmp, tmp, nodes_ordered);
2211 sbitmap_a_or_b (workset, workset, tmp);
2212 RESET_BIT (workset, v);
2213 SET_BIT (nodes_ordered, v);
2215 dir = TOPDOWN;
2216 sbitmap_zero (successors);
2217 find_successors (successors, g, nodes_ordered);
2218 sbitmap_a_and_b (workset, successors, scc);
2221 sbitmap_free (tmp);
2222 sbitmap_free (workset);
2223 sbitmap_free (zero_bitmap);
2224 sbitmap_free (predecessors);
2225 sbitmap_free (successors);
2226 return pos;
2230 /* This page contains functions for manipulating partial-schedules during
2231 modulo scheduling. */
2233 /* Create a partial schedule and allocate a memory to hold II rows. */
2235 static partial_schedule_ptr
2236 create_partial_schedule (int ii, ddg_ptr g, int history)
2238 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2239 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2240 ps->ii = ii;
2241 ps->history = history;
2242 ps->min_cycle = INT_MAX;
2243 ps->max_cycle = INT_MIN;
2244 ps->g = g;
2246 return ps;
2249 /* Free the PS_INSNs in rows array of the given partial schedule.
2250 ??? Consider caching the PS_INSN's. */
2251 static void
2252 free_ps_insns (partial_schedule_ptr ps)
2254 int i;
2256 for (i = 0; i < ps->ii; i++)
2258 while (ps->rows[i])
2260 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2262 free (ps->rows[i]);
2263 ps->rows[i] = ps_insn;
2265 ps->rows[i] = NULL;
2269 /* Free all the memory allocated to the partial schedule. */
2271 static void
2272 free_partial_schedule (partial_schedule_ptr ps)
2274 if (!ps)
2275 return;
2276 free_ps_insns (ps);
2277 free (ps->rows);
2278 free (ps);
2281 /* Clear the rows array with its PS_INSNs, and create a new one with
2282 NEW_II rows. */
2284 static void
2285 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2287 if (!ps)
2288 return;
2289 free_ps_insns (ps);
2290 if (new_ii == ps->ii)
2291 return;
2292 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2293 * sizeof (ps_insn_ptr));
2294 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2295 ps->ii = new_ii;
2296 ps->min_cycle = INT_MAX;
2297 ps->max_cycle = INT_MIN;
2300 /* Prints the partial schedule as an ii rows array, for each rows
2301 print the ids of the insns in it. */
2302 void
2303 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2305 int i;
2307 for (i = 0; i < ps->ii; i++)
2309 ps_insn_ptr ps_i = ps->rows[i];
2311 fprintf (dump, "\n[CYCLE %d ]: ", i);
2312 while (ps_i)
2314 fprintf (dump, "%d, ",
2315 INSN_UID (ps_i->node->insn));
2316 ps_i = ps_i->next_in_row;
2321 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2322 static ps_insn_ptr
2323 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2325 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2327 ps_i->node = node;
2328 ps_i->next_in_row = NULL;
2329 ps_i->prev_in_row = NULL;
2330 ps_i->row_rest_count = rest_count;
2331 ps_i->cycle = cycle;
2333 return ps_i;
2337 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2338 node is not found in the partial schedule, else returns true. */
2339 static bool
2340 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2342 int row;
2344 if (!ps || !ps_i)
2345 return false;
2347 row = SMODULO (ps_i->cycle, ps->ii);
2348 if (! ps_i->prev_in_row)
2350 if (ps_i != ps->rows[row])
2351 return false;
2353 ps->rows[row] = ps_i->next_in_row;
2354 if (ps->rows[row])
2355 ps->rows[row]->prev_in_row = NULL;
2357 else
2359 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2360 if (ps_i->next_in_row)
2361 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2363 free (ps_i);
2364 return true;
2367 /* Unlike what literature describes for modulo scheduling (which focuses
2368 on VLIW machines) the order of the instructions inside a cycle is
2369 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2370 where the current instruction should go relative to the already
2371 scheduled instructions in the given cycle. Go over these
2372 instructions and find the first possible column to put it in. */
2373 static bool
2374 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2375 sbitmap must_precede, sbitmap must_follow)
2377 ps_insn_ptr next_ps_i;
2378 ps_insn_ptr first_must_follow = NULL;
2379 ps_insn_ptr last_must_precede = NULL;
2380 int row;
2382 if (! ps_i)
2383 return false;
2385 row = SMODULO (ps_i->cycle, ps->ii);
2387 /* Find the first must follow and the last must precede
2388 and insert the node immediately after the must precede
2389 but make sure that it there is no must follow after it. */
2390 for (next_ps_i = ps->rows[row];
2391 next_ps_i;
2392 next_ps_i = next_ps_i->next_in_row)
2394 if (TEST_BIT (must_follow, next_ps_i->node->cuid)
2395 && ! first_must_follow)
2396 first_must_follow = next_ps_i;
2397 if (TEST_BIT (must_precede, next_ps_i->node->cuid))
2399 /* If we have already met a node that must follow, then
2400 there is no possible column. */
2401 if (first_must_follow)
2402 return false;
2403 else
2404 last_must_precede = next_ps_i;
2408 /* Now insert the node after INSERT_AFTER_PSI. */
2410 if (! last_must_precede)
2412 ps_i->next_in_row = ps->rows[row];
2413 ps_i->prev_in_row = NULL;
2414 if (ps_i->next_in_row)
2415 ps_i->next_in_row->prev_in_row = ps_i;
2416 ps->rows[row] = ps_i;
2418 else
2420 ps_i->next_in_row = last_must_precede->next_in_row;
2421 last_must_precede->next_in_row = ps_i;
2422 ps_i->prev_in_row = last_must_precede;
2423 if (ps_i->next_in_row)
2424 ps_i->next_in_row->prev_in_row = ps_i;
2427 return true;
2430 /* Advances the PS_INSN one column in its current row; returns false
2431 in failure and true in success. Bit N is set in MUST_FOLLOW if
2432 the node with cuid N must be come after the node pointed to by
2433 PS_I when scheduled in the same cycle. */
2434 static int
2435 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2436 sbitmap must_follow)
2438 ps_insn_ptr prev, next;
2439 int row;
2440 ddg_node_ptr next_node;
2442 if (!ps || !ps_i)
2443 return false;
2445 row = SMODULO (ps_i->cycle, ps->ii);
2447 if (! ps_i->next_in_row)
2448 return false;
2450 next_node = ps_i->next_in_row->node;
2452 /* Check if next_in_row is dependent on ps_i, both having same sched
2453 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2454 if (TEST_BIT (must_follow, next_node->cuid))
2455 return false;
2457 /* Advance PS_I over its next_in_row in the doubly linked list. */
2458 prev = ps_i->prev_in_row;
2459 next = ps_i->next_in_row;
2461 if (ps_i == ps->rows[row])
2462 ps->rows[row] = next;
2464 ps_i->next_in_row = next->next_in_row;
2466 if (next->next_in_row)
2467 next->next_in_row->prev_in_row = ps_i;
2469 next->next_in_row = ps_i;
2470 ps_i->prev_in_row = next;
2472 next->prev_in_row = prev;
2473 if (prev)
2474 prev->next_in_row = next;
2476 return true;
2479 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2480 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2481 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2482 before/after (respectively) the node pointed to by PS_I when scheduled
2483 in the same cycle. */
2484 static ps_insn_ptr
2485 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2486 sbitmap must_precede, sbitmap must_follow)
2488 ps_insn_ptr ps_i;
2489 int rest_count = 1;
2490 int row = SMODULO (cycle, ps->ii);
2492 if (ps->rows[row]
2493 && ps->rows[row]->row_rest_count >= issue_rate)
2494 return NULL;
2496 if (ps->rows[row])
2497 rest_count += ps->rows[row]->row_rest_count;
2499 ps_i = create_ps_insn (node, rest_count, cycle);
2501 /* Finds and inserts PS_I according to MUST_FOLLOW and
2502 MUST_PRECEDE. */
2503 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2505 free (ps_i);
2506 return NULL;
2509 return ps_i;
2512 /* Advance time one cycle. Assumes DFA is being used. */
2513 static void
2514 advance_one_cycle (void)
2516 if (targetm.sched.dfa_pre_cycle_insn)
2517 state_transition (curr_state,
2518 targetm.sched.dfa_pre_cycle_insn ());
2520 state_transition (curr_state, NULL);
2522 if (targetm.sched.dfa_post_cycle_insn)
2523 state_transition (curr_state,
2524 targetm.sched.dfa_post_cycle_insn ());
2529 /* Checks if PS has resource conflicts according to DFA, starting from
2530 FROM cycle to TO cycle; returns true if there are conflicts and false
2531 if there are no conflicts. Assumes DFA is being used. */
2532 static int
2533 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2535 int cycle;
2537 state_reset (curr_state);
2539 for (cycle = from; cycle <= to; cycle++)
2541 ps_insn_ptr crr_insn;
2542 /* Holds the remaining issue slots in the current row. */
2543 int can_issue_more = issue_rate;
2545 /* Walk through the DFA for the current row. */
2546 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2547 crr_insn;
2548 crr_insn = crr_insn->next_in_row)
2550 rtx insn = crr_insn->node->insn;
2552 if (!INSN_P (insn))
2553 continue;
2555 /* Check if there is room for the current insn. */
2556 if (!can_issue_more || state_dead_lock_p (curr_state))
2557 return true;
2559 /* Update the DFA state and return with failure if the DFA found
2560 recource conflicts. */
2561 if (state_transition (curr_state, insn) >= 0)
2562 return true;
2564 if (targetm.sched.variable_issue)
2565 can_issue_more =
2566 targetm.sched.variable_issue (sched_dump, sched_verbose,
2567 insn, can_issue_more);
2568 /* A naked CLOBBER or USE generates no instruction, so don't
2569 let them consume issue slots. */
2570 else if (GET_CODE (PATTERN (insn)) != USE
2571 && GET_CODE (PATTERN (insn)) != CLOBBER)
2572 can_issue_more--;
2575 /* Advance the DFA to the next cycle. */
2576 advance_one_cycle ();
2578 return false;
2581 /* Checks if the given node causes resource conflicts when added to PS at
2582 cycle C. If not the node is added to PS and returned; otherwise zero
2583 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2584 cuid N must be come before/after (respectively) the node pointed to by
2585 PS_I when scheduled in the same cycle. */
2586 ps_insn_ptr
2587 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2588 int c, sbitmap must_precede,
2589 sbitmap must_follow)
2591 int has_conflicts = 0;
2592 ps_insn_ptr ps_i;
2594 /* First add the node to the PS, if this succeeds check for
2595 conflicts, trying different issue slots in the same row. */
2596 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2597 return NULL; /* Failed to insert the node at the given cycle. */
2599 has_conflicts = ps_has_conflicts (ps, c, c)
2600 || (ps->history > 0
2601 && ps_has_conflicts (ps,
2602 c - ps->history,
2603 c + ps->history));
2605 /* Try different issue slots to find one that the given node can be
2606 scheduled in without conflicts. */
2607 while (has_conflicts)
2609 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2610 break;
2611 has_conflicts = ps_has_conflicts (ps, c, c)
2612 || (ps->history > 0
2613 && ps_has_conflicts (ps,
2614 c - ps->history,
2615 c + ps->history));
2618 if (has_conflicts)
2620 remove_node_from_ps (ps, ps_i);
2621 return NULL;
2624 ps->min_cycle = MIN (ps->min_cycle, c);
2625 ps->max_cycle = MAX (ps->max_cycle, c);
2626 return ps_i;
2629 /* Rotate the rows of PS such that insns scheduled at time
2630 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2631 void
2632 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2634 int i, row, backward_rotates;
2635 int last_row = ps->ii - 1;
2637 if (start_cycle == 0)
2638 return;
2640 backward_rotates = SMODULO (start_cycle, ps->ii);
2642 /* Revisit later and optimize this into a single loop. */
2643 for (i = 0; i < backward_rotates; i++)
2645 ps_insn_ptr first_row = ps->rows[0];
2647 for (row = 0; row < last_row; row++)
2648 ps->rows[row] = ps->rows[row+1];
2650 ps->rows[last_row] = first_row;
2653 ps->max_cycle -= start_cycle;
2654 ps->min_cycle -= start_cycle;
2657 #endif /* INSN_SCHEDULING */
2659 static bool
2660 gate_handle_sms (void)
2662 return (optimize > 0 && flag_modulo_sched);
/* Run instruction scheduler.  */
/* Perform SMS modulo scheduling.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes: record the successor chain in bb->aux
     before leaving cfglayout mode.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2691 struct tree_opt_pass pass_sms =
2693 "sms", /* name */
2694 gate_handle_sms, /* gate */
2695 rest_of_handle_sms, /* execute */
2696 NULL, /* sub */
2697 NULL, /* next */
2698 0, /* static_pass_number */
2699 TV_SMS, /* tv_id */
2700 0, /* properties_required */
2701 0, /* properties_provided */
2702 0, /* properties_destroyed */
2703 TODO_dump_func, /* todo_flags_start */
2704 TODO_df_finish | TODO_verify_rtl_sharing |
2705 TODO_dump_func |
2706 TODO_ggc_collect, /* todo_flags_finish */
2707 'm' /* letter */