Update concepts branch to revision 131834
[official-gcc.git] / gcc / modulo-sched.c
blobf11bc1c35cffb0afbcc338ccb4a78ac60a1c7202
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
91 emitting the control part, and a kernel which is to iterate that
92 constant number of iterations less than the original loop. So the
93 control part should be a set of insns clearly identified and having
94 its own iv, not otherwise used in the loop (at-least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
 97 where (1) the control part comprises all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
 102 /* This page defines partial-schedule structures and functions for
 103    modulo scheduling.  */
 105 typedef struct partial_schedule *partial_schedule_ptr;
 106 typedef struct ps_insn *ps_insn_ptr;
 108 /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
 109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
 111 /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
 112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
 114 /* Perform signed modulo, always returning a non-negative value.
    NOTE: both X and Y are evaluated more than once; do not pass
    expressions with side effects.  */
 115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
 117 /* The number of different iterations the nodes in ps span, assuming
 118    the stage boundaries are placed efficiently.
    I.e. the cycle span of the schedule divided by II, rounded up.  */
 119 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
 120 + 1 + (ps)->ii - 1) / (ps)->ii)
 122 /* A single instruction in the partial schedule.  Each row of the
    schedule is a doubly-linked list of these.  */
 123 struct ps_insn
 125 /* The corresponding DDG_NODE.  */
 126 ddg_node_ptr node;
 128 /* The (absolute) cycle in which the PS instruction is scheduled.
 129    Same as SCHED_TIME (node).  */
 130 int cycle;
 132 /* The next/prev PS_INSN in the same row.  */
 133 ps_insn_ptr next_in_row,
 134 prev_in_row;
 136 /* The number of nodes in the same row that come after this node.  */
 137 int row_rest_count;
 140 /* Holds the partial schedule as an array of II rows.  Each entry of the
 141    array points to a linked list of PS_INSNs, which represents the
 142    instructions that are scheduled for that row.  */
 143 struct partial_schedule
 145 int ii;	/* Number of rows in the partial schedule.  */
 146 int history;  /* Threshold for conflict checking using DFA.  */
 148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
 149 ps_insn_ptr *rows;
 151 /* The earliest absolute cycle of an insn in the partial schedule.  */
 152 int min_cycle;
 154 /* The latest absolute cycle of an insn in the partial schedule.  */
 155 int max_cycle;
 157 ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
 160 /* We use this to record all the register replacements we do in
 161    the kernel so we can undo SMS if it is not profitable.
    A singly-linked list; each element records one register replacement
    performed in INSN (ORIG_REG replaced by NEW_REG).  */
 162 struct undo_replace_buff_elem
 164 rtx insn;
 165 rtx orig_reg;
 166 rtx new_reg;
 167 struct undo_replace_buff_elem *next;
/* Forward declarations of the partial-schedule helpers defined later
   in this file.  */
 172 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
 173 static void free_partial_schedule (partial_schedule_ptr);
 174 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
 175 void print_partial_schedule (partial_schedule_ptr, FILE *);
 176 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
 177 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
 178 ddg_node_ptr node, int cycle,
 179 sbitmap must_precede,
 180 sbitmap must_follow);
 181 static void rotate_partial_schedule (partial_schedule_ptr, int);
 182 void set_row_column_for_ps (partial_schedule_ptr);
 183 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
 184 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
 187 /* This page defines constants and structures for the modulo scheduling
 188    driver.  */
 190 /* As in haifa-sched.c:  */
 191 /* issue_rate is the number of insns that can be scheduled in the same
 192    machine cycle.  It can be defined in the config/mach/mach.h file,
 193    otherwise we set it to 1.  */
 195 static int issue_rate;
 197 static int sms_order_nodes (ddg_ptr, int, int *, int *);
 198 static void set_node_sched_params (ddg_ptr);
 199 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
 200 static void permute_partial_schedule (partial_schedule_ptr, rtx);
 201 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
 202 rtx, rtx);
 203 static void duplicate_insns_of_cycles (partial_schedule_ptr,
 204 int, int, int, rtx);
/* Accessors for the node_sched_params structure hung off each DDG
   node's aux.info field (set up by set_node_sched_params).  X is a
   ddg_node_ptr.  */
 206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
 207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
 208 #define SCHED_FIRST_REG_MOVE(x) \
 209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
 210 #define SCHED_NREG_MOVES(x) \
 211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
 212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
 213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
 214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
 216 /* The scheduling parameters held for each node.  */
 217 typedef struct node_sched_params
 219 int asap;	/* A lower-bound on the absolute scheduling cycle.  */
 220 int time;	/* The absolute scheduling cycle (time >= asap).  */
 222 /* The following field (first_reg_move) is a pointer to the first
 223    register-move instruction added to handle the modulo-variable-expansion
 224    of the register defined by this node.  This register-move copies the
 225    original register defined by the node.  */
 226 rtx first_reg_move;
 228 /* The number of register-move instructions added, immediately preceding
 229    first_reg_move.  */
 230 int nreg_moves;
 232 int row;    /* Holds time % ii.  */
 233 int stage;  /* Holds time / ii.  */
 235 /* The column of a node inside the ps.  If nodes u, v are on the same row,
 236    u will precede v if column (u) < column (v).  */
 237 int column;
 238 } *node_sched_params_ptr;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
244 static const char *
245 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
247 static char tmp[80];
249 sprintf (tmp, "i%4d", INSN_UID (insn));
250 return tmp;
/* Callback required by the scheduler interface.  All parameters are
   ATTRIBUTE_UNUSED: SMS has no use for jump register dependencies, so
   this hook does nothing.  */
 253 static void
 254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
 255 regset cond_exec ATTRIBUTE_UNUSED,
 256 regset used ATTRIBUTE_UNUSED,
 257 regset set ATTRIBUTE_UNUSED)
/* The sched_info callbacks used while analyzing dependencies for SMS.
   Only sms_print_insn and compute_jump_reg_dependencies are provided;
   the remaining hooks are unused here and left NULL/0.  NOTE(review):
   the positional initializers must match the field order of struct
   sched_info in sched-int.h — verify there when changing.  */
 261 static struct sched_info sms_sched_info =
 263 NULL,
 264 NULL,
 265 NULL,
 266 NULL,
 267 NULL,
 268 sms_print_insn,
 269 NULL,
 270 compute_jump_reg_dependencies,
 271 NULL, NULL,
 272 NULL, NULL,
 273 0, 0, 0,
 275 NULL, NULL, NULL, NULL, NULL,
 280 /* Given HEAD and TAIL which are the first and last insns in a loop;
 281    return the register which controls the loop.  Return zero if it has
 282    more than one occurrence in the loop besides the control part or the
 283    do-loop pattern is not of the form we expect.  */
 284 static rtx
 285 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
 287 #ifdef HAVE_doloop_end
 288 rtx reg, condition, insn, first_insn_not_to_check;
 290 if (!JUMP_P (tail))
 291 return NULL_RTX;
 293 /* TODO: Free SMS's dependence on doloop_condition_get.  */
 294 condition = doloop_condition_get (tail);
 295 if (! condition)
 296 return NULL_RTX;
/* The condition is either (ne REG ...) or (ne (plus REG ...) ...);
   anything else means doloop_condition_get changed shape under us.  */
 298 if (REG_P (XEXP (condition, 0)))
 299 reg = XEXP (condition, 0);
 300 else if (GET_CODE (XEXP (condition, 0)) == PLUS
 301 && REG_P (XEXP (XEXP (condition, 0), 0)))
 302 reg = XEXP (XEXP (condition, 0), 0);
 303 else
 304 gcc_unreachable ();
 306 /* Check that the COUNT_REG has no other occurrences in the loop
 307    until the decrement.  We assume the control part consists of
 308    either a single (parallel) branch-on-count or a (non-parallel)
 309    branch immediately preceded by a single (decrement) insn.  */
 310 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
 311 : PREV_INSN (tail));
 313 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
 314 if (reg_mentioned_p (reg, insn))
 316 if (dump_file)
 318 fprintf (dump_file, "SMS count_reg found ");
 319 print_rtl_single (dump_file, reg);
 320 fprintf (dump_file, " outside control in insn:\n");
 321 print_rtl_single (dump_file, insn);
 324 return NULL_RTX;
 327 return reg;
 328 #else
 329 return NULL_RTX;
 330 #endif
 333 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
 334    that the number of iterations is a compile-time constant.  If so,
 335    return the rtx that sets COUNT_REG to a constant, and set COUNT to
 336    this constant.  Otherwise return 0.  */
 337 static rtx
 338 const_iteration_count (rtx count_reg, basic_block pre_header,
 339 HOST_WIDEST_INT * count)
 341 rtx insn;
 342 rtx head, tail;
 344 if (! pre_header)
 345 return NULL_RTX;
 347 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
/* Scan backwards so we find the LAST set of COUNT_REG before the
   loop; an earlier set would be dead at loop entry.  */
 349 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
 350 if (INSN_P (insn) && single_set (insn) &&
 351 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
 353 rtx pat = single_set (insn);
 355 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
 357 *count = INTVAL (SET_SRC (pat));
 358 return insn;
/* COUNT_REG is set, but not to a constant — give up.  */
 361 return NULL_RTX;
 364 return NULL_RTX;
 367 /* A very simple resource-based lower bound on the initiation interval.
 368    ??? Improve the accuracy of this bound by considering the
 369    utilization of various units.  */
 370 static int
 371 res_MII (ddg_ptr g)
 373 if (targetm.sched.sms_res_mii)
 374 return targetm.sched.sms_res_mii (g);
/* NOTE(review): integer division — this returns 0 when num_nodes <
   issue_rate; presumably the caller clamps MII to at least 1, verify
   in sms_schedule.  */
 376 return (g->num_nodes / issue_rate);
 380 /* Points to the array that contains the sched data for each node.  */
 381 static node_sched_params_ptr node_sched_params;
 383 /* Allocate sched_params for each node and initialize it.  Assumes that
 384    the aux field of each node contains the asap bound (computed earlier),
 385    and copies it into the sched_params field.  */
 386 static void
 387 set_node_sched_params (ddg_ptr g)
 389 int i;
 391 /* Allocate for each node in the DDG a place to hold the "sched_data".  */
 392 /* Initialize ASAP/ALAP/HEIGHT to zero (xcalloc zero-fills).  */
 393 node_sched_params = (node_sched_params_ptr)
 394 xcalloc (g->num_nodes,
 395 sizeof (struct node_sched_params))
 397 /* Set the pointer of the general data of the node to point to the
 398    appropriate sched_params structure.  */
 399 for (i = 0; i < g->num_nodes; i++)
 401 /* Watch out for aliasing problems?  */
 402 node_sched_params[i].asap = g->nodes[i].aux.count;
 403 g->nodes[i].aux.info = &node_sched_params[i];
/* Dump to FILE the sched params (asap, time, reg-moves) of the first
   NUM_NODES nodes of G.  No-op when FILE is NULL.  */
 407 static void
 408 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
 410 int i;
 412 if (! file)
 413 return;
 414 for (i = 0; i < num_nodes; i++)
 416 node_sched_params_ptr nsp = &node_sched_params[i];
 417 rtx reg_move = nsp->first_reg_move;
 418 int j;
 420 fprintf (file, "Node = %d; INSN = %d\n", i,
 421 (INSN_UID (g->nodes[i].insn)));
 422 fprintf (file, " asap = %d:\n", nsp->asap);
 423 fprintf (file, " time = %d:\n", nsp->time);
 424 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
/* The reg-moves immediately precede first_reg_move, so walk
   backwards with PREV_INSN.  */
 425 for (j = 0; j < nsp->nreg_moves; j++)
 427 fprintf (file, " reg_move = ");
 428 print_rtl_single (file, reg_move);
 429 reg_move = PREV_INSN (reg_move);
/*
 435    Breaking intra-loop register anti-dependences:
 436    Each intra-loop register anti-dependence implies a cross-iteration true
 437    dependence of distance 1.  Therefore, we can remove such false dependencies
 438    and figure out if the partial schedule broke them by checking if (for a
 439    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
 440    if so generate a register move.   The number of such moves is equal to:
 441               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
 442    nreg_moves = ----------------------------------- + 1 - {   dependence.
 443                             ii                           { 1 if not.
*/
 445 static struct undo_replace_buff_elem *
 446 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
 448 ddg_ptr g = ps->g;
 449 int ii = ps->ii;
 450 int i;
 451 struct undo_replace_buff_elem *reg_move_replaces = NULL;
 453 for (i = 0; i < g->num_nodes; i++)
 455 ddg_node_ptr u = &g->nodes[i];
 456 ddg_edge_ptr e;
 457 int nreg_moves = 0, i_reg_move;
 458 sbitmap *uses_of_defs;
 459 rtx last_reg_move;
 460 rtx prev_reg, old_reg;
 462 /* Compute the number of reg_moves needed for u, by looking at life
 463    ranges started at u (excluding self-loops).  */
 464 for (e = u->out; e; e = e->next_out)
 465 if (e->type == TRUE_DEP && e->dest != e->src)
 467 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
 469 if (e->distance == 1)
 470 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
 472 /* If dest precedes src in the schedule of the kernel, then dest
 473    will read before src writes and we can save one reg_copy.  */
 474 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
 475 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
 476 nreg_moves4e--;
 478 nreg_moves = MAX (nreg_moves, nreg_moves4e);
 481 if (nreg_moves == 0)
 482 continue;
 484 /* Every use of the register defined by node may require a different
 485    copy of this register, depending on the time the use is scheduled.
 486    Set a bitmap vector, telling which nodes use each copy of this
 487    register.  */
 488 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
 489 sbitmap_vector_zero (uses_of_defs, nreg_moves);
 490 for (e = u->out; e; e = e->next_out)
 491 if (e->type == TRUE_DEP && e->dest != e->src)
 493 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
 495 if (e->distance == 1)
 496 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
 498 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
 499 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
 500 dest_copy--;
 502 if (dest_copy)
 503 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
 506 /* Now generate the reg_moves, attaching relevant uses to them.  */
 507 SCHED_NREG_MOVES (u) = nreg_moves;
 508 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
 509 /* Insert the reg-moves right before the notes which precede
 510    the insn they relate to.  */
 511 last_reg_move = u->first_note;
 513 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
 515 unsigned int i_use = 0;
 516 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
 517 rtx reg_move = gen_move_insn (new_reg, prev_reg);
 518 sbitmap_iterator sbi;
 520 add_insn_before (reg_move, last_reg_move, NULL);
 521 last_reg_move = reg_move;
 523 if (!SCHED_FIRST_REG_MOVE (u))
 524 SCHED_FIRST_REG_MOVE (u) = reg_move;
/* Record each replacement in the undo buffer (prepended to the
   list) so the whole transformation can be reverted later.  */
 526 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
 528 struct undo_replace_buff_elem *rep;
 530 rep = (struct undo_replace_buff_elem *)
 531 xcalloc (1, sizeof (struct undo_replace_buff_elem));
 532 rep->insn = g->nodes[i_use].insn;
 533 rep->orig_reg = old_reg;
 534 rep->new_reg = new_reg;
 536 if (! reg_move_replaces)
 537 reg_move_replaces = rep;
 538 else
 540 rep->next = reg_move_replaces;
 541 reg_move_replaces = rep;
 544 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
 545 if (rescan)
 546 df_insn_rescan (g->nodes[i_use].insn);
 549 prev_reg = new_reg;
 551 sbitmap_vector_free (uses_of_defs);
 553 return reg_move_replaces;
556 /* Free memory allocated for the undo buffer. */
557 static void
558 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
561 while (reg_move_replaces)
563 struct undo_replace_buff_elem *rep = reg_move_replaces;
565 reg_move_replaces = reg_move_replaces->next;
566 free (rep);
 570 /* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
 571    of SCHED_ROW and SCHED_STAGE.  */
 572 static void
 573 normalize_sched_times (partial_schedule_ptr ps)
 575 int row;
 576 int amount = PS_MIN_CYCLE (ps);
 577 int ii = ps->ii;
 578 ps_insn_ptr crr_insn;
 580 for (row = 0; row < ii; row++)
 581 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
 583 ddg_node_ptr u = crr_insn->node;
 584 int normalized_time = SCHED_TIME (u) - amount;
 586 if (dump_file)
 587 fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
 588 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
 589 (u), ps->min_cycle);
/* Every scheduled time must lie within [min_cycle, max_cycle].  */
 590 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
 591 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
 592 SCHED_TIME (u) = normalized_time;
 593 SCHED_ROW (u) = normalized_time % ii;
 594 SCHED_STAGE (u) = normalized_time / ii;
598 /* Set SCHED_COLUMN of each node according to its position in PS. */
599 static void
600 set_columns_for_ps (partial_schedule_ptr ps)
602 int row;
604 for (row = 0; row < ps->ii; row++)
606 ps_insn_ptr cur_insn = ps->rows[row];
607 int column = 0;
609 for (; cur_insn; cur_insn = cur_insn->next_in_row)
610 SCHED_COLUMN (cur_insn->node) = column++;
 614 /* Permute the insns according to their order in PS, from row 0 to
 615    row ii-1, and position them right before LAST.  This schedules
 616    the insns of the loop kernel.  */
 617 static void
 618 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
 620 int ii = ps->ii;
 621 int row;
 622 ps_insn_ptr ps_ij;
 624 for (row = 0; row < ii ; row++)
 625 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
/* Skip the move when the insn is already in place.  */
 626 if (PREV_INSN (last) != ps_ij->node->insn)
 627 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
 628 PREV_INSN (last));
/* Emit copies (into the current sequence) of the insns of PS whose
   stage lies in [FROM_STAGE, TO_STAGE], together with the appropriate
   reg-moves, skipping insns that mention COUNT_REG.  FOR_PROLOG
   selects prolog (nonzero) vs. epilog reg-move selection.  */
 631 static void
 632 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 633 int to_stage, int for_prolog, rtx count_reg)
 635 int row;
 636 ps_insn_ptr ps_ij;
 638 for (row = 0; row < ps->ii; row++)
 639 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
 641 ddg_node_ptr u_node = ps_ij->node;
 642 int j, i_reg_moves;
 643 rtx reg_move = NULL_RTX;
 645 /* Do not duplicate any insn which refers to count_reg as it
 646    belongs to the control part.
 647    TODO: This should be done by analyzing the control part of
 648    the loop.  */
 649 if (reg_mentioned_p (count_reg, u_node->insn))
 650 continue;
 652 if (for_prolog)
 654 /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
 655    number of reg_moves starting with the second occurrence of
 656    u_node, which is generated if its SCHED_STAGE <= to_stage.  */
 657 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
 658 i_reg_moves = MAX (i_reg_moves, 0);
 659 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
 661 /* The reg_moves start from the *first* reg_move backwards.  */
 662 if (i_reg_moves)
 664 reg_move = SCHED_FIRST_REG_MOVE (u_node);
 665 for (j = 1; j < i_reg_moves; j++)
 666 reg_move = PREV_INSN (reg_move);
 669 else /* It's for the epilog.  */
 671 /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
 672    starting to decrease one stage after u_node no longer occurs;
 673    that is, generate all reg_moves until
 674    SCHED_STAGE (u_node) == from_stage - 1.  */
 675 i_reg_moves = SCHED_NREG_MOVES (u_node)
 676 - (from_stage - SCHED_STAGE (u_node) - 1);
 677 i_reg_moves = MAX (i_reg_moves, 0);
 678 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
 680 /* The reg_moves start from the *last* reg_move forwards.  */
 681 if (i_reg_moves)
 683 reg_move = SCHED_FIRST_REG_MOVE (u_node);
/* Walk back over the whole chain to reach the last reg_move.  */
 684 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
 685 reg_move = PREV_INSN (reg_move);
 689 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
 690 emit_insn (copy_rtx (PATTERN (reg_move)));
 691 if (SCHED_STAGE (u_node) >= from_stage
 692 && SCHED_STAGE (u_node) <= to_stage)
 693 duplicate_insn_chain (u_node->first_note, u_node->insn);
 698 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
 699 static void
 700 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
 701 rtx count_reg, rtx count_init)
 703 int i;
 704 int last_stage = PS_STAGE_COUNT (ps) - 1;
 705 edge e;
 707 /* Generate the prolog, inserting its insns on the loop-entry edge.  */
 708 start_sequence ();
 710 if (!count_init)
 712 /* Generate instructions at the beginning of the prolog to
 713    adjust the loop count by STAGE_COUNT.  If loop count is constant
 714    (count_init), this constant is adjusted by STAGE_COUNT in
 715    generate_prolog_epilog function.  */
 716 rtx sub_reg = NULL_RTX;
 718 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
 719 count_reg, GEN_INT (last_stage),
 720 count_reg, 1, OPTAB_DIRECT);
 721 gcc_assert (REG_P (sub_reg));
 722 if (REGNO (sub_reg) != REGNO (count_reg))
 723 emit_move_insn (count_reg, sub_reg);
 726 for (i = 0; i < last_stage; i++)
 727 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
 729 /* Put the prolog on the entry edge.  */
 730 e = loop_preheader_edge (loop);
 731 split_edge_and_insert (e, get_insns ());
 733 end_sequence ();
 735 /* Generate the epilog, inserting its insns on the loop-exit edge.  */
 736 start_sequence ();
 738 for (i = 0; i < last_stage; i++)
 739 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
 741 /* Put the epilogue on the exit edge.  */
 742 gcc_assert (single_exit (loop));
 743 e = single_exit (loop);
 744 split_edge_and_insert (e, get_insns ());
 745 end_sequence ();
 748 /* Return true if all the BBs of the loop are empty except the
 749    loop header.  */
 750 static bool
 751 loop_single_full_bb_p (struct loop *loop)
 753 unsigned i;
 754 basic_block *bbs = get_loop_body (loop);
 756 for (i = 0; i < loop->num_nodes ; i++)
 758 rtx head, tail;
 759 bool empty_bb = true;
 761 if (bbs[i] == loop->header)
 762 continue;
 764 /* Make sure that basic blocks other than the header
 765    have only notes, labels or jumps.  */
 766 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
 767 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
 769 if (NOTE_P (head) || LABEL_P (head)
 770 || (INSN_P (head) && JUMP_P (head)))
 771 continue;
 772 empty_bb = false;
 773 break;
/* Remember to release the BB array on every exit path.  */
 776 if (! empty_bb)
 778 free (bbs);
 779 return false;
 782 free (bbs);
 783 return true;
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.
   LOOP is parenthesized at each use so the macro is safe for any
   pointer-valued argument expression.  */
#define SIMPLE_SMS_LOOP_P(loop) (((loop)->num_nodes < 3)		      \
				 && (EDGE_COUNT ((loop)->latch->preds) == 1)  \
				 && (EDGE_COUNT ((loop)->latch->succs) == 1))
 792 /* Return true if the loop is in its canonical form and false if not.
 793    i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
 794 static bool
 795 loop_canon_p (struct loop *loop)
/* Reject nested loops and the (pseudo) root of the loop tree.  */
 798 if (loop->inner || !loop_outer (loop))
 800 if (dump_file)
 801 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
 802 return false;
 805 if (!single_exit (loop))
 807 if (dump_file)
 809 rtx insn = BB_END (loop->header);
 811 fprintf (dump_file, "SMS loop many exits ");
 812 fprintf (dump_file, " %s %d (file, line)\n",
 813 insn_file (insn), insn_line (insn));
 815 return false;
 818 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
 820 if (dump_file)
 822 rtx insn = BB_END (loop->header);
 824 fprintf (dump_file, "SMS loop many BBs. ");
 825 fprintf (dump_file, " %s %d (file, line)\n",
 826 insn_file (insn), insn_line (insn));
 828 return false;
 831 return true;
 834 /* If there is more than one entry for the loop,
 835    make it one by splitting the first entry edge and
 836    redirecting the others to the new BB.  */
 837 static void
 838 canon_loop (struct loop *loop)
 840 edge e;
 841 edge_iterator i;
 843 /* Avoid annoying special cases of edges going to exit
 844    block.  */
 845 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
 846 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
 847 split_edge (e);
/* Give the loop a separate latch block when the latch coincides with
   the header or has multiple successors.  */
 849 if (loop->latch == loop->header
 850 || EDGE_COUNT (loop->latch->succs) > 1)
 852 FOR_EACH_EDGE (e, i, loop->header->preds)
 853 if (e->src == loop->latch)
 854 break;
 855 split_edge (e);
 859 /* Probability in % that the sms-ed loop rolls enough so that optimized
 860    version may be entered.  Just a guess.  */
 861 #define PROB_SMS_ENOUGH_ITERATIONS 80
 863 /* Used to calculate the upper bound of ii (presumably
    MaxII = MAXII_FACTOR * MII -- confirm in sms_schedule).  */
 864 #define MAXII_FACTOR 2
866 /* Main entry point, perform SMS scheduling on the loops of the function
867 that consist of single basic blocks. */
868 static void
869 sms_schedule (void)
871 rtx insn;
872 ddg_ptr *g_arr, g;
873 int * node_order;
874 int maxii, max_asap;
875 loop_iterator li;
876 partial_schedule_ptr ps;
877 basic_block bb = NULL;
878 struct loop *loop;
879 basic_block condition_bb = NULL;
880 edge latch_edge;
881 gcov_type trip_count = 0;
883 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
884 | LOOPS_HAVE_RECORDED_EXITS);
885 if (number_of_loops () <= 1)
887 loop_optimizer_finalize ();
888 return; /* There are no loops to schedule. */
891 /* Initialize issue_rate. */
892 if (targetm.sched.issue_rate)
894 int temp = reload_completed;
896 reload_completed = 1;
897 issue_rate = targetm.sched.issue_rate ();
898 reload_completed = temp;
900 else
901 issue_rate = 1;
903 /* Initialize the scheduler. */
904 current_sched_info = &sms_sched_info;
906 /* Init Data Flow analysis, to be used in interloop dep calculation. */
907 df_set_flags (DF_LR_RUN_DCE);
908 df_rd_add_problem ();
909 df_note_add_problem ();
910 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
911 df_analyze ();
912 regstat_compute_calls_crossed ();
913 sched_init ();
915 /* Allocate memory to hold the DDG array one entry for each loop.
916 We use loop->num as index into this array. */
917 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
919 if (dump_file)
921 fprintf (dump_file, "\n\nSMS analysis phase\n");
922 fprintf (dump_file, "===================\n\n");
925 /* Build DDGs for all the relevant loops and hold them in G_ARR
926 indexed by the loop index. */
927 FOR_EACH_LOOP (li, loop, 0)
929 rtx head, tail;
930 rtx count_reg;
932 /* For debugging. */
933 if (dbg_cnt (sms_sched_loop) == false)
935 if (dump_file)
936 fprintf (dump_file, "SMS reached max limit... \n");
938 break;
941 if (dump_file)
943 rtx insn = BB_END (loop->header);
945 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
946 loop->num, insn_file (insn), insn_line (insn));
950 if (! loop_canon_p (loop))
951 continue;
953 if (! loop_single_full_bb_p (loop))
955 if (dump_file)
956 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
957 continue;
960 bb = loop->header;
962 get_ebb_head_tail (bb, bb, &head, &tail);
963 latch_edge = loop_latch_edge (loop);
964 gcc_assert (single_exit (loop));
965 if (single_exit (loop)->count)
966 trip_count = latch_edge->count / single_exit (loop)->count;
968 /* Perform SMS only on loops that their average count is above threshold. */
970 if ( latch_edge->count
971 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
973 if (dump_file)
975 fprintf (dump_file, " %s %d (file, line)\n",
976 insn_file (tail), insn_line (tail));
977 fprintf (dump_file, "SMS single-bb-loop\n");
978 if (profile_info && flag_branch_probabilities)
980 fprintf (dump_file, "SMS loop-count ");
981 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
982 (HOST_WIDEST_INT) bb->count);
983 fprintf (dump_file, "\n");
984 fprintf (dump_file, "SMS trip-count ");
985 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
986 (HOST_WIDEST_INT) trip_count);
987 fprintf (dump_file, "\n");
988 fprintf (dump_file, "SMS profile-sum-max ");
989 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
990 (HOST_WIDEST_INT) profile_info->sum_max);
991 fprintf (dump_file, "\n");
994 continue;
997 /* Make sure this is a doloop. */
998 if ( !(count_reg = doloop_register_get (head, tail)))
1000 if (dump_file)
1001 fprintf (dump_file, "SMS doloop_register_get failed\n");
1002 continue;
1005 /* Don't handle BBs with calls or barriers, or !single_set insns,
1006 or auto-increment insns (to avoid creating invalid reg-moves
1007 for the auto-increment insns).
1008 ??? Should handle auto-increment insns.
1009 ??? Should handle insns defining subregs. */
1010 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1012 rtx set;
1014 if (CALL_P (insn)
1015 || BARRIER_P (insn)
1016 || (INSN_P (insn) && !JUMP_P (insn)
1017 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
1018 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1019 || (INSN_P (insn) && (set = single_set (insn))
1020 && GET_CODE (SET_DEST (set)) == SUBREG))
1021 break;
1024 if (insn != NEXT_INSN (tail))
1026 if (dump_file)
1028 if (CALL_P (insn))
1029 fprintf (dump_file, "SMS loop-with-call\n");
1030 else if (BARRIER_P (insn))
1031 fprintf (dump_file, "SMS loop-with-barrier\n");
1032 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1033 fprintf (dump_file, "SMS reg inc\n");
1034 else if ((INSN_P (insn) && !JUMP_P (insn)
1035 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1036 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1037 else
1038 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1039 print_rtl_single (dump_file, insn);
1042 continue;
1045 if (! (g = create_ddg (bb, 0)))
1047 if (dump_file)
1048 fprintf (dump_file, "SMS create_ddg failed\n");
1049 continue;
1052 g_arr[loop->num] = g;
1053 if (dump_file)
1054 fprintf (dump_file, "...OK\n");
1057 if (dump_file)
1059 fprintf (dump_file, "\nSMS transformation phase\n");
1060 fprintf (dump_file, "=========================\n\n");
1063 /* We don't want to perform SMS on new loops - created by versioning. */
1064 FOR_EACH_LOOP (li, loop, 0)
1066 rtx head, tail;
1067 rtx count_reg, count_init;
1068 int mii, rec_mii;
1069 unsigned stage_count = 0;
1070 HOST_WIDEST_INT loop_count = 0;
1072 if (! (g = g_arr[loop->num]))
1073 continue;
1075 if (dump_file)
1077 rtx insn = BB_END (loop->header);
1079 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1080 loop->num, insn_file (insn), insn_line (insn));
1082 print_ddg (dump_file, g);
1085 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1087 latch_edge = loop_latch_edge (loop);
1088 gcc_assert (single_exit (loop));
1089 if (single_exit (loop)->count)
1090 trip_count = latch_edge->count / single_exit (loop)->count;
1092 if (dump_file)
1094 fprintf (dump_file, " %s %d (file, line)\n",
1095 insn_file (tail), insn_line (tail));
1096 fprintf (dump_file, "SMS single-bb-loop\n");
1097 if (profile_info && flag_branch_probabilities)
1099 fprintf (dump_file, "SMS loop-count ");
1100 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1101 (HOST_WIDEST_INT) bb->count);
1102 fprintf (dump_file, "\n");
1103 fprintf (dump_file, "SMS profile-sum-max ");
1104 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1105 (HOST_WIDEST_INT) profile_info->sum_max);
1106 fprintf (dump_file, "\n");
1108 fprintf (dump_file, "SMS doloop\n");
1109 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1110 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1111 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1115 /* If the loop has a doloop register, it gets special
1116 handling. */
1117 count_init = NULL_RTX;
1118 if ((count_reg = doloop_register_get (head, tail)))
1120 basic_block pre_header;
1122 pre_header = loop_preheader_edge (loop)->src;
1123 count_init = const_iteration_count (count_reg, pre_header,
1124 &loop_count);
1126 gcc_assert (count_reg);
1128 if (dump_file && count_init)
1130 fprintf (dump_file, "SMS const-doloop ");
1131 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1132 loop_count);
1133 fprintf (dump_file, "\n");
1136 node_order = XNEWVEC (int, g->num_nodes);
1138 mii = 1; /* Need to pass some estimate of mii. */
1139 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1140 mii = MAX (res_MII (g), rec_mii);
1141 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1143 if (dump_file)
1144 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1145 rec_mii, mii, maxii);
1147 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1148 ASAP. */
1149 set_node_sched_params (g);
1151 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1153 if (ps)
1154 stage_count = PS_STAGE_COUNT (ps);
1156 /* Stage count of 1 means that there is no interleaving between
1157 iterations, let the scheduling passes do the job. */
1158 if (stage_count < 1
1159 || (count_init && (loop_count <= stage_count))
1160 || (flag_branch_probabilities && (trip_count <= stage_count)))
1162 if (dump_file)
1164 fprintf (dump_file, "SMS failed... \n");
1165 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1166 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1167 fprintf (dump_file, ", trip-count=");
1168 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1169 fprintf (dump_file, ")\n");
1171 continue;
1173 else
1175 struct undo_replace_buff_elem *reg_move_replaces;
1177 if (dump_file)
1179 fprintf (dump_file,
1180 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1181 stage_count);
1182 print_partial_schedule (ps, dump_file);
1183 fprintf (dump_file,
1184 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1185 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1188 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1189 the closing_branch was scheduled and should appear in the last (ii-1)
1190 row. Otherwise, we are free to schedule the branch, and we let nodes
1191 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1192 row; this should reduce stage_count to minimum.
1193 TODO: Revisit the issue of scheduling the insns of the
1194 control part relative to the branch when the control part
1195 has more than one insn. */
1196 normalize_sched_times (ps);
1197 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1198 set_columns_for_ps (ps);
1200 canon_loop (loop);
1202 /* In case the BCT count is not known, do loop-versioning. */
1203 if (count_reg && ! count_init)
1205 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1206 GEN_INT(stage_count));
1207 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1208 * REG_BR_PROB_BASE) / 100;
1210 loop_version (loop, comp_rtx, &condition_bb,
1211 prob, prob, REG_BR_PROB_BASE - prob,
1212 true);
1215 /* Set new iteration count of loop kernel. */
1216 if (count_reg && count_init)
1217 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1218 - stage_count + 1);
1220 /* Now apply the scheduled kernel to the RTL of the loop. */
1221 permute_partial_schedule (ps, g->closing_branch->first_note);
1223 /* Mark this loop as software pipelined so the later
1224 scheduling passes don't touch it. */
1225 if (! flag_resched_modulo_sched)
1226 g->bb->flags |= BB_DISABLE_SCHEDULE;
1227 /* The life-info is not valid any more. */
1228 df_set_bb_dirty (g->bb);
1230 reg_move_replaces = generate_reg_moves (ps, true);
1231 if (dump_file)
1232 print_node_sched_params (dump_file, g->num_nodes, g);
1233 /* Generate prolog and epilog. */
1234 generate_prolog_epilog (ps, loop, count_reg, count_init);
1236 free_undo_replace_buff (reg_move_replaces);
1239 free_partial_schedule (ps);
1240 free (node_sched_params);
1241 free (node_order);
1242 free_ddg (g);
1245 regstat_free_calls_crossed ();
1246 free (g_arr);
1248 /* Release scheduler data, needed until now because of DFA. */
1249 sched_finish ();
1250 loop_optimizer_finalize ();
1253 /* The SMS scheduling algorithm itself
1254 -----------------------------------
1255 Input: 'O' an ordered list of insns of a loop.
1256 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1258 'Q' is the empty Set
1259 'PS' is the partial schedule; it holds the currently scheduled nodes with
1260 their cycle/slot.
1261 'PSP' previously scheduled predecessors.
1262 'PSS' previously scheduled successors.
1263 't(u)' the cycle where u is scheduled.
1264 'l(u)' is the latency of u.
1265 'd(v,u)' is the dependence distance from v to u.
1266 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1267 the node ordering phase.
1268 'check_hardware_resources_conflicts(u, PS, c)'
1269 run a trace around cycle/slot through DFA model
1270 to check resource conflicts involving instruction u
1271 at cycle c given the partial schedule PS.
1272 'add_to_partial_schedule_at_time(u, PS, c)'
1273 Add the node/instruction u to the partial schedule
1274 PS at time c.
1275 'calculate_register_pressure(PS)'
1276 Given a schedule of instructions, calculate the register
1277 pressure it implies. One implementation could be the
1278 maximum number of overlapping live ranges.
1279 'maxRP' The maximum allowed register pressure, it is usually derived from the
1280 number of registers available in the hardware.
1282 1. II = MII.
1283 2. PS = empty list
1284 3. for each node u in O in pre-computed order
1285 4. if (PSP(u) != Q && PSS(u) == Q) then
1286 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1287 6. start = Early_start; end = Early_start + II - 1; step = 1
1288 11. else if (PSP(u) == Q && PSS(u) != Q) then
1289 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1290 13. start = Late_start; end = Late_start - II + 1; step = -1
1291 14. else if (PSP(u) != Q && PSS(u) != Q) then
1292 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1293 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1294 17. start = Early_start;
1295 18. end = min(Early_start + II - 1 , Late_start);
1296 19. step = 1
1297 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1298 21. start = ASAP(u); end = start + II - 1; step = 1
1299 22. endif
1301 23. success = false
1302 24. for (c = start ; c != end ; c += step)
1303 25. if check_hardware_resources_conflicts(u, PS, c) then
1304 26. add_to_partial_schedule_at_time(u, PS, c)
1305 27. success = true
1306 28. break
1307 29. endif
1308 30. endfor
1309 31. if (success == false) then
1310 32. II = II + 1
1311 33. if (II > maxII) then
1312 34. finish - failed to schedule
1313 35. endif
1314 36. goto 2.
1315 37. endif
1316 38. endfor
1317 39. if (calculate_register_pressure(PS) > maxRP) then
1318 40. goto 32.
1319 41. endif
1320 42. compute epilogue & prologue
1321 43. finish - succeeded to schedule
1324 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1325 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1326 set to 0 to save compile time. */
1327 #define DFA_HISTORY SMS_DFA_HISTORY
1329 /* A threshold for the number of repeated unsuccessful attempts to insert
1330 an empty row, before we flush the partial schedule and start over. */
1331 #define MAX_SPLIT_NUM 10
1332 /* Given the partial schedule PS, this function calculates and returns the
1333 cycles in which we can schedule the node with the given index I.
1334 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1335 noticed that there are several cases in which we fail to SMS the loop
1336 because the sched window of a node is empty due to tight data-deps. In
1337 such cases we want to unschedule some of the predecessors/successors
1338 until we get non-empty scheduling window. It returns -1 if the
1339 scheduling window is empty and zero otherwise. */
1341 static int
1342 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1343 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1345 int start, step, end;
1346 ddg_edge_ptr e;
1347 int u = nodes_order [i];
1348 ddg_node_ptr u_node = &ps->g->nodes[u];
1349 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1350 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1351 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1352 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1353 int psp_not_empty;
1354 int pss_not_empty;
1356 /* 1. compute sched window for u (start, end, step). */
1357 sbitmap_zero (psp);
1358 sbitmap_zero (pss);
1359 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1360 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1362 if (psp_not_empty && !pss_not_empty)
1364 int early_start = INT_MIN;
1366 end = INT_MAX;
1367 for (e = u_node->in; e != 0; e = e->next_in)
1369 ddg_node_ptr v_node = e->src;
1371 if (dump_file)
1373 fprintf (dump_file, "\nProcessing edge: ");
1374 print_ddg_edge (dump_file, e);
1375 fprintf (dump_file,
1376 "\nScheduling %d (%d) in psp_not_empty,"
1377 " checking p %d (%d): ", u_node->cuid,
1378 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1379 (v_node->insn));
1382 if (TEST_BIT (sched_nodes, v_node->cuid))
1384 int p_st = SCHED_TIME (v_node);
1386 early_start =
1387 MAX (early_start, p_st + e->latency - (e->distance * ii));
1389 if (dump_file)
1390 fprintf (dump_file,
1391 "pred st = %d; early_start = %d; latency: %d",
1392 p_st, early_start, e->latency);
1394 if (e->data_type == MEM_DEP)
1395 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1397 else if (dump_file)
1398 fprintf (dump_file, "the node is not scheduled\n");
1400 start = early_start;
1401 end = MIN (end, early_start + ii);
1402 /* Schedule the node close to it's predecessors. */
1403 step = 1;
1405 if (dump_file)
1406 fprintf (dump_file,
1407 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1408 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1411 else if (!psp_not_empty && pss_not_empty)
1413 int late_start = INT_MAX;
1415 end = INT_MIN;
1416 for (e = u_node->out; e != 0; e = e->next_out)
1418 ddg_node_ptr v_node = e->dest;
1420 if (dump_file)
1422 fprintf (dump_file, "\nProcessing edge:");
1423 print_ddg_edge (dump_file, e);
1424 fprintf (dump_file,
1425 "\nScheduling %d (%d) in pss_not_empty,"
1426 " checking s %d (%d): ", u_node->cuid,
1427 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1428 (v_node->insn));
1431 if (TEST_BIT (sched_nodes, v_node->cuid))
1433 int s_st = SCHED_TIME (v_node);
1435 late_start = MIN (late_start,
1436 s_st - e->latency + (e->distance * ii));
1438 if (dump_file)
1439 fprintf (dump_file,
1440 "succ st = %d; late_start = %d; latency = %d",
1441 s_st, late_start, e->latency);
1443 if (e->data_type == MEM_DEP)
1444 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1445 if (dump_file)
1446 fprintf (dump_file, "end = %d\n", end);
1449 else if (dump_file)
1450 fprintf (dump_file, "the node is not scheduled\n");
1453 start = late_start;
1454 end = MAX (end, late_start - ii);
1455 /* Schedule the node close to it's successors. */
1456 step = -1;
1458 if (dump_file)
1459 fprintf (dump_file,
1460 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1461 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1465 else if (psp_not_empty && pss_not_empty)
1467 int early_start = INT_MIN;
1468 int late_start = INT_MAX;
1469 int count_preds = 0;
1470 int count_succs = 0;
1472 start = INT_MIN;
1473 end = INT_MAX;
1474 for (e = u_node->in; e != 0; e = e->next_in)
1476 ddg_node_ptr v_node = e->src;
1478 if (dump_file)
1480 fprintf (dump_file, "\nProcessing edge:");
1481 print_ddg_edge (dump_file, e);
1482 fprintf (dump_file,
1483 "\nScheduling %d (%d) in psp_pss_not_empty,"
1484 " checking p %d (%d): ", u_node->cuid, INSN_UID
1485 (u_node->insn), v_node->cuid, INSN_UID
1486 (v_node->insn));
1489 if (TEST_BIT (sched_nodes, v_node->cuid))
1491 int p_st = SCHED_TIME (v_node);
1493 early_start = MAX (early_start,
1494 p_st + e->latency
1495 - (e->distance * ii));
1497 if (dump_file)
1498 fprintf (dump_file,
1499 "pred st = %d; early_start = %d; latency = %d",
1500 p_st, early_start, e->latency);
1502 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1503 count_preds++;
1505 if (e->data_type == MEM_DEP)
1506 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1508 else if (dump_file)
1509 fprintf (dump_file, "the node is not scheduled\n");
1512 for (e = u_node->out; e != 0; e = e->next_out)
1514 ddg_node_ptr v_node = e->dest;
1516 if (dump_file)
1518 fprintf (dump_file, "\nProcessing edge:");
1519 print_ddg_edge (dump_file, e);
1520 fprintf (dump_file,
1521 "\nScheduling %d (%d) in psp_pss_not_empty,"
1522 " checking s %d (%d): ", u_node->cuid, INSN_UID
1523 (u_node->insn), v_node->cuid, INSN_UID
1524 (v_node->insn));
1527 if (TEST_BIT (sched_nodes, v_node->cuid))
1529 int s_st = SCHED_TIME (v_node);
1531 late_start = MIN (late_start,
1532 s_st - e->latency
1533 + (e->distance * ii));
1535 if (dump_file)
1536 fprintf (dump_file,
1537 "succ st = %d; late_start = %d; latency = %d",
1538 s_st, late_start, e->latency);
1540 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1541 count_succs++;
1543 if (e->data_type == MEM_DEP)
1544 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1546 else if (dump_file)
1547 fprintf (dump_file, "the node is not scheduled\n");
1550 start = MAX (start, early_start);
1551 end = MIN (end, MIN (early_start + ii, late_start + 1));
1552 step = 1;
1553 /* If there are more successors than predecessors schedule the
1554 node close to it's successors. */
1555 if (count_succs >= count_preds)
1557 int old_start = start;
1559 start = end - 1;
1560 end = old_start - 1;
1561 step = -1;
1564 else /* psp is empty && pss is empty. */
1566 start = SCHED_ASAP (u_node);
1567 end = start + ii;
1568 step = 1;
1571 *start_p = start;
1572 *step_p = step;
1573 *end_p = end;
1574 sbitmap_free (psp);
1575 sbitmap_free (pss);
1577 if ((start >= end && step == 1) || (start <= end && step == -1))
1579 if (dump_file)
1580 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1581 start, end, step);
1582 return -1;
1585 return 0;
1588 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1589 node currently been scheduled. At the end of the calculation
1590 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1591 U_NODE which are (1) already scheduled in the first/last row of
1592 U_NODE's scheduling window, (2) whose dependence inequality with U
1593 becomes an equality when U is scheduled in this same row, and (3)
1594 whose dependence latency is zero.
1596 The first and last rows are calculated using the following parameters:
1597 START/END rows - The cycles that begins/ends the traversal on the window;
1598 searching for an empty cycle to schedule U_NODE.
1599 STEP - The direction in which we traverse the window.
1600 II - The initiation interval. */
1602 static void
1603 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1604 int step, int ii, sbitmap sched_nodes,
1605 sbitmap must_precede, sbitmap must_follow)
1607 ddg_edge_ptr e;
1608 int first_cycle_in_window, last_cycle_in_window;
1610 gcc_assert (must_precede && must_follow);
1612 /* Consider the following scheduling window:
1613 {first_cycle_in_window, first_cycle_in_window+1, ...,
1614 last_cycle_in_window}. If step is 1 then the following will be
1615 the order we traverse the window: {start=first_cycle_in_window,
1616 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1617 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1618 end=first_cycle_in_window-1} if step is -1. */
1619 first_cycle_in_window = (step == 1) ? start : end - step;
1620 last_cycle_in_window = (step == 1) ? end - step : start;
1622 sbitmap_zero (must_precede);
1623 sbitmap_zero (must_follow);
1625 if (dump_file)
1626 fprintf (dump_file, "\nmust_precede: ");
1628 /* Instead of checking if:
1629 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1630 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1631 first_cycle_in_window)
1632 && e->latency == 0
1633 we use the fact that latency is non-negative:
1634 SCHED_TIME (e->src) - (e->distance * ii) <=
1635 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1636 first_cycle_in_window
1637 and check only if
1638 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1639 for (e = u_node->in; e != 0; e = e->next_in)
1640 if (TEST_BIT (sched_nodes, e->src->cuid)
1641 && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1642 first_cycle_in_window))
1644 if (dump_file)
1645 fprintf (dump_file, "%d ", e->src->cuid);
1647 SET_BIT (must_precede, e->src->cuid);
1650 if (dump_file)
1651 fprintf (dump_file, "\nmust_follow: ");
1653 /* Instead of checking if:
1654 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1655 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1656 last_cycle_in_window)
1657 && e->latency == 0
1658 we use the fact that latency is non-negative:
1659 SCHED_TIME (e->dest) + (e->distance * ii) >=
1660 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1661 last_cycle_in_window
1662 and check only if
1663 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1664 for (e = u_node->out; e != 0; e = e->next_out)
1665 if (TEST_BIT (sched_nodes, e->dest->cuid)
1666 && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1667 last_cycle_in_window))
1669 if (dump_file)
1670 fprintf (dump_file, "%d ", e->dest->cuid);
1672 SET_BIT (must_follow, e->dest->cuid);
1675 if (dump_file)
1676 fprintf (dump_file, "\n");
1679 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1680 parameters to decide if that's possible:
1681 PS - The partial schedule.
1682 U - The serial number of U_NODE.
1683 NUM_SPLITS - The number of row splits made so far.
1684 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1685 the first row of the scheduling window)
1686 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1687 last row of the scheduling window) */
1689 static bool
1690 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1691 int u, int cycle, sbitmap sched_nodes,
1692 int *num_splits, sbitmap must_precede,
1693 sbitmap must_follow)
1695 ps_insn_ptr psi;
1696 bool success = 0;
1698 verify_partial_schedule (ps, sched_nodes);
1699 psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1700 must_precede, must_follow);
1701 if (psi)
1703 SCHED_TIME (u_node) = cycle;
1704 SET_BIT (sched_nodes, u);
1705 success = 1;
1706 *num_splits = 0;
1707 if (dump_file)
1708 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1712 return success;
1715 /* This function implements the scheduling algorithm for SMS according to the
1716 above algorithm. */
1717 static partial_schedule_ptr
1718 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1720 int ii = mii;
1721 int i, c, success, num_splits = 0;
1722 int flush_and_start_over = true;
1723 int num_nodes = g->num_nodes;
1724 int start, end, step; /* Place together into one struct? */
1725 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1726 sbitmap must_precede = sbitmap_alloc (num_nodes);
1727 sbitmap must_follow = sbitmap_alloc (num_nodes);
1728 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1730 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1732 sbitmap_ones (tobe_scheduled);
1733 sbitmap_zero (sched_nodes);
1735 while (flush_and_start_over && (ii < maxii))
1738 if (dump_file)
1739 fprintf (dump_file, "Starting with ii=%d\n", ii);
1740 flush_and_start_over = false;
1741 sbitmap_zero (sched_nodes);
1743 for (i = 0; i < num_nodes; i++)
1745 int u = nodes_order[i];
1746 ddg_node_ptr u_node = &ps->g->nodes[u];
1747 rtx insn = u_node->insn;
1749 if (!INSN_P (insn))
1751 RESET_BIT (tobe_scheduled, u);
1752 continue;
1755 if (JUMP_P (insn)) /* Closing branch handled later. */
1757 RESET_BIT (tobe_scheduled, u);
1758 continue;
1761 if (TEST_BIT (sched_nodes, u))
1762 continue;
1764 /* Try to get non-empty scheduling window. */
1765 success = 0;
1766 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1767 &step, &end) == 0)
1769 if (dump_file)
1770 fprintf (dump_file, "\nTrying to schedule node %d \
1771 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1772 (g->nodes[u].insn)), start, end, step);
1774 gcc_assert ((step > 0 && start < end)
1775 || (step < 0 && start > end));
1777 calculate_must_precede_follow (u_node, start, end, step, ii,
1778 sched_nodes, must_precede,
1779 must_follow);
1781 for (c = start; c != end; c += step)
1783 sbitmap tmp_precede = NULL;
1784 sbitmap tmp_follow = NULL;
1786 if (c == start)
1788 if (step == 1)
1789 tmp_precede = must_precede;
1790 else /* step == -1. */
1791 tmp_follow = must_follow;
1793 if (c == end - step)
1795 if (step == 1)
1796 tmp_follow = must_follow;
1797 else /* step == -1. */
1798 tmp_precede = must_precede;
1801 success =
1802 try_scheduling_node_in_cycle (ps, u_node, u, c,
1803 sched_nodes,
1804 &num_splits, tmp_precede,
1805 tmp_follow);
1806 if (success)
1807 break;
1810 verify_partial_schedule (ps, sched_nodes);
1812 if (!success)
1814 int split_row;
1816 if (ii++ == maxii)
1817 break;
1819 if (num_splits >= MAX_SPLIT_NUM)
1821 num_splits = 0;
1822 flush_and_start_over = true;
1823 verify_partial_schedule (ps, sched_nodes);
1824 reset_partial_schedule (ps, ii);
1825 verify_partial_schedule (ps, sched_nodes);
1826 break;
1829 num_splits++;
1830 if (step == 1)
1831 split_row = compute_split_row (sched_nodes, start, end,
1832 ps->ii, u_node);
1833 else
1834 split_row = compute_split_row (sched_nodes, end, start,
1835 ps->ii, u_node);
1837 ps_insert_empty_row (ps, split_row, sched_nodes);
1838 i--; /* Go back and retry node i. */
1840 if (dump_file)
1841 fprintf (dump_file, "num_splits=%d\n", num_splits);
1844 /* ??? If (success), check register pressure estimates. */
1845 } /* Continue with next node. */
1846 } /* While flush_and_start_over. */
1847 if (ii >= maxii)
1849 free_partial_schedule (ps);
1850 ps = NULL;
1852 else
1853 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1855 sbitmap_free (sched_nodes);
1856 sbitmap_free (must_precede);
1857 sbitmap_free (must_follow);
1858 sbitmap_free (tobe_scheduled);
1860 return ps;
1863 /* This function inserts a new empty row into PS at the position
1864 according to SPLITROW, keeping all already scheduled instructions
1865 intact and updating their SCHED_TIME and cycle accordingly. */
1866 static void
1867 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1868 sbitmap sched_nodes)
1870 ps_insn_ptr crr_insn;
1871 ps_insn_ptr *rows_new;
1872 int ii = ps->ii;
1873 int new_ii = ii + 1;
1874 int row;
1876 verify_partial_schedule (ps, sched_nodes);
1878 /* We normalize sched_time and rotate ps to have only non-negative sched
1879 times, for simplicity of updating cycles after inserting new row. */
1880 split_row -= ps->min_cycle;
1881 split_row = SMODULO (split_row, ii);
1882 if (dump_file)
1883 fprintf (dump_file, "split_row=%d\n", split_row);
1885 normalize_sched_times (ps);
1886 rotate_partial_schedule (ps, ps->min_cycle);
1888 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1889 for (row = 0; row < split_row; row++)
1891 rows_new[row] = ps->rows[row];
1892 ps->rows[row] = NULL;
1893 for (crr_insn = rows_new[row];
1894 crr_insn; crr_insn = crr_insn->next_in_row)
1896 ddg_node_ptr u = crr_insn->node;
1897 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1899 SCHED_TIME (u) = new_time;
1900 crr_insn->cycle = new_time;
1901 SCHED_ROW (u) = new_time % new_ii;
1902 SCHED_STAGE (u) = new_time / new_ii;
1907 rows_new[split_row] = NULL;
1909 for (row = split_row; row < ii; row++)
1911 rows_new[row + 1] = ps->rows[row];
1912 ps->rows[row] = NULL;
1913 for (crr_insn = rows_new[row + 1];
1914 crr_insn; crr_insn = crr_insn->next_in_row)
1916 ddg_node_ptr u = crr_insn->node;
1917 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1919 SCHED_TIME (u) = new_time;
1920 crr_insn->cycle = new_time;
1921 SCHED_ROW (u) = new_time % new_ii;
1922 SCHED_STAGE (u) = new_time / new_ii;
1926 /* Updating ps. */
1927 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1928 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1929 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1930 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1931 free (ps->rows);
1932 ps->rows = rows_new;
1933 ps->ii = new_ii;
1934 gcc_assert (ps->min_cycle >= 0);
1936 verify_partial_schedule (ps, sched_nodes);
1938 if (dump_file)
1939 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1940 ps->max_cycle);
1943 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1944 UP which are the boundaries of it's scheduling window; compute using
1945 SCHED_NODES and II a row in the partial schedule that can be split
1946 which will separate a critical predecessor from a critical successor
1947 thereby expanding the window, and return it. */
1948 static int
1949 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1950 ddg_node_ptr u_node)
1952 ddg_edge_ptr e;
1953 int lower = INT_MIN, upper = INT_MAX;
1954 ddg_node_ptr crit_pred = NULL;
1955 ddg_node_ptr crit_succ = NULL;
1956 int crit_cycle;
1958 for (e = u_node->in; e != 0; e = e->next_in)
1960 ddg_node_ptr v_node = e->src;
1962 if (TEST_BIT (sched_nodes, v_node->cuid)
1963 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1964 if (SCHED_TIME (v_node) > lower)
1966 crit_pred = v_node;
1967 lower = SCHED_TIME (v_node);
1971 if (crit_pred != NULL)
1973 crit_cycle = SCHED_TIME (crit_pred) + 1;
1974 return SMODULO (crit_cycle, ii);
1977 for (e = u_node->out; e != 0; e = e->next_out)
1979 ddg_node_ptr v_node = e->dest;
1980 if (TEST_BIT (sched_nodes, v_node->cuid)
1981 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1982 if (SCHED_TIME (v_node) < upper)
1984 crit_succ = v_node;
1985 upper = SCHED_TIME (v_node);
1989 if (crit_succ != NULL)
1991 crit_cycle = SCHED_TIME (crit_succ);
1992 return SMODULO (crit_cycle, ii);
1995 if (dump_file)
1996 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
1998 return SMODULO ((low + up + 1) / 2, ii);
2001 static void
2002 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2004 int row;
2005 ps_insn_ptr crr_insn;
2007 for (row = 0; row < ps->ii; row++)
2008 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2010 ddg_node_ptr u = crr_insn->node;
2012 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
2013 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2014 popcount (sched_nodes) == number of insns in ps. */
2015 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2016 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2021 /* This page implements the algorithm for ordering the nodes of a DDG
2022 for modulo scheduling, activated through the
2023 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2025 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2026 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2027 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2028 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2029 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2030 #define DEPTH(x) (ASAP ((x)))
2032 typedef struct node_order_params * nopa;
2034 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2035 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2036 static nopa calculate_order_params (ddg_ptr, int, int *);
2037 static int find_max_asap (ddg_ptr, sbitmap);
2038 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2039 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
/* Direction in which the nodes of an SCC are traversed while ordering.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* Per-node ordering parameters, stored in each DDG node's aux.info and
   accessed through the ORDER_PARAMS/ASAP/ALAP/HEIGHT macros above.  */
struct node_order_params
{
  int asap;    /* Earliest time the node could be scheduled (see ASAP).  */
  int alap;    /* Latest time the node could be scheduled (see ALAP);
		  ALAP - ASAP gives the node's mobility (MOB).  */
  int height;  /* Height of the node (see HEIGHT).  */
};
2050 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2051 static void
2052 check_nodes_order (int *node_order, int num_nodes)
2054 int i;
2055 sbitmap tmp = sbitmap_alloc (num_nodes);
2057 sbitmap_zero (tmp);
2059 if (dump_file)
2060 fprintf (dump_file, "SMS final nodes order: \n");
2062 for (i = 0; i < num_nodes; i++)
2064 int u = node_order[i];
2066 if (dump_file)
2067 fprintf (dump_file, "%d ", u);
2068 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2070 SET_BIT (tmp, u);
2073 if (dump_file)
2074 fprintf (dump_file, "\n");
2076 sbitmap_free (tmp);
2079 /* Order the nodes of G for scheduling and pass the result in
2080 NODE_ORDER. Also set aux.count of each node to ASAP.
2081 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2082 static int
2083 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2085 int i;
2086 int rec_mii = 0;
2087 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2089 nopa nops = calculate_order_params (g, mii, pmax_asap);
2091 if (dump_file)
2092 print_sccs (dump_file, sccs, g);
2094 order_nodes_of_sccs (sccs, node_order);
2096 if (sccs->num_sccs > 0)
2097 /* First SCC has the largest recurrence_length. */
2098 rec_mii = sccs->sccs[0]->recurrence_length;
2100 /* Save ASAP before destroying node_order_params. */
2101 for (i = 0; i < g->num_nodes; i++)
2103 ddg_node_ptr v = &g->nodes[i];
2104 v->aux.count = ASAP (v);
2107 free (nops);
2108 free_ddg_all_sccs (sccs);
2109 check_nodes_order (node_order, g->num_nodes);
2111 return rec_mii;
/* Order all nodes of ALL_SCCS's DDG into NODE_ORDER: process the SCCs
   from the one with the highest recurrence MII downwards, extending each
   SCC with the nodes lying on paths between it and the already-ordered
   nodes, then order whatever remains outside any SCC.  */
2114 static void
2115 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2117 int i, pos = 0;
2118 ddg_ptr g = all_sccs->ddg;
2119 int num_nodes = g->num_nodes;
2120 sbitmap prev_sccs = sbitmap_alloc (num_nodes); /* Nodes ordered so far.  */
2121 sbitmap on_path = sbitmap_alloc (num_nodes)
2122 sbitmap tmp = sbitmap_alloc (num_nodes);
2123 sbitmap ones = sbitmap_alloc (num_nodes); /* Constant all-ones set.  */
2125 sbitmap_zero (prev_sccs);
2126 sbitmap_ones (ones);
2128 /* Perform the node ordering starting from the SCC with the highest recMII.
2129 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2130 for (i = 0; i < all_sccs->num_sccs; i++)
2132 ddg_scc_ptr scc = all_sccs->sccs[i];
2134 /* Add nodes on paths from previous SCCs to the current SCC. */
2135 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2136 sbitmap_a_or_b (tmp, scc->nodes, on_path);
2138 /* Add nodes on paths from the current SCC to previous SCCs. */
2139 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2140 sbitmap_a_or_b (tmp, tmp, on_path);
2142 /* Remove nodes of previous SCCs from current extended SCC. */
2143 sbitmap_difference (tmp, tmp, prev_sccs);
2145 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2146 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2149 /* Handle the remaining nodes that do not belong to any scc. Each call
2150 to order_nodes_in_scc handles a single connected component. */
2151 while (pos < g->num_nodes)
2153 sbitmap_difference (tmp, ones, prev_sccs);
2154 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2156 sbitmap_free (prev_sccs);
2157 sbitmap_free (on_path);
2158 sbitmap_free (tmp);
2159 sbitmap_free (ones);
2162 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
/* Compute ASAP, ALAP and HEIGHT for every node of G into a freshly
   xcalloc'ed array (caller frees), leave each node's aux.info pointing
   at its entry, and return the maximal ASAP through *PMAX_ASAP.  */
2163 static struct node_order_params *
2164 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2166 int u;
2167 int max_asap;
2168 int num_nodes = g->num_nodes;
2169 ddg_edge_ptr e;
2170 /* Allocate a place to hold ordering params for each node in the DDG. */
2171 nopa node_order_params_arr;
2173 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2174 node_order_params_arr = (nopa) xcalloc (num_nodes,
2175 sizeof (struct node_order_params));
2177 /* Set the aux pointer of each node to point to its order_params structure. */
2178 for (u = 0; u < num_nodes; u++)
2179 g->nodes[u].aux.info = &node_order_params_arr[u];
2181 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2182 calculate ASAP, ALAP, mobility, distance, and height for each node
2183 in the dependence (direct acyclic) graph. */
2185 /* We assume that the nodes in the array are in topological order. */
2187 max_asap = 0;
/* Forward pass: ASAP is the longest-latency path reaching the node over
   intra-iteration edges only (distance == 0); edges with distance > 0
   are the disregarded backarcs.  Relies on the topological order.  */
2188 for (u = 0; u < num_nodes; u++)
2190 ddg_node_ptr u_node = &g->nodes[u];
2192 ASAP (u_node) = 0;
2193 for (e = u_node->in; e; e = e->next_in)
2194 if (e->distance == 0)
2195 ASAP (u_node) = MAX (ASAP (u_node),
2196 ASAP (e->src) + e->latency);
2197 max_asap = MAX (max_asap, ASAP (u_node));
/* Backward pass: ALAP is the latest start that keeps every
   intra-iteration successor feasible within MAX_ASAP; HEIGHT is the
   longest-latency path from the node down to any sink.  */
2200 for (u = num_nodes - 1; u > -1; u--)
2202 ddg_node_ptr u_node = &g->nodes[u];
2204 ALAP (u_node) = max_asap;
2205 HEIGHT (u_node) = 0;
2206 for (e = u_node->out; e; e = e->next_out)
2207 if (e->distance == 0)
2209 ALAP (u_node) = MIN (ALAP (u_node),
2210 ALAP (e->dest) - e->latency);
2211 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2212 HEIGHT (e->dest) + e->latency);
2215 if (dump_file)
2217 fprintf (dump_file, "\nOrder params\n");
2218 for (u = 0; u < num_nodes; u++)
2220 ddg_node_ptr u_node = &g->nodes[u];
2222 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2223 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2227 *pmax_asap = max_asap;
2228 return node_order_params_arr;
2231 static int
2232 find_max_asap (ddg_ptr g, sbitmap nodes)
2234 unsigned int u = 0;
2235 int max_asap = -1;
2236 int result = -1;
2237 sbitmap_iterator sbi;
2239 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2241 ddg_node_ptr u_node = &g->nodes[u];
2243 if (max_asap < ASAP (u_node))
2245 max_asap = ASAP (u_node);
2246 result = u;
2249 return result;
2252 static int
2253 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2255 unsigned int u = 0;
2256 int max_hv = -1;
2257 int min_mob = INT_MAX;
2258 int result = -1;
2259 sbitmap_iterator sbi;
2261 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2263 ddg_node_ptr u_node = &g->nodes[u];
2265 if (max_hv < HEIGHT (u_node))
2267 max_hv = HEIGHT (u_node);
2268 min_mob = MOB (u_node);
2269 result = u;
2271 else if ((max_hv == HEIGHT (u_node))
2272 && (min_mob > MOB (u_node)))
2274 min_mob = MOB (u_node);
2275 result = u;
2278 return result;
2281 static int
2282 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2284 unsigned int u = 0;
2285 int max_dv = -1;
2286 int min_mob = INT_MAX;
2287 int result = -1;
2288 sbitmap_iterator sbi;
2290 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2292 ddg_node_ptr u_node = &g->nodes[u];
2294 if (max_dv < DEPTH (u_node))
2296 max_dv = DEPTH (u_node);
2297 min_mob = MOB (u_node);
2298 result = u;
2300 else if ((max_dv == DEPTH (u_node))
2301 && (min_mob > MOB (u_node)))
2303 min_mob = MOB (u_node);
2304 result = u;
2307 return result;
2310 /* Places the nodes of SCC into the NODE_ORDER array starting
2311 at position POS, according to the SMS ordering algorithm.
2312 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2313 the NODE_ORDER array, starting from position zero. */
2314 static int
2315 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2316 int * node_order, int pos)
2318 enum sms_direction dir;
2319 int num_nodes = g->num_nodes;
2320 sbitmap workset = sbitmap_alloc (num_nodes);
2321 sbitmap tmp = sbitmap_alloc (num_nodes);
2322 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2323 sbitmap predecessors = sbitmap_alloc (num_nodes);
2324 sbitmap successors = sbitmap_alloc (num_nodes);
2326 sbitmap_zero (predecessors);
2327 find_predecessors (predecessors, g, nodes_ordered);
2329 sbitmap_zero (successors);
2330 find_successors (successors, g, nodes_ordered);
/* Seed the work set and pick the initial direction: prefer SCC nodes
   adjacent to already-ordered nodes; failing that, start BOTTOMUP from
   the SCC node with maximal ASAP.  */
2332 sbitmap_zero (tmp);
2333 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2335 sbitmap_copy (workset, tmp);
2336 dir = BOTTOMUP;
2338 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2340 sbitmap_copy (workset, tmp);
2341 dir = TOPDOWN;
2343 else
2345 int u;
2347 sbitmap_zero (workset);
2348 if ((u = find_max_asap (g, scc)) >= 0)
2349 SET_BIT (workset, u);
2350 dir = BOTTOMUP;
2353 sbitmap_zero (zero_bitmap);
/* Alternate direction each time the current work set is exhausted,
   until every node of SCC has been placed.  */
2354 while (!sbitmap_equal (workset, zero_bitmap))
2356 int v;
2357 ddg_node_ptr v_node;
2358 sbitmap v_node_preds;
2359 sbitmap v_node_succs;
2361 if (dir == TOPDOWN)
2363 while (!sbitmap_equal (workset, zero_bitmap))
/* Pick maximal height, ties broken by minimal mobility.  */
2365 v = find_max_hv_min_mob (g, workset);
2366 v_node = &g->nodes[v];
2367 node_order[pos++] = v;
2368 v_node_succs = NODE_SUCCESSORS (v_node);
2369 sbitmap_a_and_b (tmp, v_node_succs, scc);
2371 /* Don't consider the already ordered successors again. */
2372 sbitmap_difference (tmp, tmp, nodes_ordered);
2373 sbitmap_a_or_b (workset, workset, tmp);
2374 RESET_BIT (workset, v);
2375 SET_BIT (nodes_ordered, v);
/* Work set drained: flip direction and reseed it with the SCC
   predecessors of everything ordered so far.  */
2377 dir = BOTTOMUP;
2378 sbitmap_zero (predecessors);
2379 find_predecessors (predecessors, g, nodes_ordered);
2380 sbitmap_a_and_b (workset, predecessors, scc);
2382 else
2384 while (!sbitmap_equal (workset, zero_bitmap))
/* Pick maximal depth, ties broken by minimal mobility.  */
2386 v = find_max_dv_min_mob (g, workset);
2387 v_node = &g->nodes[v];
2388 node_order[pos++] = v;
2389 v_node_preds = NODE_PREDECESSORS (v_node);
2390 sbitmap_a_and_b (tmp, v_node_preds, scc);
2392 /* Don't consider the already ordered predecessors again. */
2393 sbitmap_difference (tmp, tmp, nodes_ordered);
2394 sbitmap_a_or_b (workset, workset, tmp);
2395 RESET_BIT (workset, v);
2396 SET_BIT (nodes_ordered, v);
/* Work set drained: flip direction and reseed it with the SCC
   successors of everything ordered so far.  */
2398 dir = TOPDOWN;
2399 sbitmap_zero (successors);
2400 find_successors (successors, g, nodes_ordered);
2401 sbitmap_a_and_b (workset, successors, scc);
2404 sbitmap_free (tmp);
2405 sbitmap_free (workset);
2406 sbitmap_free (zero_bitmap);
2407 sbitmap_free (predecessors);
2408 sbitmap_free (successors);
2409 return pos;
2413 /* This page contains functions for manipulating partial-schedules during
2414 modulo scheduling. */
2416 /* Create a partial schedule and allocate a memory to hold II rows. */
2418 static partial_schedule_ptr
2419 create_partial_schedule (int ii, ddg_ptr g, int history)
2421 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2422 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2423 ps->ii = ii;
2424 ps->history = history;
2425 ps->min_cycle = INT_MAX;
2426 ps->max_cycle = INT_MIN;
2427 ps->g = g;
2429 return ps;
2432 /* Free the PS_INSNs in rows array of the given partial schedule.
2433 ??? Consider caching the PS_INSN's. */
2434 static void
2435 free_ps_insns (partial_schedule_ptr ps)
2437 int i;
2439 for (i = 0; i < ps->ii; i++)
2441 while (ps->rows[i])
2443 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2445 free (ps->rows[i]);
2446 ps->rows[i] = ps_insn;
2448 ps->rows[i] = NULL;
2452 /* Free all the memory allocated to the partial schedule. */
2454 static void
2455 free_partial_schedule (partial_schedule_ptr ps)
2457 if (!ps)
2458 return;
2459 free_ps_insns (ps);
2460 free (ps->rows);
2461 free (ps);
2464 /* Clear the rows array with its PS_INSNs, and create a new one with
2465 NEW_II rows. */
2467 static void
2468 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2470 if (!ps)
2471 return;
2472 free_ps_insns (ps);
2473 if (new_ii == ps->ii)
2474 return;
2475 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2476 * sizeof (ps_insn_ptr));
2477 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2478 ps->ii = new_ii;
2479 ps->min_cycle = INT_MAX;
2480 ps->max_cycle = INT_MIN;
2483 /* Prints the partial schedule as an ii rows array, for each rows
2484 print the ids of the insns in it. */
2485 void
2486 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2488 int i;
2490 for (i = 0; i < ps->ii; i++)
2492 ps_insn_ptr ps_i = ps->rows[i];
2494 fprintf (dump, "\n[ROW %d ]: ", i);
2495 while (ps_i)
2497 fprintf (dump, "%d, ",
2498 INSN_UID (ps_i->node->insn));
2499 ps_i = ps_i->next_in_row;
2504 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2505 static ps_insn_ptr
2506 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2508 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2510 ps_i->node = node;
2511 ps_i->next_in_row = NULL;
2512 ps_i->prev_in_row = NULL;
2513 ps_i->row_rest_count = rest_count;
2514 ps_i->cycle = cycle;
2516 return ps_i;
2520 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2521 node is not found in the partial schedule, else returns true. */
2522 static bool
2523 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2525 int row;
2527 if (!ps || !ps_i)
2528 return false;
2530 row = SMODULO (ps_i->cycle, ps->ii);
2531 if (! ps_i->prev_in_row)
2533 if (ps_i != ps->rows[row])
2534 return false;
2536 ps->rows[row] = ps_i->next_in_row;
2537 if (ps->rows[row])
2538 ps->rows[row]->prev_in_row = NULL;
2540 else
2542 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2543 if (ps_i->next_in_row)
2544 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2546 free (ps_i);
2547 return true;
2550 /* Unlike what literature describes for modulo scheduling (which focuses
2551 on VLIW machines) the order of the instructions inside a cycle is
2552 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2553 where the current instruction should go relative to the already
2554 scheduled instructions in the given cycle. Go over these
2555 instructions and find the first possible column to put it in. */
2556 static bool
2557 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2558 sbitmap must_precede, sbitmap must_follow)
2560 ps_insn_ptr next_ps_i;
2561 ps_insn_ptr first_must_follow = NULL;
2562 ps_insn_ptr last_must_precede = NULL;
2563 int row;
2565 if (! ps_i)
2566 return false;
2568 row = SMODULO (ps_i->cycle, ps->ii);
2570 /* Find the first must follow and the last must precede
2571 and insert the node immediately after the must precede
2572 but make sure that it there is no must follow after it. */
/* A NULL MUST_FOLLOW / MUST_PRECEDE bitmap means that kind of ordering
   constraint is simply absent.  */
2573 for (next_ps_i = ps->rows[row];
2574 next_ps_i;
2575 next_ps_i = next_ps_i->next_in_row)
2577 if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
2578 && ! first_must_follow)
2579 first_must_follow = next_ps_i;
2580 if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
2582 /* If we have already met a node that must follow, then
2583 there is no possible column. */
2584 if (first_must_follow)
2585 return false;
2586 else
2587 last_must_precede = next_ps_i;
2591 /* Now insert the node after INSERT_AFTER_PSI. */
2593 if (! last_must_precede)
/* No must-precede insn in this row: insert at the head.  */
2595 ps_i->next_in_row = ps->rows[row];
2596 ps_i->prev_in_row = NULL;
2597 if (ps_i->next_in_row)
2598 ps_i->next_in_row->prev_in_row = ps_i;
2599 ps->rows[row] = ps_i;
2601 else
/* Splice PS_I in right after the last must-precede insn.  */
2603 ps_i->next_in_row = last_must_precede->next_in_row;
2604 last_must_precede->next_in_row = ps_i;
2605 ps_i->prev_in_row = last_must_precede;
2606 if (ps_i->next_in_row)
2607 ps_i->next_in_row->prev_in_row = ps_i;
2610 return true;
2613 /* Advances the PS_INSN one column in its current row; returns false
2614 in failure and true in success. Bit N is set in MUST_FOLLOW if
2615 the node with cuid N must be come after the node pointed to by
2616 PS_I when scheduled in the same cycle. */
2617 static int
2618 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2619 sbitmap must_follow)
2621 ps_insn_ptr prev, next;
2622 int row;
2623 ddg_node_ptr next_node;
2625 if (!ps || !ps_i)
2626 return false;
2628 row = SMODULO (ps_i->cycle, ps->ii);
/* Already in the last column of its row: nowhere to advance to.  */
2630 if (! ps_i->next_in_row)
2631 return false;
2633 next_node = ps_i->next_in_row->node;
2635 /* Check if next_in_row is dependent on ps_i, both having same sched
2636 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2637 if (must_follow && TEST_BIT (must_follow, next_node->cuid))
2638 return false;
2640 /* Advance PS_I over its next_in_row in the doubly linked list. */
/* Swap PS_I with NEXT so the row afterwards reads PREV, NEXT, PS_I,
   updating the row head if PS_I was first.  */
2641 prev = ps_i->prev_in_row;
2642 next = ps_i->next_in_row;
2644 if (ps_i == ps->rows[row])
2645 ps->rows[row] = next;
2647 ps_i->next_in_row = next->next_in_row;
2649 if (next->next_in_row)
2650 next->next_in_row->prev_in_row = ps_i;
2652 next->next_in_row = ps_i;
2653 ps_i->prev_in_row = next;
2655 next->prev_in_row = prev;
2656 if (prev)
2657 prev->next_in_row = next;
2659 return true;
2662 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2663 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2664 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2665 before/after (respectively) the node pointed to by PS_I when scheduled
2666 in the same cycle. */
2667 static ps_insn_ptr
2668 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2669 sbitmap must_precede, sbitmap must_follow)
2671 ps_insn_ptr ps_i;
2672 int rest_count = 1;
2673 int row = SMODULO (cycle, ps->ii);
2675 if (ps->rows[row]
2676 && ps->rows[row]->row_rest_count >= issue_rate)
2677 return NULL;
2679 if (ps->rows[row])
2680 rest_count += ps->rows[row]->row_rest_count;
2682 ps_i = create_ps_insn (node, rest_count, cycle);
2684 /* Finds and inserts PS_I according to MUST_FOLLOW and
2685 MUST_PRECEDE. */
2686 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2688 free (ps_i);
2689 return NULL;
2692 return ps_i;
2695 /* Advance time one cycle. Assumes DFA is being used. */
2696 static void
2697 advance_one_cycle (void)
2699 if (targetm.sched.dfa_pre_cycle_insn)
2700 state_transition (curr_state,
2701 targetm.sched.dfa_pre_cycle_insn ());
2703 state_transition (curr_state, NULL);
2705 if (targetm.sched.dfa_post_cycle_insn)
2706 state_transition (curr_state,
2707 targetm.sched.dfa_post_cycle_insn ());
2712 /* Checks if PS has resource conflicts according to DFA, starting from
2713 FROM cycle to TO cycle; returns true if there are conflicts and false
2714 if there are no conflicts. Assumes DFA is being used. */
2715 static int
2716 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2718 int cycle;
/* Replay the whole cycle window through a fresh DFA state.  */
2720 state_reset (curr_state);
2722 for (cycle = from; cycle <= to; cycle++)
2724 ps_insn_ptr crr_insn;
2725 /* Holds the remaining issue slots in the current row. */
2726 int can_issue_more = issue_rate;
2728 /* Walk through the DFA for the current row. */
/* Cycles map onto rows modulo II, so out-of-range FROM/TO wrap.  */
2729 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2730 crr_insn;
2731 crr_insn = crr_insn->next_in_row)
2733 rtx insn = crr_insn->node->insn;
2735 if (!INSN_P (insn))
2736 continue;
2738 /* Check if there is room for the current insn. */
2739 if (!can_issue_more || state_dead_lock_p (curr_state))
2740 return true;
2742 /* Update the DFA state and return with failure if the DFA found
2743 resource conflicts. */
2744 if (state_transition (curr_state, insn) >= 0)
2745 return true;
2747 if (targetm.sched.variable_issue)
2748 can_issue_more =
2749 targetm.sched.variable_issue (sched_dump, sched_verbose,
2750 insn, can_issue_more);
2751 /* A naked CLOBBER or USE generates no instruction, so don't
2752 let them consume issue slots. */
2753 else if (GET_CODE (PATTERN (insn)) != USE
2754 && GET_CODE (PATTERN (insn)) != CLOBBER)
2755 can_issue_more--;
2758 /* Advance the DFA to the next cycle. */
2759 advance_one_cycle ();
2761 return false;
2764 /* Checks if the given node causes resource conflicts when added to PS at
2765 cycle C. If not the node is added to PS and returned; otherwise zero
2766 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2767 cuid N must be come before/after (respectively) the node pointed to by
2768 PS_I when scheduled in the same cycle. */
2769 ps_insn_ptr
2770 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2771 int c, sbitmap must_precede,
2772 sbitmap must_follow)
2774 int has_conflicts = 0;
2775 ps_insn_ptr ps_i;
2777 /* First add the node to the PS, if this succeeds check for
2778 conflicts, trying different issue slots in the same row. */
2779 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2780 return NULL; /* Failed to insert the node at the given cycle. */
2782 has_conflicts = ps_has_conflicts (ps, c, c)
2783 || (ps->history > 0
2784 && ps_has_conflicts (ps,
2785 c - ps->history,
2786 c + ps->history));
2788 /* Try different issue slots to find one that the given node can be
2789 scheduled in without conflicts. */
2790 while (has_conflicts)
2792 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2793 break;
2794 has_conflicts = ps_has_conflicts (ps, c, c)
2795 || (ps->history > 0
2796 && ps_has_conflicts (ps,
2797 c - ps->history,
2798 c + ps->history));
2801 if (has_conflicts)
2803 remove_node_from_ps (ps, ps_i);
2804 return NULL;
2807 ps->min_cycle = MIN (ps->min_cycle, c);
2808 ps->max_cycle = MAX (ps->max_cycle, c);
2809 return ps_i;
2812 /* Rotate the rows of PS such that insns scheduled at time
2813 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2814 void
2815 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2817 int i, row, backward_rotates;
2818 int last_row = ps->ii - 1;
2820 if (start_cycle == 0)
2821 return;
2823 backward_rotates = SMODULO (start_cycle, ps->ii);
2825 /* Revisit later and optimize this into a single loop. */
2826 for (i = 0; i < backward_rotates; i++)
2828 ps_insn_ptr first_row = ps->rows[0];
2830 for (row = 0; row < last_row; row++)
2831 ps->rows[row] = ps->rows[row+1];
2833 ps->rows[last_row] = first_row;
2836 ps->max_cycle -= start_cycle;
2837 ps->min_cycle -= start_cycle;
2840 #endif /* INSN_SCHEDULING */
2842 static bool
2843 gate_handle_sms (void)
2845 return (optimize > 0 && flag_modulo_sched);
2849 /* Run instruction scheduler. */
2850 /* Perform SMS module scheduling. */
2851 static unsigned int
2852 rest_of_handle_sms (void)
2854 #ifdef INSN_SCHEDULING
2855 basic_block bb;
2857 /* Collect loop information to be used in SMS. */
2858 cfg_layout_initialize (0);
2859 sms_schedule ();
2861 /* Update the life information, because we add pseudos. */
2862 max_regno = max_reg_num ();
2864 /* Finalize layout changes. */
/* Re-chain the basic blocks through bb->aux -- presumably consumed by
   cfg_layout_finalize; the exit block's predecessor ends the chain
   (TODO confirm against cfglayout.c).  */
2865 FOR_EACH_BB (bb)
2866 if (bb->next_bb != EXIT_BLOCK_PTR)
2867 bb->aux = bb->next_bb;
2868 free_dominance_info (CDI_DOMINATORS);
2869 cfg_layout_finalize ();
2870 #endif /* INSN_SCHEDULING */
/* Pass execute hooks return a TODO flag set; 0 means no extra TODOs.  */
2871 return 0;
2874 struct rtl_opt_pass pass_sms =
2877 RTL_PASS,
2878 "sms", /* name */
2879 gate_handle_sms, /* gate */
2880 rest_of_handle_sms, /* execute */
2881 NULL, /* sub */
2882 NULL, /* next */
2883 0, /* static_pass_number */
2884 TV_SMS, /* tv_id */
2885 0, /* properties_required */
2886 0, /* properties_provided */
2887 0, /* properties_destroyed */
2888 TODO_dump_func, /* todo_flags_start */
2889 TODO_df_finish | TODO_verify_rtl_sharing |
2890 TODO_dump_func |
2891 TODO_ggc_collect /* todo_flags_finish */