2008-01-26 Jerry DeLisle <jvdelisle@gcc.gnu.org>
[official-gcc.git] / gcc / modulo-sched.c
bloba203664526a14de8b7072ca34754996352e7a8c3
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
91 emitting the control part, and a kernel which is to iterate that
92 constant number of iterations less than the original loop. So the
93 control part should be a set of insns clearly identified and having
94 its own iv, not otherwise used in the loop (at-least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
97 where (1) the control part comprises of all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
102 /* This page defines partial-schedule structures and functions for
103 modulo scheduling. */
105 typedef struct partial_schedule *partial_schedule_ptr;
106 typedef struct ps_insn *ps_insn_ptr;
108 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
111 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
114 /* Perform signed modulo, always returning a non-negative value. */
115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
117 /* The number of different iterations the nodes in ps span, assuming
118 the stage boundaries are placed efficiently. */
119 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
120 + 1 + (ps)->ii - 1) / (ps)->ii)
122 /* A single instruction in the partial schedule. */
123 struct ps_insn
125 /* The corresponding DDG_NODE. */
126 ddg_node_ptr node;
128 /* The (absolute) cycle in which the PS instruction is scheduled.
129 Same as SCHED_TIME (node). */
130 int cycle;
132 /* The next/prev PS_INSN in the same row. */
133 ps_insn_ptr next_in_row,
134 prev_in_row;
136 /* The number of nodes in the same row that come after this node. */
137 int row_rest_count;
140 /* Holds the partial schedule as an array of II rows. Each entry of the
141 array points to a linked list of PS_INSNs, which represents the
142 instructions that are scheduled for that row. */
143 struct partial_schedule
145 int ii; /* Number of rows in the partial schedule. */
146 int history; /* Threshold for conflict checking using DFA. */
148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
149 ps_insn_ptr *rows;
151 /* The earliest absolute cycle of an insn in the partial schedule. */
152 int min_cycle;
154 /* The latest absolute cycle of an insn in the partial schedule. */
155 int max_cycle;
157 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
160 /* We use this to record all the register replacements we do in
161 the kernel so we can undo SMS if it is not profitable. */
162 struct undo_replace_buff_elem
164 rtx insn;
165 rtx orig_reg;
166 rtx new_reg;
167 struct undo_replace_buff_elem *next;
172 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
173 static void free_partial_schedule (partial_schedule_ptr);
174 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
175 void print_partial_schedule (partial_schedule_ptr, FILE *);
176 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
177 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
178 ddg_node_ptr node, int cycle,
179 sbitmap must_precede,
180 sbitmap must_follow);
181 static void rotate_partial_schedule (partial_schedule_ptr, int);
182 void set_row_column_for_ps (partial_schedule_ptr);
183 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
184 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
187 /* This page defines constants and structures for the modulo scheduling
188 driver. */
190 /* As in haifa-sched.c: */
191 /* issue_rate is the number of insns that can be scheduled in the same
192 machine cycle. It can be defined in the config/mach/mach.h file,
193 otherwise we set it to 1. */
195 static int issue_rate;
197 static int sms_order_nodes (ddg_ptr, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr);
199 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr, rtx);
201 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
202 rtx, rtx);
203 static void duplicate_insns_of_cycles (partial_schedule_ptr,
204 int, int, int, rtx);
206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
208 #define SCHED_FIRST_REG_MOVE(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
210 #define SCHED_NREG_MOVES(x) \
211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
217 typedef struct node_sched_params
219 int asap; /* A lower-bound on the absolute scheduling cycle. */
220 int time; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
226 rtx first_reg_move;
228 /* The number of register-move instructions added, immediately preceding
229 first_reg_move. */
230 int nreg_moves;
232 int row; /* Holds time % ii. */
233 int stage; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
237 int column;
238 } *node_sched_params_ptr;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
244 static const char *
245 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
247 static char tmp[80];
249 sprintf (tmp, "i%4d", INSN_UID (insn));
250 return tmp;
253 static void
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
255 regset cond_exec ATTRIBUTE_UNUSED,
256 regset used ATTRIBUTE_UNUSED,
257 regset set ATTRIBUTE_UNUSED)
261 static struct sched_info sms_sched_info =
263 NULL,
264 NULL,
265 NULL,
266 NULL,
267 NULL,
268 sms_print_insn,
269 NULL,
270 compute_jump_reg_dependencies,
271 NULL, NULL,
272 NULL, NULL,
273 0, 0, 0,
275 NULL, NULL, NULL, NULL, NULL,
280 /* Given HEAD and TAIL which are the first and last insns in a loop;
281 return the register which controls the loop. Return zero if it has
282 more than one occurrence in the loop besides the control part or the
283 do-loop pattern is not of the form we expect. */
284 static rtx
285 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
287 #ifdef HAVE_doloop_end
288 rtx reg, condition, insn, first_insn_not_to_check;
290 if (!JUMP_P (tail))
291 return NULL_RTX;
293 /* TODO: Free SMS's dependence on doloop_condition_get. */
294 condition = doloop_condition_get (tail);
295 if (! condition)
296 return NULL_RTX;
298 if (REG_P (XEXP (condition, 0)))
299 reg = XEXP (condition, 0);
300 else if (GET_CODE (XEXP (condition, 0)) == PLUS
301 && REG_P (XEXP (XEXP (condition, 0), 0)))
302 reg = XEXP (XEXP (condition, 0), 0);
303 else
304 gcc_unreachable ();
306 /* Check that the COUNT_REG has no other occurrences in the loop
307 until the decrement. We assume the control part consists of
308 either a single (parallel) branch-on-count or a (non-parallel)
309 branch immediately preceded by a single (decrement) insn. */
310 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
311 : PREV_INSN (tail));
313 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
314 if (reg_mentioned_p (reg, insn))
316 if (dump_file)
318 fprintf (dump_file, "SMS count_reg found ");
319 print_rtl_single (dump_file, reg);
320 fprintf (dump_file, " outside control in insn:\n");
321 print_rtl_single (dump_file, insn);
324 return NULL_RTX;
327 return reg;
328 #else
329 return NULL_RTX;
330 #endif
333 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
334 that the number of iterations is a compile-time constant. If so,
335 return the rtx that sets COUNT_REG to a constant, and set COUNT to
336 this constant. Otherwise return 0. */
337 static rtx
338 const_iteration_count (rtx count_reg, basic_block pre_header,
339 HOST_WIDEST_INT * count)
341 rtx insn;
342 rtx head, tail;
344 if (! pre_header)
345 return NULL_RTX;
347 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
349 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
350 if (INSN_P (insn) && single_set (insn) &&
351 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
353 rtx pat = single_set (insn);
355 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
357 *count = INTVAL (SET_SRC (pat));
358 return insn;
361 return NULL_RTX;
364 return NULL_RTX;
367 /* A very simple resource-based lower bound on the initiation interval.
368 ??? Improve the accuracy of this bound by considering the
369 utilization of various units. */
370 static int
371 res_MII (ddg_ptr g)
373 if (targetm.sched.sms_res_mii)
374 return targetm.sched.sms_res_mii (g);
376 return (g->num_nodes / issue_rate);
380 /* Points to the array that contains the sched data for each node. */
381 static node_sched_params_ptr node_sched_params;
383 /* Allocate sched_params for each node and initialize it. Assumes that
384 the aux field of each node contain the asap bound (computed earlier),
385 and copies it into the sched_params field. */
386 static void
387 set_node_sched_params (ddg_ptr g)
389 int i;
391 /* Allocate for each node in the DDG a place to hold the "sched_data". */
392 /* Initialize ASAP/ALAP/HIGHT to zero. */
393 node_sched_params = (node_sched_params_ptr)
394 xcalloc (g->num_nodes,
395 sizeof (struct node_sched_params));
397 /* Set the pointer of the general data of the node to point to the
398 appropriate sched_params structure. */
399 for (i = 0; i < g->num_nodes; i++)
401 /* Watch out for aliasing problems? */
402 node_sched_params[i].asap = g->nodes[i].aux.count;
403 g->nodes[i].aux.info = &node_sched_params[i];
407 static void
408 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
410 int i;
412 if (! file)
413 return;
414 for (i = 0; i < num_nodes; i++)
416 node_sched_params_ptr nsp = &node_sched_params[i];
417 rtx reg_move = nsp->first_reg_move;
418 int j;
420 fprintf (file, "Node = %d; INSN = %d\n", i,
421 (INSN_UID (g->nodes[i].insn)));
422 fprintf (file, " asap = %d:\n", nsp->asap);
423 fprintf (file, " time = %d:\n", nsp->time);
424 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
425 for (j = 0; j < nsp->nreg_moves; j++)
427 fprintf (file, " reg_move = ");
428 print_rtl_single (file, reg_move);
429 reg_move = PREV_INSN (reg_move);
435 Breaking intra-loop register anti-dependences:
436 Each intra-loop register anti-dependence implies a cross-iteration true
437 dependence of distance 1. Therefore, we can remove such false dependencies
438 and figure out if the partial schedule broke them by checking if (for a
439 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
440 if so generate a register move. The number of such moves is equal to:
441 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
442 nreg_moves = ----------------------------------- + 1 - { dependence.
443 ii { 1 if not.
445 static struct undo_replace_buff_elem *
446 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
448 ddg_ptr g = ps->g;
449 int ii = ps->ii;
450 int i;
451 struct undo_replace_buff_elem *reg_move_replaces = NULL;
453 for (i = 0; i < g->num_nodes; i++)
455 ddg_node_ptr u = &g->nodes[i];
456 ddg_edge_ptr e;
457 int nreg_moves = 0, i_reg_move;
458 sbitmap *uses_of_defs;
459 rtx last_reg_move;
460 rtx prev_reg, old_reg;
462 /* Compute the number of reg_moves needed for u, by looking at life
463 ranges started at u (excluding self-loops). */
464 for (e = u->out; e; e = e->next_out)
465 if (e->type == TRUE_DEP && e->dest != e->src)
467 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
469 if (e->distance == 1)
470 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
472 /* If dest precedes src in the schedule of the kernel, then dest
473 will read before src writes and we can save one reg_copy. */
474 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
475 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
476 nreg_moves4e--;
478 nreg_moves = MAX (nreg_moves, nreg_moves4e);
481 if (nreg_moves == 0)
482 continue;
484 /* Every use of the register defined by node may require a different
485 copy of this register, depending on the time the use is scheduled.
486 Set a bitmap vector, telling which nodes use each copy of this
487 register. */
488 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
489 sbitmap_vector_zero (uses_of_defs, nreg_moves);
490 for (e = u->out; e; e = e->next_out)
491 if (e->type == TRUE_DEP && e->dest != e->src)
493 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
495 if (e->distance == 1)
496 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
498 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
499 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
500 dest_copy--;
502 if (dest_copy)
503 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
506 /* Now generate the reg_moves, attaching relevant uses to them. */
507 SCHED_NREG_MOVES (u) = nreg_moves;
508 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
509 /* Insert the reg-moves right before the notes which precede
510 the insn they relates to. */
511 last_reg_move = u->first_note;
513 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
515 unsigned int i_use = 0;
516 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
517 rtx reg_move = gen_move_insn (new_reg, prev_reg);
518 sbitmap_iterator sbi;
520 add_insn_before (reg_move, last_reg_move, NULL);
521 last_reg_move = reg_move;
523 if (!SCHED_FIRST_REG_MOVE (u))
524 SCHED_FIRST_REG_MOVE (u) = reg_move;
526 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
528 struct undo_replace_buff_elem *rep;
530 rep = (struct undo_replace_buff_elem *)
531 xcalloc (1, sizeof (struct undo_replace_buff_elem));
532 rep->insn = g->nodes[i_use].insn;
533 rep->orig_reg = old_reg;
534 rep->new_reg = new_reg;
536 if (! reg_move_replaces)
537 reg_move_replaces = rep;
538 else
540 rep->next = reg_move_replaces;
541 reg_move_replaces = rep;
544 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
545 if (rescan)
546 df_insn_rescan (g->nodes[i_use].insn);
549 prev_reg = new_reg;
551 sbitmap_vector_free (uses_of_defs);
553 return reg_move_replaces;
556 /* Free memory allocated for the undo buffer. */
557 static void
558 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
561 while (reg_move_replaces)
563 struct undo_replace_buff_elem *rep = reg_move_replaces;
565 reg_move_replaces = reg_move_replaces->next;
566 free (rep);
570 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
571 of SCHED_ROW and SCHED_STAGE. */
572 static void
573 normalize_sched_times (partial_schedule_ptr ps)
575 int row;
576 int amount = PS_MIN_CYCLE (ps);
577 int ii = ps->ii;
578 ps_insn_ptr crr_insn;
580 for (row = 0; row < ii; row++)
581 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
583 ddg_node_ptr u = crr_insn->node;
584 int normalized_time = SCHED_TIME (u) - amount;
586 if (dump_file)
587 fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
588 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
589 (u), ps->min_cycle);
590 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
591 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
592 SCHED_TIME (u) = normalized_time;
593 SCHED_ROW (u) = normalized_time % ii;
594 SCHED_STAGE (u) = normalized_time / ii;
598 /* Set SCHED_COLUMN of each node according to its position in PS. */
599 static void
600 set_columns_for_ps (partial_schedule_ptr ps)
602 int row;
604 for (row = 0; row < ps->ii; row++)
606 ps_insn_ptr cur_insn = ps->rows[row];
607 int column = 0;
609 for (; cur_insn; cur_insn = cur_insn->next_in_row)
610 SCHED_COLUMN (cur_insn->node) = column++;
614 /* Permute the insns according to their order in PS, from row 0 to
615 row ii-1, and position them right before LAST. This schedules
616 the insns of the loop kernel. */
617 static void
618 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
620 int ii = ps->ii;
621 int row;
622 ps_insn_ptr ps_ij;
624 for (row = 0; row < ii ; row++)
625 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
626 if (PREV_INSN (last) != ps_ij->node->insn)
627 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
628 PREV_INSN (last));
631 static void
632 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
633 int to_stage, int for_prolog, rtx count_reg)
635 int row;
636 ps_insn_ptr ps_ij;
638 for (row = 0; row < ps->ii; row++)
639 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
641 ddg_node_ptr u_node = ps_ij->node;
642 int j, i_reg_moves;
643 rtx reg_move = NULL_RTX;
645 /* Do not duplicate any insn which refers to count_reg as it
646 belongs to the control part.
647 TODO: This should be done by analyzing the control part of
648 the loop. */
649 if (reg_mentioned_p (count_reg, u_node->insn))
650 continue;
652 if (for_prolog)
654 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
655 number of reg_moves starting with the second occurrence of
656 u_node, which is generated if its SCHED_STAGE <= to_stage. */
657 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
658 i_reg_moves = MAX (i_reg_moves, 0);
659 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
661 /* The reg_moves start from the *first* reg_move backwards. */
662 if (i_reg_moves)
664 reg_move = SCHED_FIRST_REG_MOVE (u_node);
665 for (j = 1; j < i_reg_moves; j++)
666 reg_move = PREV_INSN (reg_move);
669 else /* It's for the epilog. */
671 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
672 starting to decrease one stage after u_node no longer occurs;
673 that is, generate all reg_moves until
674 SCHED_STAGE (u_node) == from_stage - 1. */
675 i_reg_moves = SCHED_NREG_MOVES (u_node)
676 - (from_stage - SCHED_STAGE (u_node) - 1);
677 i_reg_moves = MAX (i_reg_moves, 0);
678 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
680 /* The reg_moves start from the *last* reg_move forwards. */
681 if (i_reg_moves)
683 reg_move = SCHED_FIRST_REG_MOVE (u_node);
684 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
685 reg_move = PREV_INSN (reg_move);
689 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
690 emit_insn (copy_rtx (PATTERN (reg_move)));
691 if (SCHED_STAGE (u_node) >= from_stage
692 && SCHED_STAGE (u_node) <= to_stage)
693 duplicate_insn_chain (u_node->first_note, u_node->insn);
698 /* Generate the instructions (including reg_moves) for prolog & epilog. */
699 static void
700 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
701 rtx count_reg, rtx count_init)
703 int i;
704 int last_stage = PS_STAGE_COUNT (ps) - 1;
705 edge e;
707 /* Generate the prolog, inserting its insns on the loop-entry edge. */
708 start_sequence ();
710 if (!count_init)
712 /* Generate instructions at the beginning of the prolog to
713 adjust the loop count by STAGE_COUNT. If loop count is constant
714 (count_init), this constant is adjusted by STAGE_COUNT in
715 generate_prolog_epilog function. */
716 rtx sub_reg = NULL_RTX;
718 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
719 count_reg, GEN_INT (last_stage),
720 count_reg, 1, OPTAB_DIRECT);
721 gcc_assert (REG_P (sub_reg));
722 if (REGNO (sub_reg) != REGNO (count_reg))
723 emit_move_insn (count_reg, sub_reg);
726 for (i = 0; i < last_stage; i++)
727 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
729 /* Put the prolog on the entry edge. */
730 e = loop_preheader_edge (loop);
731 split_edge_and_insert (e, get_insns ());
733 end_sequence ();
735 /* Generate the epilog, inserting its insns on the loop-exit edge. */
736 start_sequence ();
738 for (i = 0; i < last_stage; i++)
739 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
741 /* Put the epilogue on the exit edge. */
742 gcc_assert (single_exit (loop));
743 e = single_exit (loop);
744 split_edge_and_insert (e, get_insns ());
745 end_sequence ();
748 /* Return true if all the BBs of the loop are empty except the
749 loop header. */
750 static bool
751 loop_single_full_bb_p (struct loop *loop)
753 unsigned i;
754 basic_block *bbs = get_loop_body (loop);
756 for (i = 0; i < loop->num_nodes ; i++)
758 rtx head, tail;
759 bool empty_bb = true;
761 if (bbs[i] == loop->header)
762 continue;
764 /* Make sure that basic blocks other than the header
765 have only notes labels or jumps. */
766 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
767 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
769 if (NOTE_P (head) || LABEL_P (head)
770 || (INSN_P (head) && JUMP_P (head)))
771 continue;
772 empty_bb = false;
773 break;
776 if (! empty_bb)
778 free (bbs);
779 return false;
782 free (bbs);
783 return true;
786 /* A simple loop from SMS point of view; it is a loop that is composed of
787 either a single basic block or two BBs - a header and a latch. */
788 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
789 && (EDGE_COUNT (loop->latch->preds) == 1) \
790 && (EDGE_COUNT (loop->latch->succs) == 1))
792 /* Return true if the loop is in its canonical form and false if not.
793 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
794 static bool
795 loop_canon_p (struct loop *loop)
798 if (loop->inner || !loop_outer (loop))
800 if (dump_file)
801 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
802 return false;
805 if (!single_exit (loop))
807 if (dump_file)
809 rtx insn = BB_END (loop->header);
811 fprintf (dump_file, "SMS loop many exits ");
812 fprintf (dump_file, " %s %d (file, line)\n",
813 insn_file (insn), insn_line (insn));
815 return false;
818 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
820 if (dump_file)
822 rtx insn = BB_END (loop->header);
824 fprintf (dump_file, "SMS loop many BBs. ");
825 fprintf (dump_file, " %s %d (file, line)\n",
826 insn_file (insn), insn_line (insn));
828 return false;
831 return true;
834 /* If there are more than one entry for the loop,
835 make it one by splitting the first entry edge and
836 redirecting the others to the new BB. */
837 static void
838 canon_loop (struct loop *loop)
840 edge e;
841 edge_iterator i;
843 /* Avoid annoying special cases of edges going to exit
844 block. */
845 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
846 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
847 split_edge (e);
849 if (loop->latch == loop->header
850 || EDGE_COUNT (loop->latch->succs) > 1)
852 FOR_EACH_EDGE (e, i, loop->header->preds)
853 if (e->src == loop->latch)
854 break;
855 split_edge (e);
859 /* Probability in % that the sms-ed loop rolls enough so that optimized
860 version may be entered. Just a guess. */
861 #define PROB_SMS_ENOUGH_ITERATIONS 80
863 /* Used to calculate the upper bound of ii. */
864 #define MAXII_FACTOR 2
866 /* Main entry point, perform SMS scheduling on the loops of the function
867 that consist of single basic blocks. */
868 static void
869 sms_schedule (void)
871 rtx insn;
872 ddg_ptr *g_arr, g;
873 int * node_order;
874 int maxii, max_asap;
875 loop_iterator li;
876 partial_schedule_ptr ps;
877 basic_block bb = NULL;
878 struct loop *loop;
879 basic_block condition_bb = NULL;
880 edge latch_edge;
881 gcov_type trip_count = 0;
883 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
884 | LOOPS_HAVE_RECORDED_EXITS);
885 if (number_of_loops () <= 1)
887 loop_optimizer_finalize ();
888 return; /* There are no loops to schedule. */
891 /* Initialize issue_rate. */
892 if (targetm.sched.issue_rate)
894 int temp = reload_completed;
896 reload_completed = 1;
897 issue_rate = targetm.sched.issue_rate ();
898 reload_completed = temp;
900 else
901 issue_rate = 1;
903 /* Initialize the scheduler. */
904 current_sched_info = &sms_sched_info;
906 /* Init Data Flow analysis, to be used in interloop dep calculation. */
907 df_set_flags (DF_LR_RUN_DCE);
908 df_rd_add_problem ();
909 df_note_add_problem ();
910 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
911 df_analyze ();
912 regstat_compute_calls_crossed ();
913 sched_init ();
915 /* Allocate memory to hold the DDG array one entry for each loop.
916 We use loop->num as index into this array. */
917 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
919 if (dump_file)
921 fprintf (dump_file, "\n\nSMS analysis phase\n");
922 fprintf (dump_file, "===================\n\n");
925 /* Build DDGs for all the relevant loops and hold them in G_ARR
926 indexed by the loop index. */
927 FOR_EACH_LOOP (li, loop, 0)
929 rtx head, tail;
930 rtx count_reg;
932 /* For debugging. */
933 if (dbg_cnt (sms_sched_loop) == false)
935 if (dump_file)
936 fprintf (dump_file, "SMS reached max limit... \n");
938 break;
941 if (dump_file)
943 rtx insn = BB_END (loop->header);
945 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
946 loop->num, insn_file (insn), insn_line (insn));
950 if (! loop_canon_p (loop))
951 continue;
953 if (! loop_single_full_bb_p (loop))
955 if (dump_file)
956 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
957 continue;
960 bb = loop->header;
962 get_ebb_head_tail (bb, bb, &head, &tail);
963 latch_edge = loop_latch_edge (loop);
964 gcc_assert (single_exit (loop));
965 if (single_exit (loop)->count)
966 trip_count = latch_edge->count / single_exit (loop)->count;
968 /* Perfrom SMS only on loops that their average count is above threshold. */
970 if ( latch_edge->count
971 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
973 if (dump_file)
975 fprintf (dump_file, " %s %d (file, line)\n",
976 insn_file (tail), insn_line (tail));
977 fprintf (dump_file, "SMS single-bb-loop\n");
978 if (profile_info && flag_branch_probabilities)
980 fprintf (dump_file, "SMS loop-count ");
981 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
982 (HOST_WIDEST_INT) bb->count);
983 fprintf (dump_file, "\n");
984 fprintf (dump_file, "SMS trip-count ");
985 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
986 (HOST_WIDEST_INT) trip_count);
987 fprintf (dump_file, "\n");
988 fprintf (dump_file, "SMS profile-sum-max ");
989 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
990 (HOST_WIDEST_INT) profile_info->sum_max);
991 fprintf (dump_file, "\n");
994 continue;
997 /* Make sure this is a doloop. */
998 if ( !(count_reg = doloop_register_get (head, tail)))
1000 if (dump_file)
1001 fprintf (dump_file, "SMS doloop_register_get failed\n");
1002 continue;
1005 /* Don't handle BBs with calls or barriers, or !single_set insns,
1006 or auto-increment insns (to avoid creating invalid reg-moves
1007 for the auto-increment insns).
1008 ??? Should handle auto-increment insns.
1009 ??? Should handle insns defining subregs. */
1010 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1012 rtx set;
1014 if (CALL_P (insn)
1015 || BARRIER_P (insn)
1016 || (INSN_P (insn) && !JUMP_P (insn)
1017 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
1018 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1019 || (INSN_P (insn) && (set = single_set (insn))
1020 && GET_CODE (SET_DEST (set)) == SUBREG))
1021 break;
1024 if (insn != NEXT_INSN (tail))
1026 if (dump_file)
1028 if (CALL_P (insn))
1029 fprintf (dump_file, "SMS loop-with-call\n");
1030 else if (BARRIER_P (insn))
1031 fprintf (dump_file, "SMS loop-with-barrier\n");
1032 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1033 fprintf (dump_file, "SMS reg inc\n");
1034 else if ((INSN_P (insn) && !JUMP_P (insn)
1035 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1036 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1037 else
1038 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1039 print_rtl_single (dump_file, insn);
1042 continue;
1045 if (! (g = create_ddg (bb, 0)))
1047 if (dump_file)
1048 fprintf (dump_file, "SMS doloop\n");
1049 continue;
1052 g_arr[loop->num] = g;
1053 if (dump_file)
1054 fprintf (dump_file, "...OK\n");
1057 if (dump_file)
1059 fprintf (dump_file, "\nSMS transformation phase\n");
1060 fprintf (dump_file, "=========================\n\n");
1063 /* We don't want to perform SMS on new loops - created by versioning. */
1064 FOR_EACH_LOOP (li, loop, 0)
1066 rtx head, tail;
1067 rtx count_reg, count_init;
1068 int mii, rec_mii;
1069 unsigned stage_count = 0;
1070 HOST_WIDEST_INT loop_count = 0;
1072 if (! (g = g_arr[loop->num]))
1073 continue;
1075 if (dump_file)
1077 rtx insn = BB_END (loop->header);
1079 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1080 loop->num, insn_file (insn), insn_line (insn));
1082 print_ddg (dump_file, g);
1085 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1087 latch_edge = loop_latch_edge (loop);
1088 gcc_assert (single_exit (loop));
1089 if (single_exit (loop)->count)
1090 trip_count = latch_edge->count / single_exit (loop)->count;
1092 if (dump_file)
1094 fprintf (dump_file, " %s %d (file, line)\n",
1095 insn_file (tail), insn_line (tail));
1096 fprintf (dump_file, "SMS single-bb-loop\n");
1097 if (profile_info && flag_branch_probabilities)
1099 fprintf (dump_file, "SMS loop-count ");
1100 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1101 (HOST_WIDEST_INT) bb->count);
1102 fprintf (dump_file, "\n");
1103 fprintf (dump_file, "SMS profile-sum-max ");
1104 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1105 (HOST_WIDEST_INT) profile_info->sum_max);
1106 fprintf (dump_file, "\n");
1108 fprintf (dump_file, "SMS doloop\n");
1109 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1110 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1111 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1115 /* In case of th loop have doloop register it gets special
1116 handling. */
1117 count_init = NULL_RTX;
1118 if ((count_reg = doloop_register_get (head, tail)))
1120 basic_block pre_header;
1122 pre_header = loop_preheader_edge (loop)->src;
1123 count_init = const_iteration_count (count_reg, pre_header,
1124 &loop_count);
1126 gcc_assert (count_reg);
1128 if (dump_file && count_init)
1130 fprintf (dump_file, "SMS const-doloop ");
1131 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1132 loop_count);
1133 fprintf (dump_file, "\n");
1136 node_order = XNEWVEC (int, g->num_nodes);
1138 mii = 1; /* Need to pass some estimate of mii. */
1139 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1140 mii = MAX (res_MII (g), rec_mii);
1141 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1143 if (dump_file)
1144 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1145 rec_mii, mii, maxii);
1147 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1148 ASAP. */
1149 set_node_sched_params (g);
1151 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1153 if (ps)
1154 stage_count = PS_STAGE_COUNT (ps);
1156 /* Stage count of 1 means that there is no interleaving between
1157 iterations, let the scheduling passes do the job. */
1158 if (stage_count < 1
1159 || (count_init && (loop_count <= stage_count))
1160 || (flag_branch_probabilities && (trip_count <= stage_count)))
1162 if (dump_file)
1164 fprintf (dump_file, "SMS failed... \n");
1165 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1166 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1167 fprintf (dump_file, ", trip-count=");
1168 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1169 fprintf (dump_file, ")\n");
1171 continue;
1173 else
1175 struct undo_replace_buff_elem *reg_move_replaces;
1177 if (dump_file)
1179 fprintf (dump_file,
1180 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1181 stage_count);
1182 print_partial_schedule (ps, dump_file);
1183 fprintf (dump_file,
1184 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1185 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1188 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1189 the closing_branch was scheduled and should appear in the last (ii-1)
1190 row. Otherwise, we are free to schedule the branch, and we let nodes
1191 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1192 row; this should reduce stage_count to minimum.
1193 TODO: Revisit the issue of scheduling the insns of the
1194 control part relative to the branch when the control part
1195 has more than one insn. */
1196 normalize_sched_times (ps);
1197 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1198 set_columns_for_ps (ps);
1200 canon_loop (loop);
1202 /* case the BCT count is not known , Do loop-versioning */
1203 if (count_reg && ! count_init)
1205 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1206 GEN_INT(stage_count));
1207 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1208 * REG_BR_PROB_BASE) / 100;
1210 loop_version (loop, comp_rtx, &condition_bb,
1211 prob, prob, REG_BR_PROB_BASE - prob,
1212 true);
1215 /* Set new iteration count of loop kernel. */
1216 if (count_reg && count_init)
1217 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1218 - stage_count + 1);
1220 /* Now apply the scheduled kernel to the RTL of the loop. */
1221 permute_partial_schedule (ps, g->closing_branch->first_note);
1223 /* Mark this loop as software pipelined so the later
1224 scheduling passes doesn't touch it. */
1225 if (! flag_resched_modulo_sched)
1226 g->bb->flags |= BB_DISABLE_SCHEDULE;
1227 /* The life-info is not valid any more. */
1228 df_set_bb_dirty (g->bb);
1230 reg_move_replaces = generate_reg_moves (ps, true);
1231 if (dump_file)
1232 print_node_sched_params (dump_file, g->num_nodes, g);
1233 /* Generate prolog and epilog. */
1234 generate_prolog_epilog (ps, loop, count_reg, count_init);
1236 free_undo_replace_buff (reg_move_replaces);
1239 free_partial_schedule (ps);
1240 free (node_sched_params);
1241 free (node_order);
1242 free_ddg (g);
1245 regstat_free_calls_crossed ();
1246 free (g_arr);
1248 /* Release scheduler data, needed until now because of DFA. */
1249 sched_finish ();
1250 loop_optimizer_finalize ();
1253 /* The SMS scheduling algorithm itself
1254 -----------------------------------
1255 Input: 'O' an ordered list of insns of a loop.
1256 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1258 'Q' is the empty Set
1259 'PS' is the partial schedule; it holds the currently scheduled nodes with
1260 their cycle/slot.
1261 'PSP' previously scheduled predecessors.
1262 'PSS' previously scheduled successors.
1263 't(u)' the cycle where u is scheduled.
1264 'l(u)' is the latency of u.
1265 'd(v,u)' is the dependence distance from v to u.
1266 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1267 the node ordering phase.
1268 'check_hardware_resources_conflicts(u, PS, c)'
1269 run a trace around cycle/slot through DFA model
1270 to check resource conflicts involving instruction u
1271 at cycle c given the partial schedule PS.
1272 'add_to_partial_schedule_at_time(u, PS, c)'
1273 Add the node/instruction u to the partial schedule
1274 PS at time c.
1275 'calculate_register_pressure(PS)'
1276 Given a schedule of instructions, calculate the register
1277 pressure it implies. One implementation could be the
1278 maximum number of overlapping live ranges.
1279 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1280 registers available in the hardware.
1282 1. II = MII.
1283 2. PS = empty list
1284 3. for each node u in O in pre-computed order
1285 4. if (PSP(u) != Q && PSS(u) == Q) then
1286 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1287 6. start = Early_start; end = Early_start + II - 1; step = 1
1288 11. else if (PSP(u) == Q && PSS(u) != Q) then
1289 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1290 13. start = Late_start; end = Late_start - II + 1; step = -1
1291 14. else if (PSP(u) != Q && PSS(u) != Q) then
1292 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1293 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1294 17. start = Early_start;
1295 18. end = min(Early_start + II - 1 , Late_start);
1296 19. step = 1
1297 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1298 21. start = ASAP(u); end = start + II - 1; step = 1
1299 22. endif
1301 23. success = false
1302 24. for (c = start ; c != end ; c += step)
1303 25. if check_hardware_resources_conflicts(u, PS, c) then
1304 26. add_to_partial_schedule_at_time(u, PS, c)
1305 27. success = true
1306 28. break
1307 29. endif
1308 30. endfor
1309 31. if (success == false) then
1310 32. II = II + 1
1311 33. if (II > maxII) then
1312 34. finish - failed to schedule
1313 35. endif
1314 36. goto 2.
1315 37. endif
1316 38. endfor
1317 39. if (calculate_register_pressure(PS) > maxRP) then
1318 40. goto 32.
1319 41. endif
1320 42. compute epilogue & prologue
1321 43. finish - succeeded to schedule
1324 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1325 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1326 set to 0 to save compile time. */
1327 #define DFA_HISTORY SMS_DFA_HISTORY
1329 /* A threshold for the number of repeated unsuccessful attempts to insert
1330 an empty row, before we flush the partial schedule and start over. */
1331 #define MAX_SPLIT_NUM 10
1332 /* Given the partial schedule PS, this function calculates and returns the
1333 cycles in which we can schedule the node with the given index I.
1334 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1335 noticed that there are several cases in which we fail to SMS the loop
1336 because the sched window of a node is empty due to tight data-deps. In
1337 such cases we want to unschedule some of the predecessors/successors
1338 until we get non-empty scheduling window. It returns -1 if the
1339 scheduling window is empty and zero otherwise. */
1341 static int
1342 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1343 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1345 int start, step, end;
1346 ddg_edge_ptr e;
1347 int u = nodes_order [i];
1348 ddg_node_ptr u_node = &ps->g->nodes[u];
1349 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1350 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1351 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1352 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1353 int psp_not_empty;
1354 int pss_not_empty;
1356 /* 1. compute sched window for u (start, end, step). */
1357 sbitmap_zero (psp);
1358 sbitmap_zero (pss);
1359 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1360 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1362 if (psp_not_empty && !pss_not_empty)
1364 int early_start = INT_MIN;
1366 end = INT_MAX;
1367 for (e = u_node->in; e != 0; e = e->next_in)
1369 ddg_node_ptr v_node = e->src;
1371 if (dump_file)
1373 fprintf (dump_file, "\nProcessing edge: ");
1374 print_ddg_edge (dump_file, e);
1375 fprintf (dump_file,
1376 "\nScheduling %d (%d) in psp_not_empty,"
1377 " checking p %d (%d): ", u_node->cuid,
1378 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1379 (v_node->insn));
1382 if (TEST_BIT (sched_nodes, v_node->cuid))
1384 int p_st = SCHED_TIME (v_node);
1386 early_start =
1387 MAX (early_start, p_st + e->latency - (e->distance * ii));
1389 if (dump_file)
1390 fprintf (dump_file,
1391 "pred st = %d; early_start = %d; latency: %d",
1392 p_st, early_start, e->latency);
1394 if (e->data_type == MEM_DEP)
1395 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1397 else if (dump_file)
1398 fprintf (dump_file, "the node is not scheduled\n");
1400 start = early_start;
1401 end = MIN (end, early_start + ii);
1402 /* Schedule the node close to it's predecessors. */
1403 step = 1;
1405 if (dump_file)
1406 fprintf (dump_file,
1407 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1408 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1411 else if (!psp_not_empty && pss_not_empty)
1413 int late_start = INT_MAX;
1415 end = INT_MIN;
1416 for (e = u_node->out; e != 0; e = e->next_out)
1418 ddg_node_ptr v_node = e->dest;
1420 if (dump_file)
1422 fprintf (dump_file, "\nProcessing edge:");
1423 print_ddg_edge (dump_file, e);
1424 fprintf (dump_file,
1425 "\nScheduling %d (%d) in pss_not_empty,"
1426 " checking s %d (%d): ", u_node->cuid,
1427 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1428 (v_node->insn));
1431 if (TEST_BIT (sched_nodes, v_node->cuid))
1433 int s_st = SCHED_TIME (v_node);
1435 late_start = MIN (late_start,
1436 s_st - e->latency + (e->distance * ii));
1438 if (dump_file)
1439 fprintf (dump_file,
1440 "succ st = %d; late_start = %d; latency = %d",
1441 s_st, late_start, e->latency);
1443 if (e->data_type == MEM_DEP)
1444 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1445 if (dump_file)
1446 fprintf (dump_file, "end = %d\n", end);
1449 else if (dump_file)
1450 fprintf (dump_file, "the node is not scheduled\n");
1453 start = late_start;
1454 end = MAX (end, late_start - ii);
1455 /* Schedule the node close to it's successors. */
1456 step = -1;
1458 if (dump_file)
1459 fprintf (dump_file,
1460 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1461 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1465 else if (psp_not_empty && pss_not_empty)
1467 int early_start = INT_MIN;
1468 int late_start = INT_MAX;
1469 int count_preds = 0;
1470 int count_succs = 0;
1472 start = INT_MIN;
1473 end = INT_MAX;
1474 for (e = u_node->in; e != 0; e = e->next_in)
1476 ddg_node_ptr v_node = e->src;
1478 if (dump_file)
1480 fprintf (dump_file, "\nProcessing edge:");
1481 print_ddg_edge (dump_file, e);
1482 fprintf (dump_file,
1483 "\nScheduling %d (%d) in psp_pss_not_empty,"
1484 " checking p %d (%d): ", u_node->cuid, INSN_UID
1485 (u_node->insn), v_node->cuid, INSN_UID
1486 (v_node->insn));
1489 if (TEST_BIT (sched_nodes, v_node->cuid))
1491 int p_st = SCHED_TIME (v_node);
1493 early_start = MAX (early_start,
1494 p_st + e->latency
1495 - (e->distance * ii));
1497 if (dump_file)
1498 fprintf (dump_file,
1499 "pred st = %d; early_start = %d; latency = %d",
1500 p_st, early_start, e->latency);
1502 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1503 count_preds++;
1505 if (e->data_type == MEM_DEP)
1506 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1508 else if (dump_file)
1509 fprintf (dump_file, "the node is not scheduled\n");
1512 for (e = u_node->out; e != 0; e = e->next_out)
1514 ddg_node_ptr v_node = e->dest;
1516 if (dump_file)
1518 fprintf (dump_file, "\nProcessing edge:");
1519 print_ddg_edge (dump_file, e);
1520 fprintf (dump_file,
1521 "\nScheduling %d (%d) in psp_pss_not_empty,"
1522 " checking s %d (%d): ", u_node->cuid, INSN_UID
1523 (u_node->insn), v_node->cuid, INSN_UID
1524 (v_node->insn));
1527 if (TEST_BIT (sched_nodes, v_node->cuid))
1529 int s_st = SCHED_TIME (v_node);
1531 late_start = MIN (late_start,
1532 s_st - e->latency
1533 + (e->distance * ii));
1535 if (dump_file)
1536 fprintf (dump_file,
1537 "succ st = %d; late_start = %d; latency = %d",
1538 s_st, late_start, e->latency);
1540 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1541 count_succs++;
1543 if (e->data_type == MEM_DEP)
1544 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1546 else if (dump_file)
1547 fprintf (dump_file, "the node is not scheduled\n");
1550 start = MAX (start, early_start);
1551 end = MIN (end, MIN (early_start + ii, late_start + 1));
1552 step = 1;
1553 /* If there are more successors than predecessors schedule the
1554 node close to it's successors. */
1555 if (count_succs >= count_preds)
1557 int old_start = start;
1559 start = end - 1;
1560 end = old_start - 1;
1561 step = -1;
1564 else /* psp is empty && pss is empty. */
1566 start = SCHED_ASAP (u_node);
1567 end = start + ii;
1568 step = 1;
1571 *start_p = start;
1572 *step_p = step;
1573 *end_p = end;
1574 sbitmap_free (psp);
1575 sbitmap_free (pss);
1577 if ((start >= end && step == 1) || (start <= end && step == -1))
1579 if (dump_file)
1580 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1581 start, end, step);
1582 return -1;
1585 return 0;
1588 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1589 node currently been scheduled. At the end of the calculation
1590 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of U_NODE
1591 which are in SCHED_NODES (already scheduled nodes) and scheduled at
1592 the same row as the first/last row of U_NODE's scheduling window.
1593 The first and last rows are calculated using the following parameters:
1594 START/END rows - The cycles that begins/ends the traversal on the window;
1595 searching for an empty cycle to schedule U_NODE.
1596 STEP - The direction in which we traverse the window.
1597 II - The initiation interval.
1598 TODO: We can add an insn to the must_precede/must_follow bitmap only
1599 if it has tight dependence to U and they are both scheduled in the
1600 same row. The current check is more conservative and content with
1601 the fact that both U and the insn are scheduled in the same row. */
1603 static void
1604 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1605 int step, int ii, sbitmap sched_nodes,
1606 sbitmap must_precede, sbitmap must_follow)
1608 ddg_edge_ptr e;
1609 int first_cycle_in_window, last_cycle_in_window;
1610 int first_row_in_window, last_row_in_window;
1612 gcc_assert (must_precede && must_follow);
1614 /* Consider the following scheduling window:
1615 {first_cycle_in_window, first_cycle_in_window+1, ...,
1616 last_cycle_in_window}. If step is 1 then the following will be
1617 the order we traverse the window: {start=first_cycle_in_window,
1618 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1619 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1620 end=first_cycle_in_window-1} if step is -1. */
1621 first_cycle_in_window = (step == 1) ? start : end - step;
1622 last_cycle_in_window = (step == 1) ? end - step : start;
1624 first_row_in_window = SMODULO (first_cycle_in_window, ii);
1625 last_row_in_window = SMODULO (last_cycle_in_window, ii);
1627 sbitmap_zero (must_precede);
1628 sbitmap_zero (must_follow);
1630 if (dump_file)
1631 fprintf (dump_file, "\nmust_precede: ");
1633 for (e = u_node->in; e != 0; e = e->next_in)
1634 if (TEST_BIT (sched_nodes, e->src->cuid)
1635 && (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window))
1637 if (dump_file)
1638 fprintf (dump_file, "%d ", e->src->cuid);
1640 SET_BIT (must_precede, e->src->cuid);
1643 if (dump_file)
1644 fprintf (dump_file, "\nmust_follow: ");
1646 for (e = u_node->out; e != 0; e = e->next_out)
1647 if (TEST_BIT (sched_nodes, e->dest->cuid)
1648 && (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window))
1650 if (dump_file)
1651 fprintf (dump_file, "%d ", e->dest->cuid);
1653 SET_BIT (must_follow, e->dest->cuid);
1656 if (dump_file)
1657 fprintf (dump_file, "\n");
1660 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1661 parameters to decide if that's possible:
1662 PS - The partial schedule.
1663 U - The serial number of U_NODE.
1664 NUM_SPLITS - The number of row spilts made so far.
1665 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1666 the first row of the scheduling window)
1667 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1668 last row of the scheduling window) */
1670 static bool
1671 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1672 int u, int row, sbitmap sched_nodes,
1673 int *num_splits, sbitmap must_precede,
1674 sbitmap must_follow)
1676 ps_insn_ptr psi;
1677 bool success = 0;
1679 verify_partial_schedule (ps, sched_nodes);
1680 psi = ps_add_node_check_conflicts (ps, u_node, row,
1681 must_precede, must_follow);
1682 if (psi)
1684 SCHED_TIME (u_node) = row;
1685 SET_BIT (sched_nodes, u);
1686 success = 1;
1687 *num_splits = 0;
1688 if (dump_file)
1689 fprintf (dump_file, "Scheduled w/o split in %d\n", row);
1693 return success;
1696 /* This function implements the scheduling algorithm for SMS according to the
1697 above algorithm. */
1698 static partial_schedule_ptr
1699 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1701 int ii = mii;
1702 int i, c, success, num_splits = 0;
1703 int flush_and_start_over = true;
1704 int num_nodes = g->num_nodes;
1705 int start, end, step; /* Place together into one struct? */
1706 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1707 sbitmap must_precede = sbitmap_alloc (num_nodes);
1708 sbitmap must_follow = sbitmap_alloc (num_nodes);
1709 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1711 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1713 sbitmap_ones (tobe_scheduled);
1714 sbitmap_zero (sched_nodes);
1716 while (flush_and_start_over && (ii < maxii))
1719 if (dump_file)
1720 fprintf (dump_file, "Starting with ii=%d\n", ii);
1721 flush_and_start_over = false;
1722 sbitmap_zero (sched_nodes);
1724 for (i = 0; i < num_nodes; i++)
1726 int u = nodes_order[i];
1727 ddg_node_ptr u_node = &ps->g->nodes[u];
1728 rtx insn = u_node->insn;
1730 if (!INSN_P (insn))
1732 RESET_BIT (tobe_scheduled, u);
1733 continue;
1736 if (JUMP_P (insn)) /* Closing branch handled later. */
1738 RESET_BIT (tobe_scheduled, u);
1739 continue;
1742 if (TEST_BIT (sched_nodes, u))
1743 continue;
1745 /* Try to get non-empty scheduling window. */
1746 success = 0;
1747 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1748 &step, &end) == 0)
1750 if (dump_file)
1751 fprintf (dump_file, "\nTrying to schedule node %d \
1752 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1753 (g->nodes[u].insn)), start, end, step);
1755 gcc_assert ((step > 0 && start < end)
1756 || (step < 0 && start > end));
1758 calculate_must_precede_follow (u_node, start, end, step, ii,
1759 sched_nodes, must_precede,
1760 must_follow);
1762 for (c = start; c != end; c += step)
1764 sbitmap tmp_precede = NULL;
1765 sbitmap tmp_follow = NULL;
1767 if (c == start)
1769 if (step == 1)
1770 tmp_precede = must_precede;
1771 else /* step == -1. */
1772 tmp_follow = must_follow;
1774 if (c == end - step)
1776 if (step == 1)
1777 tmp_follow = must_follow;
1778 else /* step == -1. */
1779 tmp_precede = must_precede;
1782 success =
1783 try_scheduling_node_in_cycle (ps, u_node, u, c,
1784 sched_nodes,
1785 &num_splits, tmp_precede,
1786 tmp_follow);
1787 if (success)
1788 break;
1791 verify_partial_schedule (ps, sched_nodes);
1793 if (!success)
1795 int split_row;
1797 if (ii++ == maxii)
1798 break;
1800 if (num_splits >= MAX_SPLIT_NUM)
1802 num_splits = 0;
1803 flush_and_start_over = true;
1804 verify_partial_schedule (ps, sched_nodes);
1805 reset_partial_schedule (ps, ii);
1806 verify_partial_schedule (ps, sched_nodes);
1807 break;
1810 num_splits++;
1811 if (step == 1)
1812 split_row = compute_split_row (sched_nodes, start, end,
1813 ps->ii, u_node);
1814 else
1815 split_row = compute_split_row (sched_nodes, end, start,
1816 ps->ii, u_node);
1818 ps_insert_empty_row (ps, split_row, sched_nodes);
1819 i--; /* Go back and retry node i. */
1821 if (dump_file)
1822 fprintf (dump_file, "num_splits=%d\n", num_splits);
1825 /* ??? If (success), check register pressure estimates. */
1826 } /* Continue with next node. */
1827 } /* While flush_and_start_over. */
1828 if (ii >= maxii)
1830 free_partial_schedule (ps);
1831 ps = NULL;
1833 else
1834 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1836 sbitmap_free (sched_nodes);
1837 sbitmap_free (must_precede);
1838 sbitmap_free (must_follow);
1839 sbitmap_free (tobe_scheduled);
1841 return ps;
1844 /* This function inserts a new empty row into PS at the position
1845 according to SPLITROW, keeping all already scheduled instructions
1846 intact and updating their SCHED_TIME and cycle accordingly. */
1847 static void
1848 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1849 sbitmap sched_nodes)
1851 ps_insn_ptr crr_insn;
1852 ps_insn_ptr *rows_new;
1853 int ii = ps->ii;
1854 int new_ii = ii + 1;
1855 int row;
1857 verify_partial_schedule (ps, sched_nodes);
1859 /* We normalize sched_time and rotate ps to have only non-negative sched
1860 times, for simplicity of updating cycles after inserting new row. */
1861 split_row -= ps->min_cycle;
1862 split_row = SMODULO (split_row, ii);
1863 if (dump_file)
1864 fprintf (dump_file, "split_row=%d\n", split_row);
1866 normalize_sched_times (ps);
1867 rotate_partial_schedule (ps, ps->min_cycle);
1869 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1870 for (row = 0; row < split_row; row++)
1872 rows_new[row] = ps->rows[row];
1873 ps->rows[row] = NULL;
1874 for (crr_insn = rows_new[row];
1875 crr_insn; crr_insn = crr_insn->next_in_row)
1877 ddg_node_ptr u = crr_insn->node;
1878 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1880 SCHED_TIME (u) = new_time;
1881 crr_insn->cycle = new_time;
1882 SCHED_ROW (u) = new_time % new_ii;
1883 SCHED_STAGE (u) = new_time / new_ii;
1888 rows_new[split_row] = NULL;
1890 for (row = split_row; row < ii; row++)
1892 rows_new[row + 1] = ps->rows[row];
1893 ps->rows[row] = NULL;
1894 for (crr_insn = rows_new[row + 1];
1895 crr_insn; crr_insn = crr_insn->next_in_row)
1897 ddg_node_ptr u = crr_insn->node;
1898 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1900 SCHED_TIME (u) = new_time;
1901 crr_insn->cycle = new_time;
1902 SCHED_ROW (u) = new_time % new_ii;
1903 SCHED_STAGE (u) = new_time / new_ii;
1907 /* Updating ps. */
1908 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1909 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1910 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1911 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1912 free (ps->rows);
1913 ps->rows = rows_new;
1914 ps->ii = new_ii;
1915 gcc_assert (ps->min_cycle >= 0);
1917 verify_partial_schedule (ps, sched_nodes);
1919 if (dump_file)
1920 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1921 ps->max_cycle);
1924 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1925 UP which are the boundaries of it's scheduling window; compute using
1926 SCHED_NODES and II a row in the partial schedule that can be split
1927 which will separate a critical predecessor from a critical successor
1928 thereby expanding the window, and return it. */
1929 static int
1930 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1931 ddg_node_ptr u_node)
1933 ddg_edge_ptr e;
1934 int lower = INT_MIN, upper = INT_MAX;
1935 ddg_node_ptr crit_pred = NULL;
1936 ddg_node_ptr crit_succ = NULL;
1937 int crit_cycle;
1939 for (e = u_node->in; e != 0; e = e->next_in)
1941 ddg_node_ptr v_node = e->src;
1943 if (TEST_BIT (sched_nodes, v_node->cuid)
1944 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1945 if (SCHED_TIME (v_node) > lower)
1947 crit_pred = v_node;
1948 lower = SCHED_TIME (v_node);
1952 if (crit_pred != NULL)
1954 crit_cycle = SCHED_TIME (crit_pred) + 1;
1955 return SMODULO (crit_cycle, ii);
1958 for (e = u_node->out; e != 0; e = e->next_out)
1960 ddg_node_ptr v_node = e->dest;
1961 if (TEST_BIT (sched_nodes, v_node->cuid)
1962 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1963 if (SCHED_TIME (v_node) < upper)
1965 crit_succ = v_node;
1966 upper = SCHED_TIME (v_node);
1970 if (crit_succ != NULL)
1972 crit_cycle = SCHED_TIME (crit_succ);
1973 return SMODULO (crit_cycle, ii);
1976 if (dump_file)
1977 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
1979 return SMODULO ((low + up + 1) / 2, ii);
1982 static void
1983 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
1985 int row;
1986 ps_insn_ptr crr_insn;
1988 for (row = 0; row < ps->ii; row++)
1989 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
1991 ddg_node_ptr u = crr_insn->node;
1993 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
1994 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1995 popcount (sched_nodes) == number of insns in ps. */
1996 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
1997 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2002 /* This page implements the algorithm for ordering the nodes of a DDG
2003 for modulo scheduling, activated through the
2004 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2006 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2007 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2008 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2009 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2010 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2011 #define DEPTH(x) (ASAP ((x)))
2013 typedef struct node_order_params * nopa;
2015 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2016 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2017 static nopa calculate_order_params (ddg_ptr, int, int *);
2018 static int find_max_asap (ddg_ptr, sbitmap);
2019 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2020 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2022 enum sms_direction {BOTTOMUP, TOPDOWN};
2024 struct node_order_params
2026 int asap;
2027 int alap;
2028 int height;
2031 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2032 static void
2033 check_nodes_order (int *node_order, int num_nodes)
2035 int i;
2036 sbitmap tmp = sbitmap_alloc (num_nodes);
2038 sbitmap_zero (tmp);
2040 if (dump_file)
2041 fprintf (dump_file, "SMS final nodes order: \n");
2043 for (i = 0; i < num_nodes; i++)
2045 int u = node_order[i];
2047 if (dump_file)
2048 fprintf (dump_file, "%d ", u);
2049 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2051 SET_BIT (tmp, u);
2054 if (dump_file)
2055 fprintf (dump_file, "\n");
2057 sbitmap_free (tmp);
2060 /* Order the nodes of G for scheduling and pass the result in
2061 NODE_ORDER. Also set aux.count of each node to ASAP.
2062 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2063 static int
2064 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2066 int i;
2067 int rec_mii = 0;
2068 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2070 nopa nops = calculate_order_params (g, mii, pmax_asap);
2072 if (dump_file)
2073 print_sccs (dump_file, sccs, g);
2075 order_nodes_of_sccs (sccs, node_order);
2077 if (sccs->num_sccs > 0)
2078 /* First SCC has the largest recurrence_length. */
2079 rec_mii = sccs->sccs[0]->recurrence_length;
2081 /* Save ASAP before destroying node_order_params. */
2082 for (i = 0; i < g->num_nodes; i++)
2084 ddg_node_ptr v = &g->nodes[i];
2085 v->aux.count = ASAP (v);
2088 free (nops);
2089 free_ddg_all_sccs (sccs);
2090 check_nodes_order (node_order, g->num_nodes);
2092 return rec_mii;
2095 static void
2096 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2098 int i, pos = 0;
2099 ddg_ptr g = all_sccs->ddg;
2100 int num_nodes = g->num_nodes;
2101 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2102 sbitmap on_path = sbitmap_alloc (num_nodes);
2103 sbitmap tmp = sbitmap_alloc (num_nodes);
2104 sbitmap ones = sbitmap_alloc (num_nodes);
2106 sbitmap_zero (prev_sccs);
2107 sbitmap_ones (ones);
2109 /* Perfrom the node ordering starting from the SCC with the highest recMII.
2110 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2111 for (i = 0; i < all_sccs->num_sccs; i++)
2113 ddg_scc_ptr scc = all_sccs->sccs[i];
2115 /* Add nodes on paths from previous SCCs to the current SCC. */
2116 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2117 sbitmap_a_or_b (tmp, scc->nodes, on_path);
2119 /* Add nodes on paths from the current SCC to previous SCCs. */
2120 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2121 sbitmap_a_or_b (tmp, tmp, on_path);
2123 /* Remove nodes of previous SCCs from current extended SCC. */
2124 sbitmap_difference (tmp, tmp, prev_sccs);
2126 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2127 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2130 /* Handle the remaining nodes that do not belong to any scc. Each call
2131 to order_nodes_in_scc handles a single connected component. */
2132 while (pos < g->num_nodes)
2134 sbitmap_difference (tmp, ones, prev_sccs);
2135 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2137 sbitmap_free (prev_sccs);
2138 sbitmap_free (on_path);
2139 sbitmap_free (tmp);
2140 sbitmap_free (ones);
2143 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2144 static struct node_order_params *
2145 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2147 int u;
2148 int max_asap;
2149 int num_nodes = g->num_nodes;
2150 ddg_edge_ptr e;
2151 /* Allocate a place to hold ordering params for each node in the DDG. */
2152 nopa node_order_params_arr;
2154 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2155 node_order_params_arr = (nopa) xcalloc (num_nodes,
2156 sizeof (struct node_order_params));
2158 /* Set the aux pointer of each node to point to its order_params structure. */
2159 for (u = 0; u < num_nodes; u++)
2160 g->nodes[u].aux.info = &node_order_params_arr[u];
2162 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2163 calculate ASAP, ALAP, mobility, distance, and height for each node
2164 in the dependence (direct acyclic) graph. */
2166 /* We assume that the nodes in the array are in topological order. */
2168 max_asap = 0;
2169 for (u = 0; u < num_nodes; u++)
2171 ddg_node_ptr u_node = &g->nodes[u];
2173 ASAP (u_node) = 0;
2174 for (e = u_node->in; e; e = e->next_in)
2175 if (e->distance == 0)
2176 ASAP (u_node) = MAX (ASAP (u_node),
2177 ASAP (e->src) + e->latency);
2178 max_asap = MAX (max_asap, ASAP (u_node));
2181 for (u = num_nodes - 1; u > -1; u--)
2183 ddg_node_ptr u_node = &g->nodes[u];
2185 ALAP (u_node) = max_asap;
2186 HEIGHT (u_node) = 0;
2187 for (e = u_node->out; e; e = e->next_out)
2188 if (e->distance == 0)
2190 ALAP (u_node) = MIN (ALAP (u_node),
2191 ALAP (e->dest) - e->latency);
2192 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2193 HEIGHT (e->dest) + e->latency);
2196 if (dump_file)
2198 fprintf (dump_file, "\nOrder params\n");
2199 for (u = 0; u < num_nodes; u++)
2201 ddg_node_ptr u_node = &g->nodes[u];
2203 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2204 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2208 *pmax_asap = max_asap;
2209 return node_order_params_arr;
2212 static int
2213 find_max_asap (ddg_ptr g, sbitmap nodes)
2215 unsigned int u = 0;
2216 int max_asap = -1;
2217 int result = -1;
2218 sbitmap_iterator sbi;
2220 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2222 ddg_node_ptr u_node = &g->nodes[u];
2224 if (max_asap < ASAP (u_node))
2226 max_asap = ASAP (u_node);
2227 result = u;
2230 return result;
2233 static int
2234 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2236 unsigned int u = 0;
2237 int max_hv = -1;
2238 int min_mob = INT_MAX;
2239 int result = -1;
2240 sbitmap_iterator sbi;
2242 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2244 ddg_node_ptr u_node = &g->nodes[u];
2246 if (max_hv < HEIGHT (u_node))
2248 max_hv = HEIGHT (u_node);
2249 min_mob = MOB (u_node);
2250 result = u;
2252 else if ((max_hv == HEIGHT (u_node))
2253 && (min_mob > MOB (u_node)))
2255 min_mob = MOB (u_node);
2256 result = u;
2259 return result;
2262 static int
2263 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2265 unsigned int u = 0;
2266 int max_dv = -1;
2267 int min_mob = INT_MAX;
2268 int result = -1;
2269 sbitmap_iterator sbi;
2271 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2273 ddg_node_ptr u_node = &g->nodes[u];
2275 if (max_dv < DEPTH (u_node))
2277 max_dv = DEPTH (u_node);
2278 min_mob = MOB (u_node);
2279 result = u;
2281 else if ((max_dv == DEPTH (u_node))
2282 && (min_mob > MOB (u_node)))
2284 min_mob = MOB (u_node);
2285 result = u;
2288 return result;
2291 /* Places the nodes of SCC into the NODE_ORDER array starting
2292 at position POS, according to the SMS ordering algorithm.
2293 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2294 the NODE_ORDER array, starting from position zero. */
2295 static int
2296 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2297 int * node_order, int pos)
2299 enum sms_direction dir;
2300 int num_nodes = g->num_nodes;
2301 sbitmap workset = sbitmap_alloc (num_nodes);
2302 sbitmap tmp = sbitmap_alloc (num_nodes);
2303 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2304 sbitmap predecessors = sbitmap_alloc (num_nodes);
2305 sbitmap successors = sbitmap_alloc (num_nodes);
2307 sbitmap_zero (predecessors);
2308 find_predecessors (predecessors, g, nodes_ordered);
2310 sbitmap_zero (successors);
2311 find_successors (successors, g, nodes_ordered);
2313 sbitmap_zero (tmp);
2314 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2316 sbitmap_copy (workset, tmp);
2317 dir = BOTTOMUP;
2319 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2321 sbitmap_copy (workset, tmp);
2322 dir = TOPDOWN;
2324 else
2326 int u;
2328 sbitmap_zero (workset);
2329 if ((u = find_max_asap (g, scc)) >= 0)
2330 SET_BIT (workset, u);
2331 dir = BOTTOMUP;
2334 sbitmap_zero (zero_bitmap);
2335 while (!sbitmap_equal (workset, zero_bitmap))
2337 int v;
2338 ddg_node_ptr v_node;
2339 sbitmap v_node_preds;
2340 sbitmap v_node_succs;
2342 if (dir == TOPDOWN)
2344 while (!sbitmap_equal (workset, zero_bitmap))
2346 v = find_max_hv_min_mob (g, workset);
2347 v_node = &g->nodes[v];
2348 node_order[pos++] = v;
2349 v_node_succs = NODE_SUCCESSORS (v_node);
2350 sbitmap_a_and_b (tmp, v_node_succs, scc);
2352 /* Don't consider the already ordered successors again. */
2353 sbitmap_difference (tmp, tmp, nodes_ordered);
2354 sbitmap_a_or_b (workset, workset, tmp);
2355 RESET_BIT (workset, v);
2356 SET_BIT (nodes_ordered, v);
2358 dir = BOTTOMUP;
2359 sbitmap_zero (predecessors);
2360 find_predecessors (predecessors, g, nodes_ordered);
2361 sbitmap_a_and_b (workset, predecessors, scc);
2363 else
2365 while (!sbitmap_equal (workset, zero_bitmap))
2367 v = find_max_dv_min_mob (g, workset);
2368 v_node = &g->nodes[v];
2369 node_order[pos++] = v;
2370 v_node_preds = NODE_PREDECESSORS (v_node);
2371 sbitmap_a_and_b (tmp, v_node_preds, scc);
2373 /* Don't consider the already ordered predecessors again. */
2374 sbitmap_difference (tmp, tmp, nodes_ordered);
2375 sbitmap_a_or_b (workset, workset, tmp);
2376 RESET_BIT (workset, v);
2377 SET_BIT (nodes_ordered, v);
2379 dir = TOPDOWN;
2380 sbitmap_zero (successors);
2381 find_successors (successors, g, nodes_ordered);
2382 sbitmap_a_and_b (workset, successors, scc);
2385 sbitmap_free (tmp);
2386 sbitmap_free (workset);
2387 sbitmap_free (zero_bitmap);
2388 sbitmap_free (predecessors);
2389 sbitmap_free (successors);
2390 return pos;
2394 /* This page contains functions for manipulating partial-schedules during
2395 modulo scheduling. */
2397 /* Create a partial schedule and allocate a memory to hold II rows. */
2399 static partial_schedule_ptr
2400 create_partial_schedule (int ii, ddg_ptr g, int history)
2402 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2403 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2404 ps->ii = ii;
2405 ps->history = history;
2406 ps->min_cycle = INT_MAX;
2407 ps->max_cycle = INT_MIN;
2408 ps->g = g;
2410 return ps;
2413 /* Free the PS_INSNs in rows array of the given partial schedule.
2414 ??? Consider caching the PS_INSN's. */
2415 static void
2416 free_ps_insns (partial_schedule_ptr ps)
2418 int i;
2420 for (i = 0; i < ps->ii; i++)
2422 while (ps->rows[i])
2424 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2426 free (ps->rows[i]);
2427 ps->rows[i] = ps_insn;
2429 ps->rows[i] = NULL;
2433 /* Free all the memory allocated to the partial schedule. */
2435 static void
2436 free_partial_schedule (partial_schedule_ptr ps)
2438 if (!ps)
2439 return;
2440 free_ps_insns (ps);
2441 free (ps->rows);
2442 free (ps);
2445 /* Clear the rows array with its PS_INSNs, and create a new one with
2446 NEW_II rows. */
2448 static void
2449 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2451 if (!ps)
2452 return;
2453 free_ps_insns (ps);
2454 if (new_ii == ps->ii)
2455 return;
2456 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2457 * sizeof (ps_insn_ptr));
2458 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2459 ps->ii = new_ii;
2460 ps->min_cycle = INT_MAX;
2461 ps->max_cycle = INT_MIN;
2464 /* Prints the partial schedule as an ii rows array, for each rows
2465 print the ids of the insns in it. */
2466 void
2467 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2469 int i;
2471 for (i = 0; i < ps->ii; i++)
2473 ps_insn_ptr ps_i = ps->rows[i];
2475 fprintf (dump, "\n[CYCLE %d ]: ", i);
2476 while (ps_i)
2478 fprintf (dump, "%d, ",
2479 INSN_UID (ps_i->node->insn));
2480 ps_i = ps_i->next_in_row;
2485 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2486 static ps_insn_ptr
2487 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2489 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2491 ps_i->node = node;
2492 ps_i->next_in_row = NULL;
2493 ps_i->prev_in_row = NULL;
2494 ps_i->row_rest_count = rest_count;
2495 ps_i->cycle = cycle;
2497 return ps_i;
2501 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2502 node is not found in the partial schedule, else returns true. */
2503 static bool
2504 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2506 int row;
2508 if (!ps || !ps_i)
2509 return false;
2511 row = SMODULO (ps_i->cycle, ps->ii);
2512 if (! ps_i->prev_in_row)
2514 if (ps_i != ps->rows[row])
2515 return false;
2517 ps->rows[row] = ps_i->next_in_row;
2518 if (ps->rows[row])
2519 ps->rows[row]->prev_in_row = NULL;
2521 else
2523 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2524 if (ps_i->next_in_row)
2525 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2527 free (ps_i);
2528 return true;
2531 /* Unlike what literature describes for modulo scheduling (which focuses
2532 on VLIW machines) the order of the instructions inside a cycle is
2533 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2534 where the current instruction should go relative to the already
2535 scheduled instructions in the given cycle. Go over these
2536 instructions and find the first possible column to put it in. */
2537 static bool
2538 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2539 sbitmap must_precede, sbitmap must_follow)
2541 ps_insn_ptr next_ps_i;
2542 ps_insn_ptr first_must_follow = NULL;
2543 ps_insn_ptr last_must_precede = NULL;
2544 int row;
2546 if (! ps_i)
2547 return false;
2549 row = SMODULO (ps_i->cycle, ps->ii);
2551 /* Find the first must follow and the last must precede
2552 and insert the node immediately after the must precede
2553 but make sure that it there is no must follow after it. */
2554 for (next_ps_i = ps->rows[row];
2555 next_ps_i;
2556 next_ps_i = next_ps_i->next_in_row)
2558 if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
2559 && ! first_must_follow)
2560 first_must_follow = next_ps_i;
2561 if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
2563 /* If we have already met a node that must follow, then
2564 there is no possible column. */
2565 if (first_must_follow)
2566 return false;
2567 else
2568 last_must_precede = next_ps_i;
2572 /* Now insert the node after INSERT_AFTER_PSI. */
2574 if (! last_must_precede)
2576 ps_i->next_in_row = ps->rows[row];
2577 ps_i->prev_in_row = NULL;
2578 if (ps_i->next_in_row)
2579 ps_i->next_in_row->prev_in_row = ps_i;
2580 ps->rows[row] = ps_i;
2582 else
2584 ps_i->next_in_row = last_must_precede->next_in_row;
2585 last_must_precede->next_in_row = ps_i;
2586 ps_i->prev_in_row = last_must_precede;
2587 if (ps_i->next_in_row)
2588 ps_i->next_in_row->prev_in_row = ps_i;
2591 return true;
2594 /* Advances the PS_INSN one column in its current row; returns false
2595 in failure and true in success. Bit N is set in MUST_FOLLOW if
2596 the node with cuid N must be come after the node pointed to by
2597 PS_I when scheduled in the same cycle. */
2598 static int
2599 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2600 sbitmap must_follow)
2602 ps_insn_ptr prev, next;
2603 int row;
2604 ddg_node_ptr next_node;
2606 if (!ps || !ps_i)
2607 return false;
2609 row = SMODULO (ps_i->cycle, ps->ii);
2611 if (! ps_i->next_in_row)
2612 return false;
2614 next_node = ps_i->next_in_row->node;
2616 /* Check if next_in_row is dependent on ps_i, both having same sched
2617 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2618 if (must_follow && TEST_BIT (must_follow, next_node->cuid))
2619 return false;
2621 /* Advance PS_I over its next_in_row in the doubly linked list. */
2622 prev = ps_i->prev_in_row;
2623 next = ps_i->next_in_row;
2625 if (ps_i == ps->rows[row])
2626 ps->rows[row] = next;
2628 ps_i->next_in_row = next->next_in_row;
2630 if (next->next_in_row)
2631 next->next_in_row->prev_in_row = ps_i;
2633 next->next_in_row = ps_i;
2634 ps_i->prev_in_row = next;
2636 next->prev_in_row = prev;
2637 if (prev)
2638 prev->next_in_row = next;
2640 return true;
2643 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2644 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2645 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2646 before/after (respectively) the node pointed to by PS_I when scheduled
2647 in the same cycle. */
2648 static ps_insn_ptr
2649 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2650 sbitmap must_precede, sbitmap must_follow)
2652 ps_insn_ptr ps_i;
2653 int rest_count = 1;
2654 int row = SMODULO (cycle, ps->ii);
2656 if (ps->rows[row]
2657 && ps->rows[row]->row_rest_count >= issue_rate)
2658 return NULL;
2660 if (ps->rows[row])
2661 rest_count += ps->rows[row]->row_rest_count;
2663 ps_i = create_ps_insn (node, rest_count, cycle);
2665 /* Finds and inserts PS_I according to MUST_FOLLOW and
2666 MUST_PRECEDE. */
2667 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2669 free (ps_i);
2670 return NULL;
2673 return ps_i;
2676 /* Advance time one cycle. Assumes DFA is being used. */
2677 static void
2678 advance_one_cycle (void)
2680 if (targetm.sched.dfa_pre_cycle_insn)
2681 state_transition (curr_state,
2682 targetm.sched.dfa_pre_cycle_insn ());
2684 state_transition (curr_state, NULL);
2686 if (targetm.sched.dfa_post_cycle_insn)
2687 state_transition (curr_state,
2688 targetm.sched.dfa_post_cycle_insn ());
2693 /* Checks if PS has resource conflicts according to DFA, starting from
2694 FROM cycle to TO cycle; returns true if there are conflicts and false
2695 if there are no conflicts. Assumes DFA is being used. */
2696 static int
2697 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2699 int cycle;
2701 state_reset (curr_state);
2703 for (cycle = from; cycle <= to; cycle++)
2705 ps_insn_ptr crr_insn;
2706 /* Holds the remaining issue slots in the current row. */
2707 int can_issue_more = issue_rate;
2709 /* Walk through the DFA for the current row. */
2710 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2711 crr_insn;
2712 crr_insn = crr_insn->next_in_row)
2714 rtx insn = crr_insn->node->insn;
2716 if (!INSN_P (insn))
2717 continue;
2719 /* Check if there is room for the current insn. */
2720 if (!can_issue_more || state_dead_lock_p (curr_state))
2721 return true;
2723 /* Update the DFA state and return with failure if the DFA found
2724 recource conflicts. */
2725 if (state_transition (curr_state, insn) >= 0)
2726 return true;
2728 if (targetm.sched.variable_issue)
2729 can_issue_more =
2730 targetm.sched.variable_issue (sched_dump, sched_verbose,
2731 insn, can_issue_more);
2732 /* A naked CLOBBER or USE generates no instruction, so don't
2733 let them consume issue slots. */
2734 else if (GET_CODE (PATTERN (insn)) != USE
2735 && GET_CODE (PATTERN (insn)) != CLOBBER)
2736 can_issue_more--;
2739 /* Advance the DFA to the next cycle. */
2740 advance_one_cycle ();
2742 return false;
2745 /* Checks if the given node causes resource conflicts when added to PS at
2746 cycle C. If not the node is added to PS and returned; otherwise zero
2747 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2748 cuid N must be come before/after (respectively) the node pointed to by
2749 PS_I when scheduled in the same cycle. */
2750 ps_insn_ptr
2751 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2752 int c, sbitmap must_precede,
2753 sbitmap must_follow)
2755 int has_conflicts = 0;
2756 ps_insn_ptr ps_i;
2758 /* First add the node to the PS, if this succeeds check for
2759 conflicts, trying different issue slots in the same row. */
2760 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2761 return NULL; /* Failed to insert the node at the given cycle. */
2763 has_conflicts = ps_has_conflicts (ps, c, c)
2764 || (ps->history > 0
2765 && ps_has_conflicts (ps,
2766 c - ps->history,
2767 c + ps->history));
2769 /* Try different issue slots to find one that the given node can be
2770 scheduled in without conflicts. */
2771 while (has_conflicts)
2773 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2774 break;
2775 has_conflicts = ps_has_conflicts (ps, c, c)
2776 || (ps->history > 0
2777 && ps_has_conflicts (ps,
2778 c - ps->history,
2779 c + ps->history));
2782 if (has_conflicts)
2784 remove_node_from_ps (ps, ps_i);
2785 return NULL;
2788 ps->min_cycle = MIN (ps->min_cycle, c);
2789 ps->max_cycle = MAX (ps->max_cycle, c);
2790 return ps_i;
2793 /* Rotate the rows of PS such that insns scheduled at time
2794 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2795 void
2796 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2798 int i, row, backward_rotates;
2799 int last_row = ps->ii - 1;
2801 if (start_cycle == 0)
2802 return;
2804 backward_rotates = SMODULO (start_cycle, ps->ii);
2806 /* Revisit later and optimize this into a single loop. */
2807 for (i = 0; i < backward_rotates; i++)
2809 ps_insn_ptr first_row = ps->rows[0];
2811 for (row = 0; row < last_row; row++)
2812 ps->rows[row] = ps->rows[row+1];
2814 ps->rows[last_row] = first_row;
2817 ps->max_cycle -= start_cycle;
2818 ps->min_cycle -= start_cycle;
2821 #endif /* INSN_SCHEDULING */
2823 static bool
2824 gate_handle_sms (void)
2826 return (optimize > 0 && flag_modulo_sched);
2830 /* Run instruction scheduler. */
2831 /* Perform SMS module scheduling. */
2832 static unsigned int
2833 rest_of_handle_sms (void)
2835 #ifdef INSN_SCHEDULING
2836 basic_block bb;
2838 /* Collect loop information to be used in SMS. */
2839 cfg_layout_initialize (0);
2840 sms_schedule ();
2842 /* Update the life information, because we add pseudos. */
2843 max_regno = max_reg_num ();
2845 /* Finalize layout changes. */
2846 FOR_EACH_BB (bb)
2847 if (bb->next_bb != EXIT_BLOCK_PTR)
2848 bb->aux = bb->next_bb;
2849 free_dominance_info (CDI_DOMINATORS);
2850 cfg_layout_finalize ();
2851 #endif /* INSN_SCHEDULING */
2852 return 0;
2855 struct tree_opt_pass pass_sms =
2857 "sms", /* name */
2858 gate_handle_sms, /* gate */
2859 rest_of_handle_sms, /* execute */
2860 NULL, /* sub */
2861 NULL, /* next */
2862 0, /* static_pass_number */
2863 TV_SMS, /* tv_id */
2864 0, /* properties_required */
2865 0, /* properties_provided */
2866 0, /* properties_destroyed */
2867 TODO_dump_func, /* todo_flags_start */
2868 TODO_df_finish | TODO_verify_rtl_sharing |
2869 TODO_dump_func |
2870 TODO_ggc_collect, /* todo_flags_finish */
2871 'm' /* letter */