1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
27 #include "diagnostic-core.h"
30 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
38 #include "sched-int.h"
40 #include "cfglayout.h"
48 #include "tree-pass.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
here the insns are not scheduled monotonically top-down (nor bottom-
up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops whose loop count can be easily
88 adjusted. This is because we peel a constant number of iterations
89 into a prologue and epilogue for which we want to avoid emitting
90 the control part, and a kernel which is to iterate that constant
91 number of iterations less than the original loop. So the control
92 part should be a set of insns clearly identified and having its
93 own iv, not otherwise used in the loop (at-least for now), which
94 initializes a register before the loop to the number of iterations.
95 Currently SMS relies on the do-loop pattern to recognize such loops,
96 where (1) the control part comprises of all insns defining and/or
97 using a certain 'count' register and (2) the loop count can be
98 adjusted by modifying this register prior to the loop.
99 TODO: Rely on cfgloop analysis instead. */
101 /* This page defines partial-schedule structures and functions for
102 modulo scheduling. */
/* Forward declarations of the two core scheduling structures (defined
   below): a partial schedule maps insns to rows (cycle modulo II), and
   a ps_insn is one scheduled instruction inside it.  */
typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;
/* The minimum (absolute) cycle that a node of ps was scheduled in.
   Absolute cycles may be negative before normalization; see
   reset_sched_times.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
/* Perform signed modulo, always returning a non-negative value.
   Assumes Y is positive.  NOTE: both arguments are evaluated more
   than once, so avoid passing expressions with side effects.  */
#define SMODULO(x,y) ((x) % (y) >= 0 ? (x) % (y) : (x) % (y) + (y))
116 /* The number of different iterations the nodes in ps span, assuming
117 the stage boundaries are placed efficiently. */
118 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
/* The stage count of ps (the stage_count field of the partial
   schedule; set from calculate_stage_count by the caller).  */
#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
123 /* A single instruction in the partial schedule. */
126 /* The corresponding DDG_NODE. */
129 /* The (absolute) cycle in which the PS instruction is scheduled.
130 Same as SCHED_TIME (node). */
133 /* The next/prev PS_INSN in the same row. */
134 ps_insn_ptr next_in_row
,
139 /* Holds the partial schedule as an array of II rows. Each entry of the
140 array points to a linked list of PS_INSNs, which represents the
141 instructions that are scheduled for that row. */
142 struct partial_schedule
144 int ii
; /* Number of rows in the partial schedule. */
145 int history
; /* Threshold for conflict checking using DFA. */
147 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
150 /* rows_length[i] holds the number of instructions in the row.
151 It is used only (as an optimization) to back off quickly from
152 trying to schedule a node in a full row; that is, to avoid running
153 through futile DFA state transitions. */
156 /* The earliest absolute cycle of an insn in the partial schedule. */
159 /* The latest absolute cycle of an insn in the partial schedule. */
162 ddg_ptr g
; /* The DDG of the insns in the partial schedule. */
164 int stage_count
; /* The stage count of the partial schedule. */
167 /* We use this to record all the register replacements we do in
168 the kernel so we can undo SMS if it is not profitable. */
169 struct undo_replace_buff_elem
174 struct undo_replace_buff_elem
*next
;
/* Partial-schedule construction, maintenance and debug routines
   (implementations appear later in this file).  */
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
						ddg_node_ptr node, int cycle,
						sbitmap must_precede,
						sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
194 /* This page defines constants and structures for the modulo scheduling
197 static int sms_order_nodes (ddg_ptr
, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr
);
199 static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr
, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr
, rtx
);
201 static void generate_prolog_epilog (partial_schedule_ptr
, struct loop
*,
203 static void duplicate_insns_of_cycles (partial_schedule_ptr
,
205 static int calculate_stage_count (partial_schedule_ptr ps
);
/* Accessors for the per-node scheduling parameters, reached through
   the aux.info field of a ddg node X (set up by
   set_node_sched_params).  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
217 typedef struct node_sched_params
219 int asap
; /* A lower-bound on the absolute scheduling cycle. */
220 int time
; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
228 /* The number of register-move instructions added, immediately preceding
232 int row
; /* Holds time % ii. */
233 int stage
; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
238 } *node_sched_params_ptr
;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
245 sms_print_insn (const_rtx insn
, int aligned ATTRIBUTE_UNUSED
)
249 sprintf (tmp
, "i%4d", INSN_UID (insn
));
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED
,
255 regset used ATTRIBUTE_UNUSED
)
259 static struct common_sched_info_def sms_common_sched_info
;
261 static struct sched_deps_info_def sms_sched_deps_info
=
263 compute_jump_reg_dependencies
,
264 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
269 static struct haifa_sched_info sms_sched_info
=
278 NULL
, /* insn_finishes_block_p */
283 NULL
, NULL
, NULL
, NULL
,
288 /* Given HEAD and TAIL which are the first and last insns in a loop;
289 return the register which controls the loop. Return zero if it has
290 more than one occurrence in the loop besides the control part or the
291 do-loop pattern is not of the form we expect. */
293 doloop_register_get (rtx head ATTRIBUTE_UNUSED
, rtx tail ATTRIBUTE_UNUSED
)
295 #ifdef HAVE_doloop_end
296 rtx reg
, condition
, insn
, first_insn_not_to_check
;
301 /* TODO: Free SMS's dependence on doloop_condition_get. */
302 condition
= doloop_condition_get (tail
);
306 if (REG_P (XEXP (condition
, 0)))
307 reg
= XEXP (condition
, 0);
308 else if (GET_CODE (XEXP (condition
, 0)) == PLUS
309 && REG_P (XEXP (XEXP (condition
, 0), 0)))
310 reg
= XEXP (XEXP (condition
, 0), 0);
314 /* Check that the COUNT_REG has no other occurrences in the loop
315 until the decrement. We assume the control part consists of
316 either a single (parallel) branch-on-count or a (non-parallel)
317 branch immediately preceded by a single (decrement) insn. */
318 first_insn_not_to_check
= (GET_CODE (PATTERN (tail
)) == PARALLEL
? tail
319 : prev_nondebug_insn (tail
));
321 for (insn
= head
; insn
!= first_insn_not_to_check
; insn
= NEXT_INSN (insn
))
322 if (!DEBUG_INSN_P (insn
) && reg_mentioned_p (reg
, insn
))
326 fprintf (dump_file
, "SMS count_reg found ");
327 print_rtl_single (dump_file
, reg
);
328 fprintf (dump_file
, " outside control in insn:\n");
329 print_rtl_single (dump_file
, insn
);
341 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
342 that the number of iterations is a compile-time constant. If so,
343 return the rtx that sets COUNT_REG to a constant, and set COUNT to
344 this constant. Otherwise return 0. */
346 const_iteration_count (rtx count_reg
, basic_block pre_header
,
347 HOST_WIDEST_INT
* count
)
355 get_ebb_head_tail (pre_header
, pre_header
, &head
, &tail
);
357 for (insn
= tail
; insn
!= PREV_INSN (head
); insn
= PREV_INSN (insn
))
358 if (NONDEBUG_INSN_P (insn
) && single_set (insn
) &&
359 rtx_equal_p (count_reg
, SET_DEST (single_set (insn
))))
361 rtx pat
= single_set (insn
);
363 if (CONST_INT_P (SET_SRC (pat
)))
365 *count
= INTVAL (SET_SRC (pat
));
375 /* A very simple resource-based lower bound on the initiation interval.
376 ??? Improve the accuracy of this bound by considering the
377 utilization of various units. */
381 if (targetm
.sched
.sms_res_mii
)
382 return targetm
.sched
.sms_res_mii (g
);
384 return ((g
->num_nodes
- g
->num_debug
) / issue_rate
);
388 /* Points to the array that contains the sched data for each node. */
389 static node_sched_params_ptr node_sched_params
;
391 /* Allocate sched_params for each node and initialize it. Assumes that
392 the aux field of each node contain the asap bound (computed earlier),
393 and copies it into the sched_params field. */
395 set_node_sched_params (ddg_ptr g
)
399 /* Allocate for each node in the DDG a place to hold the "sched_data". */
400 /* Initialize ASAP/ALAP/HIGHT to zero. */
401 node_sched_params
= (node_sched_params_ptr
)
402 xcalloc (g
->num_nodes
,
403 sizeof (struct node_sched_params
));
405 /* Set the pointer of the general data of the node to point to the
406 appropriate sched_params structure. */
407 for (i
= 0; i
< g
->num_nodes
; i
++)
409 /* Watch out for aliasing problems? */
410 node_sched_params
[i
].asap
= g
->nodes
[i
].aux
.count
;
411 g
->nodes
[i
].aux
.info
= &node_sched_params
[i
];
416 print_node_sched_params (FILE *file
, int num_nodes
, ddg_ptr g
)
422 for (i
= 0; i
< num_nodes
; i
++)
424 node_sched_params_ptr nsp
= &node_sched_params
[i
];
425 rtx reg_move
= nsp
->first_reg_move
;
428 fprintf (file
, "Node = %d; INSN = %d\n", i
,
429 (INSN_UID (g
->nodes
[i
].insn
)));
430 fprintf (file
, " asap = %d:\n", nsp
->asap
);
431 fprintf (file
, " time = %d:\n", nsp
->time
);
432 fprintf (file
, " nreg_moves = %d:\n", nsp
->nreg_moves
);
433 for (j
= 0; j
< nsp
->nreg_moves
; j
++)
435 fprintf (file
, " reg_move = ");
436 print_rtl_single (file
, reg_move
);
437 reg_move
= PREV_INSN (reg_move
);
443 Breaking intra-loop register anti-dependences:
444 Each intra-loop register anti-dependence implies a cross-iteration true
445 dependence of distance 1. Therefore, we can remove such false dependencies
446 and figure out if the partial schedule broke them by checking if (for a
447 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
448 if so generate a register move. The number of such moves is equal to:
SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
nreg_moves = ----------------------------------- + 1 - {   dependence.
                             ii                           { 1 if not.
453 static struct undo_replace_buff_elem
*
454 generate_reg_moves (partial_schedule_ptr ps
, bool rescan
)
459 struct undo_replace_buff_elem
*reg_move_replaces
= NULL
;
461 for (i
= 0; i
< g
->num_nodes
; i
++)
463 ddg_node_ptr u
= &g
->nodes
[i
];
465 int nreg_moves
= 0, i_reg_move
;
466 sbitmap
*uses_of_defs
;
468 rtx prev_reg
, old_reg
;
470 /* Compute the number of reg_moves needed for u, by looking at life
471 ranges started at u (excluding self-loops). */
472 for (e
= u
->out
; e
; e
= e
->next_out
)
473 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
475 int nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
477 if (e
->distance
== 1)
478 nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
480 /* If dest precedes src in the schedule of the kernel, then dest
481 will read before src writes and we can save one reg_copy. */
482 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
483 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
486 nreg_moves
= MAX (nreg_moves
, nreg_moves4e
);
492 /* Every use of the register defined by node may require a different
493 copy of this register, depending on the time the use is scheduled.
494 Set a bitmap vector, telling which nodes use each copy of this
496 uses_of_defs
= sbitmap_vector_alloc (nreg_moves
, g
->num_nodes
);
497 sbitmap_vector_zero (uses_of_defs
, nreg_moves
);
498 for (e
= u
->out
; e
; e
= e
->next_out
)
499 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
501 int dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
503 if (e
->distance
== 1)
504 dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
506 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
507 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
511 SET_BIT (uses_of_defs
[dest_copy
- 1], e
->dest
->cuid
);
514 /* Now generate the reg_moves, attaching relevant uses to them. */
515 SCHED_NREG_MOVES (u
) = nreg_moves
;
516 old_reg
= prev_reg
= copy_rtx (SET_DEST (single_set (u
->insn
)));
517 /* Insert the reg-moves right before the notes which precede
518 the insn they relates to. */
519 last_reg_move
= u
->first_note
;
521 for (i_reg_move
= 0; i_reg_move
< nreg_moves
; i_reg_move
++)
523 unsigned int i_use
= 0;
524 rtx new_reg
= gen_reg_rtx (GET_MODE (prev_reg
));
525 rtx reg_move
= gen_move_insn (new_reg
, prev_reg
);
526 sbitmap_iterator sbi
;
528 add_insn_before (reg_move
, last_reg_move
, NULL
);
529 last_reg_move
= reg_move
;
531 if (!SCHED_FIRST_REG_MOVE (u
))
532 SCHED_FIRST_REG_MOVE (u
) = reg_move
;
534 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs
[i_reg_move
], 0, i_use
, sbi
)
536 struct undo_replace_buff_elem
*rep
;
538 rep
= (struct undo_replace_buff_elem
*)
539 xcalloc (1, sizeof (struct undo_replace_buff_elem
));
540 rep
->insn
= g
->nodes
[i_use
].insn
;
541 rep
->orig_reg
= old_reg
;
542 rep
->new_reg
= new_reg
;
544 if (! reg_move_replaces
)
545 reg_move_replaces
= rep
;
548 rep
->next
= reg_move_replaces
;
549 reg_move_replaces
= rep
;
552 replace_rtx (g
->nodes
[i_use
].insn
, old_reg
, new_reg
);
554 df_insn_rescan (g
->nodes
[i_use
].insn
);
559 sbitmap_vector_free (uses_of_defs
);
561 return reg_move_replaces
;
564 /* Free memory allocated for the undo buffer. */
566 free_undo_replace_buff (struct undo_replace_buff_elem
*reg_move_replaces
)
569 while (reg_move_replaces
)
571 struct undo_replace_buff_elem
*rep
= reg_move_replaces
;
573 reg_move_replaces
= reg_move_replaces
->next
;
578 /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
579 SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
580 will move to cycle zero. */
582 reset_sched_times (partial_schedule_ptr ps
, int amount
)
586 ps_insn_ptr crr_insn
;
588 for (row
= 0; row
< ii
; row
++)
589 for (crr_insn
= ps
->rows
[row
]; crr_insn
; crr_insn
= crr_insn
->next_in_row
)
591 ddg_node_ptr u
= crr_insn
->node
;
592 int normalized_time
= SCHED_TIME (u
) - amount
;
593 int new_min_cycle
= PS_MIN_CYCLE (ps
) - amount
;
594 int sc_until_cycle_zero
, stage
;
598 /* Print the scheduling times after the rotation. */
599 fprintf (dump_file
, "crr_insn->node=%d (insn id %d), "
600 "crr_insn->cycle=%d, min_cycle=%d", crr_insn
->node
->cuid
,
601 INSN_UID (crr_insn
->node
->insn
), SCHED_TIME (u
),
603 if (JUMP_P (crr_insn
->node
->insn
))
604 fprintf (dump_file
, " (branch)");
605 fprintf (dump_file
, "\n");
608 gcc_assert (SCHED_TIME (u
) >= ps
->min_cycle
);
609 gcc_assert (SCHED_TIME (u
) <= ps
->max_cycle
);
610 SCHED_TIME (u
) = normalized_time
;
611 SCHED_ROW (u
) = SMODULO (normalized_time
, ii
);
613 /* The calculation of stage count is done adding the number
614 of stages before cycle zero and after cycle zero. */
615 sc_until_cycle_zero
= CALC_STAGE_COUNT (-1, new_min_cycle
, ii
);
617 if (SCHED_TIME (u
) < 0)
619 stage
= CALC_STAGE_COUNT (-1, SCHED_TIME (u
), ii
);
620 SCHED_STAGE (u
) = sc_until_cycle_zero
- stage
;
624 stage
= CALC_STAGE_COUNT (SCHED_TIME (u
), 0, ii
);
625 SCHED_STAGE (u
) = sc_until_cycle_zero
+ stage
- 1;
630 /* Set SCHED_COLUMN of each node according to its position in PS. */
632 set_columns_for_ps (partial_schedule_ptr ps
)
636 for (row
= 0; row
< ps
->ii
; row
++)
638 ps_insn_ptr cur_insn
= ps
->rows
[row
];
641 for (; cur_insn
; cur_insn
= cur_insn
->next_in_row
)
642 SCHED_COLUMN (cur_insn
->node
) = column
++;
646 /* Permute the insns according to their order in PS, from row 0 to
647 row ii-1, and position them right before LAST. This schedules
648 the insns of the loop kernel. */
650 permute_partial_schedule (partial_schedule_ptr ps
, rtx last
)
656 for (row
= 0; row
< ii
; row
++)
657 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
658 if (PREV_INSN (last
) != ps_ij
->node
->insn
)
659 reorder_insns_nobb (ps_ij
->node
->first_note
, ps_ij
->node
->insn
,
664 duplicate_insns_of_cycles (partial_schedule_ptr ps
, int from_stage
,
665 int to_stage
, int for_prolog
, rtx count_reg
)
670 for (row
= 0; row
< ps
->ii
; row
++)
671 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
673 ddg_node_ptr u_node
= ps_ij
->node
;
675 rtx reg_move
= NULL_RTX
;
677 /* Do not duplicate any insn which refers to count_reg as it
678 belongs to the control part.
679 The closing branch is scheduled as well and thus should
681 TODO: This should be done by analyzing the control part of
683 if (reg_mentioned_p (count_reg
, u_node
->insn
)
684 || JUMP_P (ps_ij
->node
->insn
))
689 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
690 number of reg_moves starting with the second occurrence of
691 u_node, which is generated if its SCHED_STAGE <= to_stage. */
692 i_reg_moves
= to_stage
- SCHED_STAGE (u_node
) + 1;
693 i_reg_moves
= MAX (i_reg_moves
, 0);
694 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
696 /* The reg_moves start from the *first* reg_move backwards. */
699 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
700 for (j
= 1; j
< i_reg_moves
; j
++)
701 reg_move
= PREV_INSN (reg_move
);
704 else /* It's for the epilog. */
706 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
707 starting to decrease one stage after u_node no longer occurs;
708 that is, generate all reg_moves until
709 SCHED_STAGE (u_node) == from_stage - 1. */
710 i_reg_moves
= SCHED_NREG_MOVES (u_node
)
711 - (from_stage
- SCHED_STAGE (u_node
) - 1);
712 i_reg_moves
= MAX (i_reg_moves
, 0);
713 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
715 /* The reg_moves start from the *last* reg_move forwards. */
718 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
719 for (j
= 1; j
< SCHED_NREG_MOVES (u_node
); j
++)
720 reg_move
= PREV_INSN (reg_move
);
724 for (j
= 0; j
< i_reg_moves
; j
++, reg_move
= NEXT_INSN (reg_move
))
725 emit_insn (copy_rtx (PATTERN (reg_move
)));
726 if (SCHED_STAGE (u_node
) >= from_stage
727 && SCHED_STAGE (u_node
) <= to_stage
)
728 duplicate_insn_chain (u_node
->first_note
, u_node
->insn
);
733 /* Generate the instructions (including reg_moves) for prolog & epilog. */
735 generate_prolog_epilog (partial_schedule_ptr ps
, struct loop
*loop
,
736 rtx count_reg
, rtx count_init
)
739 int last_stage
= PS_STAGE_COUNT (ps
) - 1;
742 /* Generate the prolog, inserting its insns on the loop-entry edge. */
747 /* Generate instructions at the beginning of the prolog to
748 adjust the loop count by STAGE_COUNT. If loop count is constant
749 (count_init), this constant is adjusted by STAGE_COUNT in
750 generate_prolog_epilog function. */
751 rtx sub_reg
= NULL_RTX
;
753 sub_reg
= expand_simple_binop (GET_MODE (count_reg
), MINUS
,
754 count_reg
, GEN_INT (last_stage
),
755 count_reg
, 1, OPTAB_DIRECT
);
756 gcc_assert (REG_P (sub_reg
));
757 if (REGNO (sub_reg
) != REGNO (count_reg
))
758 emit_move_insn (count_reg
, sub_reg
);
761 for (i
= 0; i
< last_stage
; i
++)
762 duplicate_insns_of_cycles (ps
, 0, i
, 1, count_reg
);
764 /* Put the prolog on the entry edge. */
765 e
= loop_preheader_edge (loop
);
766 split_edge_and_insert (e
, get_insns ());
770 /* Generate the epilog, inserting its insns on the loop-exit edge. */
773 for (i
= 0; i
< last_stage
; i
++)
774 duplicate_insns_of_cycles (ps
, i
+ 1, last_stage
, 0, count_reg
);
776 /* Put the epilogue on the exit edge. */
777 gcc_assert (single_exit (loop
));
778 e
= single_exit (loop
);
779 split_edge_and_insert (e
, get_insns ());
783 /* Return true if all the BBs of the loop are empty except the
786 loop_single_full_bb_p (struct loop
*loop
)
789 basic_block
*bbs
= get_loop_body (loop
);
791 for (i
= 0; i
< loop
->num_nodes
; i
++)
794 bool empty_bb
= true;
796 if (bbs
[i
] == loop
->header
)
799 /* Make sure that basic blocks other than the header
800 have only notes labels or jumps. */
801 get_ebb_head_tail (bbs
[i
], bbs
[i
], &head
, &tail
);
802 for (; head
!= NEXT_INSN (tail
); head
= NEXT_INSN (head
))
804 if (NOTE_P (head
) || LABEL_P (head
)
805 || (INSN_P (head
) && (DEBUG_INSN_P (head
) || JUMP_P (head
))))
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.
   NOTE: LOOP is expanded unparenthesized and more than once; pass a
   plain variable.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )                      \
				  && (EDGE_COUNT (loop->latch->preds) == 1)  \
				  && (EDGE_COUNT (loop->latch->succs) == 1))
827 /* Return true if the loop is in its canonical form and false if not.
828 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
830 loop_canon_p (struct loop
*loop
)
833 if (loop
->inner
|| !loop_outer (loop
))
836 fprintf (dump_file
, "SMS loop inner or !loop_outer\n");
840 if (!single_exit (loop
))
844 rtx insn
= BB_END (loop
->header
);
846 fprintf (dump_file
, "SMS loop many exits ");
847 fprintf (dump_file
, " %s %d (file, line)\n",
848 insn_file (insn
), insn_line (insn
));
853 if (! SIMPLE_SMS_LOOP_P (loop
) && ! loop_single_full_bb_p (loop
))
857 rtx insn
= BB_END (loop
->header
);
859 fprintf (dump_file
, "SMS loop many BBs. ");
860 fprintf (dump_file
, " %s %d (file, line)\n",
861 insn_file (insn
), insn_line (insn
));
869 /* If there are more than one entry for the loop,
870 make it one by splitting the first entry edge and
871 redirecting the others to the new BB. */
873 canon_loop (struct loop
*loop
)
878 /* Avoid annoying special cases of edges going to exit
880 FOR_EACH_EDGE (e
, i
, EXIT_BLOCK_PTR
->preds
)
881 if ((e
->flags
& EDGE_FALLTHRU
) && (EDGE_COUNT (e
->src
->succs
) > 1))
884 if (loop
->latch
== loop
->header
885 || EDGE_COUNT (loop
->latch
->succs
) > 1)
887 FOR_EACH_EDGE (e
, i
, loop
->header
->preds
)
888 if (e
->src
== loop
->latch
)
896 setup_sched_infos (void)
898 memcpy (&sms_common_sched_info
, &haifa_common_sched_info
,
899 sizeof (sms_common_sched_info
));
900 sms_common_sched_info
.sched_pass_id
= SCHED_SMS_PASS
;
901 common_sched_info
= &sms_common_sched_info
;
903 sched_deps_info
= &sms_sched_deps_info
;
904 current_sched_info
= &sms_sched_info
;
/* Probability in % that the sms-ed loop rolls enough so that optimized
   version may be entered.  Just a guess.  */
#define PROB_SMS_ENOUGH_ITERATIONS 80

/* Used to calculate the upper bound of ii: maxii is set to
   MAX (max_asap, MAXII_FACTOR * mii).  */
#define MAXII_FACTOR 2
914 /* Main entry point, perform SMS scheduling on the loops of the function
915 that consist of single basic blocks. */
924 partial_schedule_ptr ps
;
925 basic_block bb
= NULL
;
927 basic_block condition_bb
= NULL
;
929 gcov_type trip_count
= 0;
931 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
932 | LOOPS_HAVE_RECORDED_EXITS
);
933 if (number_of_loops () <= 1)
935 loop_optimizer_finalize ();
936 return; /* There are no loops to schedule. */
939 /* Initialize issue_rate. */
940 if (targetm
.sched
.issue_rate
)
942 int temp
= reload_completed
;
944 reload_completed
= 1;
945 issue_rate
= targetm
.sched
.issue_rate ();
946 reload_completed
= temp
;
951 /* Initialize the scheduler. */
952 setup_sched_infos ();
955 /* Allocate memory to hold the DDG array one entry for each loop.
956 We use loop->num as index into this array. */
957 g_arr
= XCNEWVEC (ddg_ptr
, number_of_loops ());
961 fprintf (dump_file
, "\n\nSMS analysis phase\n");
962 fprintf (dump_file
, "===================\n\n");
965 /* Build DDGs for all the relevant loops and hold them in G_ARR
966 indexed by the loop index. */
967 FOR_EACH_LOOP (li
, loop
, 0)
973 if (dbg_cnt (sms_sched_loop
) == false)
976 fprintf (dump_file
, "SMS reached max limit... \n");
983 rtx insn
= BB_END (loop
->header
);
985 fprintf (dump_file
, "SMS loop num: %d, file: %s, line: %d\n",
986 loop
->num
, insn_file (insn
), insn_line (insn
));
990 if (! loop_canon_p (loop
))
993 if (! loop_single_full_bb_p (loop
))
996 fprintf (dump_file
, "SMS not loop_single_full_bb_p\n");
1002 get_ebb_head_tail (bb
, bb
, &head
, &tail
);
1003 latch_edge
= loop_latch_edge (loop
);
1004 gcc_assert (single_exit (loop
));
1005 if (single_exit (loop
)->count
)
1006 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
1008 /* Perform SMS only on loops that their average count is above threshold. */
1010 if ( latch_edge
->count
1011 && (latch_edge
->count
< single_exit (loop
)->count
* SMS_LOOP_AVERAGE_COUNT_THRESHOLD
))
1015 fprintf (dump_file
, " %s %d (file, line)\n",
1016 insn_file (tail
), insn_line (tail
));
1017 fprintf (dump_file
, "SMS single-bb-loop\n");
1018 if (profile_info
&& flag_branch_probabilities
)
1020 fprintf (dump_file
, "SMS loop-count ");
1021 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1022 (HOST_WIDEST_INT
) bb
->count
);
1023 fprintf (dump_file
, "\n");
1024 fprintf (dump_file
, "SMS trip-count ");
1025 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1026 (HOST_WIDEST_INT
) trip_count
);
1027 fprintf (dump_file
, "\n");
1028 fprintf (dump_file
, "SMS profile-sum-max ");
1029 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1030 (HOST_WIDEST_INT
) profile_info
->sum_max
);
1031 fprintf (dump_file
, "\n");
1037 /* Make sure this is a doloop. */
1038 if ( !(count_reg
= doloop_register_get (head
, tail
)))
1041 fprintf (dump_file
, "SMS doloop_register_get failed\n");
1045 /* Don't handle BBs with calls or barriers or auto-increment insns
1046 (to avoid creating invalid reg-moves for the auto-increment insns),
1047 or !single_set with the exception of instructions that include
1048 count_reg---these instructions are part of the control part
1049 that do-loop recognizes.
1050 ??? Should handle auto-increment insns.
1051 ??? Should handle insns defining subregs. */
1052 for (insn
= head
; insn
!= NEXT_INSN (tail
); insn
= NEXT_INSN (insn
))
1058 || (NONDEBUG_INSN_P (insn
) && !JUMP_P (insn
)
1059 && !single_set (insn
) && GET_CODE (PATTERN (insn
)) != USE
1060 && !reg_mentioned_p (count_reg
, insn
))
1061 || (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0)
1062 || (INSN_P (insn
) && (set
= single_set (insn
))
1063 && GET_CODE (SET_DEST (set
)) == SUBREG
))
1067 if (insn
!= NEXT_INSN (tail
))
1072 fprintf (dump_file
, "SMS loop-with-call\n");
1073 else if (BARRIER_P (insn
))
1074 fprintf (dump_file
, "SMS loop-with-barrier\n");
1075 else if (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0)
1076 fprintf (dump_file
, "SMS reg inc\n");
1077 else if ((NONDEBUG_INSN_P (insn
) && !JUMP_P (insn
)
1078 && !single_set (insn
) && GET_CODE (PATTERN (insn
)) != USE
))
1079 fprintf (dump_file
, "SMS loop-with-not-single-set\n");
1081 fprintf (dump_file
, "SMS loop with subreg in lhs\n");
1082 print_rtl_single (dump_file
, insn
);
1088 /* Always schedule the closing branch with the rest of the
1089 instructions. The branch is rotated to be in row ii-1 at the
1090 end of the scheduling procedure to make sure it's the last
1091 instruction in the iteration. */
1092 if (! (g
= create_ddg (bb
, 1)))
1095 fprintf (dump_file
, "SMS create_ddg failed\n");
1099 g_arr
[loop
->num
] = g
;
1101 fprintf (dump_file
, "...OK\n");
1106 fprintf (dump_file
, "\nSMS transformation phase\n");
1107 fprintf (dump_file
, "=========================\n\n");
1110 /* We don't want to perform SMS on new loops - created by versioning. */
1111 FOR_EACH_LOOP (li
, loop
, 0)
1114 rtx count_reg
, count_init
;
1116 unsigned stage_count
= 0;
1117 HOST_WIDEST_INT loop_count
= 0;
1119 if (! (g
= g_arr
[loop
->num
]))
1124 rtx insn
= BB_END (loop
->header
);
1126 fprintf (dump_file
, "SMS loop num: %d, file: %s, line: %d\n",
1127 loop
->num
, insn_file (insn
), insn_line (insn
));
1129 print_ddg (dump_file
, g
);
1132 get_ebb_head_tail (loop
->header
, loop
->header
, &head
, &tail
);
1134 latch_edge
= loop_latch_edge (loop
);
1135 gcc_assert (single_exit (loop
));
1136 if (single_exit (loop
)->count
)
1137 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
1141 fprintf (dump_file
, " %s %d (file, line)\n",
1142 insn_file (tail
), insn_line (tail
));
1143 fprintf (dump_file
, "SMS single-bb-loop\n");
1144 if (profile_info
&& flag_branch_probabilities
)
1146 fprintf (dump_file
, "SMS loop-count ");
1147 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1148 (HOST_WIDEST_INT
) bb
->count
);
1149 fprintf (dump_file
, "\n");
1150 fprintf (dump_file
, "SMS profile-sum-max ");
1151 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1152 (HOST_WIDEST_INT
) profile_info
->sum_max
);
1153 fprintf (dump_file
, "\n");
1155 fprintf (dump_file
, "SMS doloop\n");
1156 fprintf (dump_file
, "SMS built-ddg %d\n", g
->num_nodes
);
1157 fprintf (dump_file
, "SMS num-loads %d\n", g
->num_loads
);
1158 fprintf (dump_file
, "SMS num-stores %d\n", g
->num_stores
);
1162 /* In case of th loop have doloop register it gets special
1164 count_init
= NULL_RTX
;
1165 if ((count_reg
= doloop_register_get (head
, tail
)))
1167 basic_block pre_header
;
1169 pre_header
= loop_preheader_edge (loop
)->src
;
1170 count_init
= const_iteration_count (count_reg
, pre_header
,
1173 gcc_assert (count_reg
);
1175 if (dump_file
&& count_init
)
1177 fprintf (dump_file
, "SMS const-doloop ");
1178 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1180 fprintf (dump_file
, "\n");
1183 node_order
= XNEWVEC (int, g
->num_nodes
);
1185 mii
= 1; /* Need to pass some estimate of mii. */
1186 rec_mii
= sms_order_nodes (g
, mii
, node_order
, &max_asap
);
1187 mii
= MAX (res_MII (g
), rec_mii
);
1188 maxii
= MAX (max_asap
, MAXII_FACTOR
* mii
);
1191 fprintf (dump_file
, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1192 rec_mii
, mii
, maxii
);
1194 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1196 set_node_sched_params (g
);
1198 ps
= sms_schedule_by_order (g
, mii
, maxii
, node_order
);
1202 stage_count
= calculate_stage_count (ps
);
1203 gcc_assert(stage_count
>= 1);
1204 PS_STAGE_COUNT(ps
) = stage_count
;
1207 /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1208 1 means that there is no interleaving between iterations thus
1209 we let the scheduling passes do the job in this case. */
1210 if (stage_count
< (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC
)
1211 || (count_init
&& (loop_count
<= stage_count
))
1212 || (flag_branch_probabilities
&& (trip_count
<= stage_count
)))
1216 fprintf (dump_file
, "SMS failed... \n");
1217 fprintf (dump_file
, "SMS sched-failed (stage-count=%d, loop-count=", stage_count
);
1218 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, loop_count
);
1219 fprintf (dump_file
, ", trip-count=");
1220 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, trip_count
);
1221 fprintf (dump_file
, ")\n");
1226 struct undo_replace_buff_elem
*reg_move_replaces
;
1227 int amount
= SCHED_TIME (g
->closing_branch
) + 1;
1229 /* Set the stage boundaries. The closing_branch was scheduled
1230 and should appear in the last (ii-1) row. */
1231 reset_sched_times (ps
, amount
);
1232 rotate_partial_schedule (ps
, amount
);
1233 set_columns_for_ps (ps
);
1240 "SMS succeeded %d %d (with ii, sc)\n", ps
->ii
,
1242 print_partial_schedule (ps
, dump_file
);
1245 /* case the BCT count is not known , Do loop-versioning */
1246 if (count_reg
&& ! count_init
)
1248 rtx comp_rtx
= gen_rtx_fmt_ee (GT
, VOIDmode
, count_reg
,
1249 GEN_INT(stage_count
));
1250 unsigned prob
= (PROB_SMS_ENOUGH_ITERATIONS
1251 * REG_BR_PROB_BASE
) / 100;
1253 loop_version (loop
, comp_rtx
, &condition_bb
,
1254 prob
, prob
, REG_BR_PROB_BASE
- prob
,
1258 /* Set new iteration count of loop kernel. */
1259 if (count_reg
&& count_init
)
1260 SET_SRC (single_set (count_init
)) = GEN_INT (loop_count
1263 /* Now apply the scheduled kernel to the RTL of the loop. */
1264 permute_partial_schedule (ps
, g
->closing_branch
->first_note
);
1266 /* Mark this loop as software pipelined so the later
1267 scheduling passes doesn't touch it. */
1268 if (! flag_resched_modulo_sched
)
1269 g
->bb
->flags
|= BB_DISABLE_SCHEDULE
;
1270 /* The life-info is not valid any more. */
1271 df_set_bb_dirty (g
->bb
);
1273 reg_move_replaces
= generate_reg_moves (ps
, true);
1275 print_node_sched_params (dump_file
, g
->num_nodes
, g
);
1276 /* Generate prolog and epilog. */
1277 generate_prolog_epilog (ps
, loop
, count_reg
, count_init
);
1279 free_undo_replace_buff (reg_move_replaces
);
1282 free_partial_schedule (ps
);
1283 free (node_sched_params
);
1290 /* Release scheduler data, needed until now because of DFA. */
1291 haifa_sched_finish ();
1292 loop_optimizer_finalize ();
1295 /* The SMS scheduling algorithm itself
1296 -----------------------------------
1297 Input: 'O' an ordered list of insns of a loop.
1298 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1300 'Q' is the empty Set
1301 'PS' is the partial schedule; it holds the currently scheduled nodes with
1303 'PSP' previously scheduled predecessors.
1304 'PSS' previously scheduled successors.
1305 't(u)' the cycle where u is scheduled.
1306 'l(u)' is the latency of u.
1307 'd(v,u)' is the dependence distance from v to u.
1308 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1309 the node ordering phase.
1310 'check_hardware_resources_conflicts(u, PS, c)'
1311 run a trace around cycle/slot through DFA model
1312 to check resource conflicts involving instruction u
1313 at cycle c given the partial schedule PS.
1314 'add_to_partial_schedule_at_time(u, PS, c)'
1315 Add the node/instruction u to the partial schedule
1317 'calculate_register_pressure(PS)'
1318 Given a schedule of instructions, calculate the register
1319 pressure it implies. One implementation could be the
1320 maximum number of overlapping live ranges.
1321 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1322 registers available in the hardware.
1326 3. for each node u in O in pre-computed order
1327 4. if (PSP(u) != Q && PSS(u) == Q) then
1328 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1329 6. start = Early_start; end = Early_start + II - 1; step = 1
1330 11. else if (PSP(u) == Q && PSS(u) != Q) then
1331 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1332 13. start = Late_start; end = Late_start - II + 1; step = -1
1333 14. else if (PSP(u) != Q && PSS(u) != Q) then
1334 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1335 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1336 17. start = Early_start;
1337 18. end = min(Early_start + II - 1 , Late_start);
1339 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1340 21. start = ASAP(u); end = start + II - 1; step = 1
1344 24. for (c = start ; c != end ; c += step)
1345 25. if check_hardware_resources_conflicts(u, PS, c) then
1346 26. add_to_partial_schedule_at_time(u, PS, c)
1351 31. if (success == false) then
1353 33. if (II > maxII) then
1354 34. finish - failed to schedule
1359 39. if (calculate_register_pressure(PS) > maxRP) then
1362 42. compute epilogue & prologue
1363 43. finish - succeeded to schedule
1366 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1367 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1368 set to 0 to save compile time. */
1369 #define DFA_HISTORY SMS_DFA_HISTORY
1371 /* A threshold for the number of repeated unsuccessful attempts to insert
1372 an empty row, before we flush the partial schedule and start over. */
1373 #define MAX_SPLIT_NUM 10
1374 /* Given the partial schedule PS, this function calculates and returns the
1375 cycles in which we can schedule the node with the given index I.
1376 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1377 noticed that there are several cases in which we fail to SMS the loop
1378 because the sched window of a node is empty due to tight data-deps. In
1379 such cases we want to unschedule some of the predecessors/successors
1380 until we get non-empty scheduling window. It returns -1 if the
1381 scheduling window is empty and zero otherwise. */
1384 get_sched_window (partial_schedule_ptr ps
, int *nodes_order
, int i
,
1385 sbitmap sched_nodes
, int ii
, int *start_p
, int *step_p
, int *end_p
)
1387 int start
, step
, end
;
1389 int u
= nodes_order
[i
];
1390 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1391 sbitmap psp
= sbitmap_alloc (ps
->g
->num_nodes
);
1392 sbitmap pss
= sbitmap_alloc (ps
->g
->num_nodes
);
1393 sbitmap u_node_preds
= NODE_PREDECESSORS (u_node
);
1394 sbitmap u_node_succs
= NODE_SUCCESSORS (u_node
);
1398 /* 1. compute sched window for u (start, end, step). */
1401 psp_not_empty
= sbitmap_a_and_b_cg (psp
, u_node_preds
, sched_nodes
);
1402 pss_not_empty
= sbitmap_a_and_b_cg (pss
, u_node_succs
, sched_nodes
);
1404 if (psp_not_empty
&& !pss_not_empty
)
1406 int early_start
= INT_MIN
;
1409 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1411 ddg_node_ptr v_node
= e
->src
;
1415 fprintf (dump_file
, "\nProcessing edge: ");
1416 print_ddg_edge (dump_file
, e
);
1418 "\nScheduling %d (%d) in psp_not_empty,"
1419 " checking p %d (%d): ", u_node
->cuid
,
1420 INSN_UID (u_node
->insn
), v_node
->cuid
, INSN_UID
1424 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1426 int p_st
= SCHED_TIME (v_node
);
1429 MAX (early_start
, p_st
+ e
->latency
- (e
->distance
* ii
));
1433 "pred st = %d; early_start = %d; latency: %d",
1434 p_st
, early_start
, e
->latency
);
1436 if (e
->data_type
== MEM_DEP
)
1437 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1440 fprintf (dump_file
, "the node is not scheduled\n");
1442 start
= early_start
;
1443 end
= MIN (end
, early_start
+ ii
);
1444 /* Schedule the node close to it's predecessors. */
1449 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1450 u_node
->cuid
, INSN_UID (u_node
->insn
), start
, end
, step
);
1453 else if (!psp_not_empty
&& pss_not_empty
)
1455 int late_start
= INT_MAX
;
1458 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1460 ddg_node_ptr v_node
= e
->dest
;
1464 fprintf (dump_file
, "\nProcessing edge:");
1465 print_ddg_edge (dump_file
, e
);
1467 "\nScheduling %d (%d) in pss_not_empty,"
1468 " checking s %d (%d): ", u_node
->cuid
,
1469 INSN_UID (u_node
->insn
), v_node
->cuid
, INSN_UID
1473 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1475 int s_st
= SCHED_TIME (v_node
);
1477 late_start
= MIN (late_start
,
1478 s_st
- e
->latency
+ (e
->distance
* ii
));
1482 "succ st = %d; late_start = %d; latency = %d",
1483 s_st
, late_start
, e
->latency
);
1485 if (e
->data_type
== MEM_DEP
)
1486 end
= MAX (end
, SCHED_TIME (v_node
) - ii
+ 1);
1488 fprintf (dump_file
, "end = %d\n", end
);
1492 fprintf (dump_file
, "the node is not scheduled\n");
1496 end
= MAX (end
, late_start
- ii
);
1497 /* Schedule the node close to it's successors. */
1502 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1503 u_node
->cuid
, INSN_UID (u_node
->insn
), start
, end
, step
);
1507 else if (psp_not_empty
&& pss_not_empty
)
1509 int early_start
= INT_MIN
;
1510 int late_start
= INT_MAX
;
1511 int count_preds
= 0;
1512 int count_succs
= 0;
1516 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1518 ddg_node_ptr v_node
= e
->src
;
1522 fprintf (dump_file
, "\nProcessing edge:");
1523 print_ddg_edge (dump_file
, e
);
1525 "\nScheduling %d (%d) in psp_pss_not_empty,"
1526 " checking p %d (%d): ", u_node
->cuid
, INSN_UID
1527 (u_node
->insn
), v_node
->cuid
, INSN_UID
1531 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1533 int p_st
= SCHED_TIME (v_node
);
1535 early_start
= MAX (early_start
,
1537 - (e
->distance
* ii
));
1541 "pred st = %d; early_start = %d; latency = %d",
1542 p_st
, early_start
, e
->latency
);
1544 if (e
->type
== TRUE_DEP
&& e
->data_type
== REG_DEP
)
1547 if (e
->data_type
== MEM_DEP
)
1548 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1551 fprintf (dump_file
, "the node is not scheduled\n");
1554 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1556 ddg_node_ptr v_node
= e
->dest
;
1560 fprintf (dump_file
, "\nProcessing edge:");
1561 print_ddg_edge (dump_file
, e
);
1563 "\nScheduling %d (%d) in psp_pss_not_empty,"
1564 " checking s %d (%d): ", u_node
->cuid
, INSN_UID
1565 (u_node
->insn
), v_node
->cuid
, INSN_UID
1569 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1571 int s_st
= SCHED_TIME (v_node
);
1573 late_start
= MIN (late_start
,
1575 + (e
->distance
* ii
));
1579 "succ st = %d; late_start = %d; latency = %d",
1580 s_st
, late_start
, e
->latency
);
1582 if (e
->type
== TRUE_DEP
&& e
->data_type
== REG_DEP
)
1585 if (e
->data_type
== MEM_DEP
)
1586 start
= MAX (start
, SCHED_TIME (v_node
) - ii
+ 1);
1589 fprintf (dump_file
, "the node is not scheduled\n");
1592 start
= MAX (start
, early_start
);
1593 end
= MIN (end
, MIN (early_start
+ ii
, late_start
+ 1));
1595 /* If there are more successors than predecessors schedule the
1596 node close to it's successors. */
1597 if (count_succs
>= count_preds
)
1599 int old_start
= start
;
1602 end
= old_start
- 1;
1606 else /* psp is empty && pss is empty. */
1608 start
= SCHED_ASAP (u_node
);
1619 if ((start
>= end
&& step
== 1) || (start
<= end
&& step
== -1))
1622 fprintf (dump_file
, "\nEmpty window: start=%d, end=%d, step=%d\n",
1630 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1631 node currently been scheduled. At the end of the calculation
1632 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1633 U_NODE which are (1) already scheduled in the first/last row of
1634 U_NODE's scheduling window, (2) whose dependence inequality with U
1635 becomes an equality when U is scheduled in this same row, and (3)
1636 whose dependence latency is zero.
1638 The first and last rows are calculated using the following parameters:
1639 START/END rows - The cycles that begins/ends the traversal on the window;
1640 searching for an empty cycle to schedule U_NODE.
1641 STEP - The direction in which we traverse the window.
1642 II - The initiation interval. */
1645 calculate_must_precede_follow (ddg_node_ptr u_node
, int start
, int end
,
1646 int step
, int ii
, sbitmap sched_nodes
,
1647 sbitmap must_precede
, sbitmap must_follow
)
1650 int first_cycle_in_window
, last_cycle_in_window
;
1652 gcc_assert (must_precede
&& must_follow
);
1654 /* Consider the following scheduling window:
1655 {first_cycle_in_window, first_cycle_in_window+1, ...,
1656 last_cycle_in_window}. If step is 1 then the following will be
1657 the order we traverse the window: {start=first_cycle_in_window,
1658 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1659 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1660 end=first_cycle_in_window-1} if step is -1. */
1661 first_cycle_in_window
= (step
== 1) ? start
: end
- step
;
1662 last_cycle_in_window
= (step
== 1) ? end
- step
: start
;
1664 sbitmap_zero (must_precede
);
1665 sbitmap_zero (must_follow
);
1668 fprintf (dump_file
, "\nmust_precede: ");
1670 /* Instead of checking if:
1671 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1672 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1673 first_cycle_in_window)
1675 we use the fact that latency is non-negative:
1676 SCHED_TIME (e->src) - (e->distance * ii) <=
1677 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1678 first_cycle_in_window
1680 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1681 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1682 if (TEST_BIT (sched_nodes
, e
->src
->cuid
)
1683 && ((SCHED_TIME (e
->src
) - (e
->distance
* ii
)) ==
1684 first_cycle_in_window
))
1687 fprintf (dump_file
, "%d ", e
->src
->cuid
);
1689 SET_BIT (must_precede
, e
->src
->cuid
);
1693 fprintf (dump_file
, "\nmust_follow: ");
1695 /* Instead of checking if:
1696 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1697 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1698 last_cycle_in_window)
1700 we use the fact that latency is non-negative:
1701 SCHED_TIME (e->dest) + (e->distance * ii) >=
1702 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1703 last_cycle_in_window
1705 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1706 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1707 if (TEST_BIT (sched_nodes
, e
->dest
->cuid
)
1708 && ((SCHED_TIME (e
->dest
) + (e
->distance
* ii
)) ==
1709 last_cycle_in_window
))
1712 fprintf (dump_file
, "%d ", e
->dest
->cuid
);
1714 SET_BIT (must_follow
, e
->dest
->cuid
);
1718 fprintf (dump_file
, "\n");
1721 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1722 parameters to decide if that's possible:
1723 PS - The partial schedule.
1724 U - The serial number of U_NODE.
1725 NUM_SPLITS - The number of row splits made so far.
1726 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1727 the first row of the scheduling window)
1728 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1729 last row of the scheduling window) */
1732 try_scheduling_node_in_cycle (partial_schedule_ptr ps
, ddg_node_ptr u_node
,
1733 int u
, int cycle
, sbitmap sched_nodes
,
1734 int *num_splits
, sbitmap must_precede
,
1735 sbitmap must_follow
)
1740 verify_partial_schedule (ps
, sched_nodes
);
1741 psi
= ps_add_node_check_conflicts (ps
, u_node
, cycle
,
1742 must_precede
, must_follow
);
1745 SCHED_TIME (u_node
) = cycle
;
1746 SET_BIT (sched_nodes
, u
);
1750 fprintf (dump_file
, "Scheduled w/o split in %d\n", cycle
);
1757 /* This function implements the scheduling algorithm for SMS according to the
1759 static partial_schedule_ptr
1760 sms_schedule_by_order (ddg_ptr g
, int mii
, int maxii
, int *nodes_order
)
1763 int i
, c
, success
, num_splits
= 0;
1764 int flush_and_start_over
= true;
1765 int num_nodes
= g
->num_nodes
;
1766 int start
, end
, step
; /* Place together into one struct? */
1767 sbitmap sched_nodes
= sbitmap_alloc (num_nodes
);
1768 sbitmap must_precede
= sbitmap_alloc (num_nodes
);
1769 sbitmap must_follow
= sbitmap_alloc (num_nodes
);
1770 sbitmap tobe_scheduled
= sbitmap_alloc (num_nodes
);
1772 partial_schedule_ptr ps
= create_partial_schedule (ii
, g
, DFA_HISTORY
);
1774 sbitmap_ones (tobe_scheduled
);
1775 sbitmap_zero (sched_nodes
);
1777 while (flush_and_start_over
&& (ii
< maxii
))
1781 fprintf (dump_file
, "Starting with ii=%d\n", ii
);
1782 flush_and_start_over
= false;
1783 sbitmap_zero (sched_nodes
);
1785 for (i
= 0; i
< num_nodes
; i
++)
1787 int u
= nodes_order
[i
];
1788 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1789 rtx insn
= u_node
->insn
;
1791 if (!NONDEBUG_INSN_P (insn
))
1793 RESET_BIT (tobe_scheduled
, u
);
1797 if (TEST_BIT (sched_nodes
, u
))
1800 /* Try to get non-empty scheduling window. */
1802 if (get_sched_window (ps
, nodes_order
, i
, sched_nodes
, ii
, &start
,
1806 fprintf (dump_file
, "\nTrying to schedule node %d \
1807 INSN = %d in (%d .. %d) step %d\n", u
, (INSN_UID
1808 (g
->nodes
[u
].insn
)), start
, end
, step
);
1810 gcc_assert ((step
> 0 && start
< end
)
1811 || (step
< 0 && start
> end
));
1813 calculate_must_precede_follow (u_node
, start
, end
, step
, ii
,
1814 sched_nodes
, must_precede
,
1817 for (c
= start
; c
!= end
; c
+= step
)
1819 sbitmap tmp_precede
= NULL
;
1820 sbitmap tmp_follow
= NULL
;
1825 tmp_precede
= must_precede
;
1826 else /* step == -1. */
1827 tmp_follow
= must_follow
;
1829 if (c
== end
- step
)
1832 tmp_follow
= must_follow
;
1833 else /* step == -1. */
1834 tmp_precede
= must_precede
;
1838 try_scheduling_node_in_cycle (ps
, u_node
, u
, c
,
1840 &num_splits
, tmp_precede
,
1846 verify_partial_schedule (ps
, sched_nodes
);
1855 if (num_splits
>= MAX_SPLIT_NUM
)
1858 flush_and_start_over
= true;
1859 verify_partial_schedule (ps
, sched_nodes
);
1860 reset_partial_schedule (ps
, ii
);
1861 verify_partial_schedule (ps
, sched_nodes
);
1866 /* The scheduling window is exclusive of 'end'
1867 whereas compute_split_window() expects an inclusive,
1870 split_row
= compute_split_row (sched_nodes
, start
, end
- 1,
1873 split_row
= compute_split_row (sched_nodes
, end
+ 1, start
,
1876 ps_insert_empty_row (ps
, split_row
, sched_nodes
);
1877 i
--; /* Go back and retry node i. */
1880 fprintf (dump_file
, "num_splits=%d\n", num_splits
);
1883 /* ??? If (success), check register pressure estimates. */
1884 } /* Continue with next node. */
1885 } /* While flush_and_start_over. */
1888 free_partial_schedule (ps
);
1892 gcc_assert (sbitmap_equal (tobe_scheduled
, sched_nodes
));
1894 sbitmap_free (sched_nodes
);
1895 sbitmap_free (must_precede
);
1896 sbitmap_free (must_follow
);
1897 sbitmap_free (tobe_scheduled
);
1902 /* This function inserts a new empty row into PS at the position
1903 according to SPLITROW, keeping all already scheduled instructions
1904 intact and updating their SCHED_TIME and cycle accordingly. */
1906 ps_insert_empty_row (partial_schedule_ptr ps
, int split_row
,
1907 sbitmap sched_nodes
)
1909 ps_insn_ptr crr_insn
;
1910 ps_insn_ptr
*rows_new
;
1912 int new_ii
= ii
+ 1;
1914 int *rows_length_new
;
1916 verify_partial_schedule (ps
, sched_nodes
);
1918 /* We normalize sched_time and rotate ps to have only non-negative sched
1919 times, for simplicity of updating cycles after inserting new row. */
1920 split_row
-= ps
->min_cycle
;
1921 split_row
= SMODULO (split_row
, ii
);
1923 fprintf (dump_file
, "split_row=%d\n", split_row
);
1925 reset_sched_times (ps
, PS_MIN_CYCLE (ps
));
1926 rotate_partial_schedule (ps
, PS_MIN_CYCLE (ps
));
1928 rows_new
= (ps_insn_ptr
*) xcalloc (new_ii
, sizeof (ps_insn_ptr
));
1929 rows_length_new
= (int *) xcalloc (new_ii
, sizeof (int));
1930 for (row
= 0; row
< split_row
; row
++)
1932 rows_new
[row
] = ps
->rows
[row
];
1933 rows_length_new
[row
] = ps
->rows_length
[row
];
1934 ps
->rows
[row
] = NULL
;
1935 for (crr_insn
= rows_new
[row
];
1936 crr_insn
; crr_insn
= crr_insn
->next_in_row
)
1938 ddg_node_ptr u
= crr_insn
->node
;
1939 int new_time
= SCHED_TIME (u
) + (SCHED_TIME (u
) / ii
);
1941 SCHED_TIME (u
) = new_time
;
1942 crr_insn
->cycle
= new_time
;
1943 SCHED_ROW (u
) = new_time
% new_ii
;
1944 SCHED_STAGE (u
) = new_time
/ new_ii
;
1949 rows_new
[split_row
] = NULL
;
1951 for (row
= split_row
; row
< ii
; row
++)
1953 rows_new
[row
+ 1] = ps
->rows
[row
];
1954 rows_length_new
[row
+ 1] = ps
->rows_length
[row
];
1955 ps
->rows
[row
] = NULL
;
1956 for (crr_insn
= rows_new
[row
+ 1];
1957 crr_insn
; crr_insn
= crr_insn
->next_in_row
)
1959 ddg_node_ptr u
= crr_insn
->node
;
1960 int new_time
= SCHED_TIME (u
) + (SCHED_TIME (u
) / ii
) + 1;
1962 SCHED_TIME (u
) = new_time
;
1963 crr_insn
->cycle
= new_time
;
1964 SCHED_ROW (u
) = new_time
% new_ii
;
1965 SCHED_STAGE (u
) = new_time
/ new_ii
;
1970 ps
->min_cycle
= ps
->min_cycle
+ ps
->min_cycle
/ ii
1971 + (SMODULO (ps
->min_cycle
, ii
) >= split_row
? 1 : 0);
1972 ps
->max_cycle
= ps
->max_cycle
+ ps
->max_cycle
/ ii
1973 + (SMODULO (ps
->max_cycle
, ii
) >= split_row
? 1 : 0);
1975 ps
->rows
= rows_new
;
1976 free (ps
->rows_length
);
1977 ps
->rows_length
= rows_length_new
;
1979 gcc_assert (ps
->min_cycle
>= 0);
1981 verify_partial_schedule (ps
, sched_nodes
);
1984 fprintf (dump_file
, "min_cycle=%d, max_cycle=%d\n", ps
->min_cycle
,
1988 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1989 UP which are the boundaries of it's scheduling window; compute using
1990 SCHED_NODES and II a row in the partial schedule that can be split
1991 which will separate a critical predecessor from a critical successor
1992 thereby expanding the window, and return it. */
1994 compute_split_row (sbitmap sched_nodes
, int low
, int up
, int ii
,
1995 ddg_node_ptr u_node
)
1998 int lower
= INT_MIN
, upper
= INT_MAX
;
1999 ddg_node_ptr crit_pred
= NULL
;
2000 ddg_node_ptr crit_succ
= NULL
;
2003 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
2005 ddg_node_ptr v_node
= e
->src
;
2007 if (TEST_BIT (sched_nodes
, v_node
->cuid
)
2008 && (low
== SCHED_TIME (v_node
) + e
->latency
- (e
->distance
* ii
)))
2009 if (SCHED_TIME (v_node
) > lower
)
2012 lower
= SCHED_TIME (v_node
);
2016 if (crit_pred
!= NULL
)
2018 crit_cycle
= SCHED_TIME (crit_pred
) + 1;
2019 return SMODULO (crit_cycle
, ii
);
2022 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
2024 ddg_node_ptr v_node
= e
->dest
;
2025 if (TEST_BIT (sched_nodes
, v_node
->cuid
)
2026 && (up
== SCHED_TIME (v_node
) - e
->latency
+ (e
->distance
* ii
)))
2027 if (SCHED_TIME (v_node
) < upper
)
2030 upper
= SCHED_TIME (v_node
);
2034 if (crit_succ
!= NULL
)
2036 crit_cycle
= SCHED_TIME (crit_succ
);
2037 return SMODULO (crit_cycle
, ii
);
2041 fprintf (dump_file
, "Both crit_pred and crit_succ are NULL\n");
2043 return SMODULO ((low
+ up
+ 1) / 2, ii
);
2047 verify_partial_schedule (partial_schedule_ptr ps
, sbitmap sched_nodes
)
2050 ps_insn_ptr crr_insn
;
2052 for (row
= 0; row
< ps
->ii
; row
++)
2056 for (crr_insn
= ps
->rows
[row
]; crr_insn
; crr_insn
= crr_insn
->next_in_row
)
2058 ddg_node_ptr u
= crr_insn
->node
;
2061 gcc_assert (TEST_BIT (sched_nodes
, u
->cuid
));
2062 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2063 popcount (sched_nodes) == number of insns in ps. */
2064 gcc_assert (SCHED_TIME (u
) >= ps
->min_cycle
);
2065 gcc_assert (SCHED_TIME (u
) <= ps
->max_cycle
);
2068 gcc_assert (ps
->rows_length
[row
] == length
);
2073 /* This page implements the algorithm for ordering the nodes of a DDG
2074 for modulo scheduling, activated through the
2075 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2077 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2078 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2079 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2080 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2081 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2082 #define DEPTH(x) (ASAP ((x)))
2084 typedef struct node_order_params
* nopa
;
2086 static void order_nodes_of_sccs (ddg_all_sccs_ptr
, int * result
);
2087 static int order_nodes_in_scc (ddg_ptr
, sbitmap
, sbitmap
, int*, int);
2088 static nopa
calculate_order_params (ddg_ptr
, int, int *);
2089 static int find_max_asap (ddg_ptr
, sbitmap
);
2090 static int find_max_hv_min_mob (ddg_ptr
, sbitmap
);
2091 static int find_max_dv_min_mob (ddg_ptr
, sbitmap
);
2093 enum sms_direction
{BOTTOMUP
, TOPDOWN
};
2095 struct node_order_params
2102 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2104 check_nodes_order (int *node_order
, int num_nodes
)
2107 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2112 fprintf (dump_file
, "SMS final nodes order: \n");
2114 for (i
= 0; i
< num_nodes
; i
++)
2116 int u
= node_order
[i
];
2119 fprintf (dump_file
, "%d ", u
);
2120 gcc_assert (u
< num_nodes
&& u
>= 0 && !TEST_BIT (tmp
, u
));
2126 fprintf (dump_file
, "\n");
2131 /* Order the nodes of G for scheduling and pass the result in
2132 NODE_ORDER. Also set aux.count of each node to ASAP.
2133 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2135 sms_order_nodes (ddg_ptr g
, int mii
, int * node_order
, int *pmax_asap
)
2139 ddg_all_sccs_ptr sccs
= create_ddg_all_sccs (g
);
2141 nopa nops
= calculate_order_params (g
, mii
, pmax_asap
);
2144 print_sccs (dump_file
, sccs
, g
);
2146 order_nodes_of_sccs (sccs
, node_order
);
2148 if (sccs
->num_sccs
> 0)
2149 /* First SCC has the largest recurrence_length. */
2150 rec_mii
= sccs
->sccs
[0]->recurrence_length
;
2152 /* Save ASAP before destroying node_order_params. */
2153 for (i
= 0; i
< g
->num_nodes
; i
++)
2155 ddg_node_ptr v
= &g
->nodes
[i
];
2156 v
->aux
.count
= ASAP (v
);
2160 free_ddg_all_sccs (sccs
);
2161 check_nodes_order (node_order
, g
->num_nodes
);
2167 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs
, int * node_order
)
2170 ddg_ptr g
= all_sccs
->ddg
;
2171 int num_nodes
= g
->num_nodes
;
2172 sbitmap prev_sccs
= sbitmap_alloc (num_nodes
);
2173 sbitmap on_path
= sbitmap_alloc (num_nodes
);
2174 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2175 sbitmap ones
= sbitmap_alloc (num_nodes
);
2177 sbitmap_zero (prev_sccs
);
2178 sbitmap_ones (ones
);
2180 /* Perform the node ordering starting from the SCC with the highest recMII.
2181 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2182 for (i
= 0; i
< all_sccs
->num_sccs
; i
++)
2184 ddg_scc_ptr scc
= all_sccs
->sccs
[i
];
2186 /* Add nodes on paths from previous SCCs to the current SCC. */
2187 find_nodes_on_paths (on_path
, g
, prev_sccs
, scc
->nodes
);
2188 sbitmap_a_or_b (tmp
, scc
->nodes
, on_path
);
2190 /* Add nodes on paths from the current SCC to previous SCCs. */
2191 find_nodes_on_paths (on_path
, g
, scc
->nodes
, prev_sccs
);
2192 sbitmap_a_or_b (tmp
, tmp
, on_path
);
2194 /* Remove nodes of previous SCCs from current extended SCC. */
2195 sbitmap_difference (tmp
, tmp
, prev_sccs
);
2197 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
2198 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2201 /* Handle the remaining nodes that do not belong to any scc. Each call
2202 to order_nodes_in_scc handles a single connected component. */
2203 while (pos
< g
->num_nodes
)
2205 sbitmap_difference (tmp
, ones
, prev_sccs
);
2206 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
2208 sbitmap_free (prev_sccs
);
2209 sbitmap_free (on_path
);
2211 sbitmap_free (ones
);
2214 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2215 static struct node_order_params
*
2216 calculate_order_params (ddg_ptr g
, int mii ATTRIBUTE_UNUSED
, int *pmax_asap
)
2220 int num_nodes
= g
->num_nodes
;
2222 /* Allocate a place to hold ordering params for each node in the DDG. */
2223 nopa node_order_params_arr
;
2225 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2226 node_order_params_arr
= (nopa
) xcalloc (num_nodes
,
2227 sizeof (struct node_order_params
));
2229 /* Set the aux pointer of each node to point to its order_params structure. */
2230 for (u
= 0; u
< num_nodes
; u
++)
2231 g
->nodes
[u
].aux
.info
= &node_order_params_arr
[u
];
2233 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2234 calculate ASAP, ALAP, mobility, distance, and height for each node
2235 in the dependence (direct acyclic) graph. */
2237 /* We assume that the nodes in the array are in topological order. */
2240 for (u
= 0; u
< num_nodes
; u
++)
2242 ddg_node_ptr u_node
= &g
->nodes
[u
];
2245 for (e
= u_node
->in
; e
; e
= e
->next_in
)
2246 if (e
->distance
== 0)
2247 ASAP (u_node
) = MAX (ASAP (u_node
),
2248 ASAP (e
->src
) + e
->latency
);
2249 max_asap
= MAX (max_asap
, ASAP (u_node
));
2252 for (u
= num_nodes
- 1; u
> -1; u
--)
2254 ddg_node_ptr u_node
= &g
->nodes
[u
];
2256 ALAP (u_node
) = max_asap
;
2257 HEIGHT (u_node
) = 0;
2258 for (e
= u_node
->out
; e
; e
= e
->next_out
)
2259 if (e
->distance
== 0)
2261 ALAP (u_node
) = MIN (ALAP (u_node
),
2262 ALAP (e
->dest
) - e
->latency
);
2263 HEIGHT (u_node
) = MAX (HEIGHT (u_node
),
2264 HEIGHT (e
->dest
) + e
->latency
);
2269 fprintf (dump_file
, "\nOrder params\n");
2270 for (u
= 0; u
< num_nodes
; u
++)
2272 ddg_node_ptr u_node
= &g
->nodes
[u
];
2274 fprintf (dump_file
, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u
,
2275 ASAP (u_node
), ALAP (u_node
), HEIGHT (u_node
));
2279 *pmax_asap
= max_asap
;
2280 return node_order_params_arr
;
2284 find_max_asap (ddg_ptr g
, sbitmap nodes
)
2289 sbitmap_iterator sbi
;
2291 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2293 ddg_node_ptr u_node
= &g
->nodes
[u
];
2295 if (max_asap
< ASAP (u_node
))
2297 max_asap
= ASAP (u_node
);
2305 find_max_hv_min_mob (ddg_ptr g
, sbitmap nodes
)
2309 int min_mob
= INT_MAX
;
2311 sbitmap_iterator sbi
;
2313 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2315 ddg_node_ptr u_node
= &g
->nodes
[u
];
2317 if (max_hv
< HEIGHT (u_node
))
2319 max_hv
= HEIGHT (u_node
);
2320 min_mob
= MOB (u_node
);
2323 else if ((max_hv
== HEIGHT (u_node
))
2324 && (min_mob
> MOB (u_node
)))
2326 min_mob
= MOB (u_node
);
2334 find_max_dv_min_mob (ddg_ptr g
, sbitmap nodes
)
2338 int min_mob
= INT_MAX
;
2340 sbitmap_iterator sbi
;
2342 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2344 ddg_node_ptr u_node
= &g
->nodes
[u
];
2346 if (max_dv
< DEPTH (u_node
))
2348 max_dv
= DEPTH (u_node
);
2349 min_mob
= MOB (u_node
);
2352 else if ((max_dv
== DEPTH (u_node
))
2353 && (min_mob
> MOB (u_node
)))
2355 min_mob
= MOB (u_node
);
2362 /* Places the nodes of SCC into the NODE_ORDER array starting
2363 at position POS, according to the SMS ordering algorithm.
2364 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2365 the NODE_ORDER array, starting from position zero. */
2367 order_nodes_in_scc (ddg_ptr g
, sbitmap nodes_ordered
, sbitmap scc
,
2368 int * node_order
, int pos
)
2370 enum sms_direction dir
;
2371 int num_nodes
= g
->num_nodes
;
2372 sbitmap workset
= sbitmap_alloc (num_nodes
);
2373 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2374 sbitmap zero_bitmap
= sbitmap_alloc (num_nodes
);
2375 sbitmap predecessors
= sbitmap_alloc (num_nodes
);
2376 sbitmap successors
= sbitmap_alloc (num_nodes
);
2378 sbitmap_zero (predecessors
);
2379 find_predecessors (predecessors
, g
, nodes_ordered
);
2381 sbitmap_zero (successors
);
2382 find_successors (successors
, g
, nodes_ordered
);
2385 if (sbitmap_a_and_b_cg (tmp
, predecessors
, scc
))
2387 sbitmap_copy (workset
, tmp
);
2390 else if (sbitmap_a_and_b_cg (tmp
, successors
, scc
))
2392 sbitmap_copy (workset
, tmp
);
2399 sbitmap_zero (workset
);
2400 if ((u
= find_max_asap (g
, scc
)) >= 0)
2401 SET_BIT (workset
, u
);
2405 sbitmap_zero (zero_bitmap
);
2406 while (!sbitmap_equal (workset
, zero_bitmap
))
2409 ddg_node_ptr v_node
;
2410 sbitmap v_node_preds
;
2411 sbitmap v_node_succs
;
2415 while (!sbitmap_equal (workset
, zero_bitmap
))
2417 v
= find_max_hv_min_mob (g
, workset
);
2418 v_node
= &g
->nodes
[v
];
2419 node_order
[pos
++] = v
;
2420 v_node_succs
= NODE_SUCCESSORS (v_node
);
2421 sbitmap_a_and_b (tmp
, v_node_succs
, scc
);
2423 /* Don't consider the already ordered successors again. */
2424 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
2425 sbitmap_a_or_b (workset
, workset
, tmp
);
2426 RESET_BIT (workset
, v
);
2427 SET_BIT (nodes_ordered
, v
);
2430 sbitmap_zero (predecessors
);
2431 find_predecessors (predecessors
, g
, nodes_ordered
);
2432 sbitmap_a_and_b (workset
, predecessors
, scc
);
2436 while (!sbitmap_equal (workset
, zero_bitmap
))
2438 v
= find_max_dv_min_mob (g
, workset
);
2439 v_node
= &g
->nodes
[v
];
2440 node_order
[pos
++] = v
;
2441 v_node_preds
= NODE_PREDECESSORS (v_node
);
2442 sbitmap_a_and_b (tmp
, v_node_preds
, scc
);
2444 /* Don't consider the already ordered predecessors again. */
2445 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
2446 sbitmap_a_or_b (workset
, workset
, tmp
);
2447 RESET_BIT (workset
, v
);
2448 SET_BIT (nodes_ordered
, v
);
2451 sbitmap_zero (successors
);
2452 find_successors (successors
, g
, nodes_ordered
);
2453 sbitmap_a_and_b (workset
, successors
, scc
);
2457 sbitmap_free (workset
);
2458 sbitmap_free (zero_bitmap
);
2459 sbitmap_free (predecessors
);
2460 sbitmap_free (successors
);
2465 /* This page contains functions for manipulating partial-schedules during
2466 modulo scheduling. */
2468 /* Create a partial schedule and allocate a memory to hold II rows. */
2470 static partial_schedule_ptr
2471 create_partial_schedule (int ii
, ddg_ptr g
, int history
)
2473 partial_schedule_ptr ps
= XNEW (struct partial_schedule
);
2474 ps
->rows
= (ps_insn_ptr
*) xcalloc (ii
, sizeof (ps_insn_ptr
));
2475 ps
->rows_length
= (int *) xcalloc (ii
, sizeof (int));
2477 ps
->history
= history
;
2478 ps
->min_cycle
= INT_MAX
;
2479 ps
->max_cycle
= INT_MIN
;
2485 /* Free the PS_INSNs in rows array of the given partial schedule.
2486 ??? Consider caching the PS_INSN's. */
2488 free_ps_insns (partial_schedule_ptr ps
)
2492 for (i
= 0; i
< ps
->ii
; i
++)
2496 ps_insn_ptr ps_insn
= ps
->rows
[i
]->next_in_row
;
2499 ps
->rows
[i
] = ps_insn
;
2505 /* Free all the memory allocated to the partial schedule. */
2508 free_partial_schedule (partial_schedule_ptr ps
)
2514 free (ps
->rows_length
);
2518 /* Clear the rows array with its PS_INSNs, and create a new one with
2522 reset_partial_schedule (partial_schedule_ptr ps
, int new_ii
)
2527 if (new_ii
== ps
->ii
)
2529 ps
->rows
= (ps_insn_ptr
*) xrealloc (ps
->rows
, new_ii
2530 * sizeof (ps_insn_ptr
));
2531 memset (ps
->rows
, 0, new_ii
* sizeof (ps_insn_ptr
));
2532 ps
->rows_length
= (int *) xrealloc (ps
->rows_length
, new_ii
* sizeof (int));
2533 memset (ps
->rows_length
, 0, new_ii
* sizeof (int));
2535 ps
->min_cycle
= INT_MAX
;
2536 ps
->max_cycle
= INT_MIN
;
2539 /* Prints the partial schedule as an ii rows array, for each rows
2540 print the ids of the insns in it. */
2542 print_partial_schedule (partial_schedule_ptr ps
, FILE *dump
)
2546 for (i
= 0; i
< ps
->ii
; i
++)
2548 ps_insn_ptr ps_i
= ps
->rows
[i
];
2550 fprintf (dump
, "\n[ROW %d ]: ", i
);
2553 fprintf (dump
, "%d, ",
2554 INSN_UID (ps_i
->node
->insn
));
2555 ps_i
= ps_i
->next_in_row
;
2560 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2562 create_ps_insn (ddg_node_ptr node
, int cycle
)
2564 ps_insn_ptr ps_i
= XNEW (struct ps_insn
);
2567 ps_i
->next_in_row
= NULL
;
2568 ps_i
->prev_in_row
= NULL
;
2569 ps_i
->cycle
= cycle
;
2575 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2576 node is not found in the partial schedule, else returns true. */
2578 remove_node_from_ps (partial_schedule_ptr ps
, ps_insn_ptr ps_i
)
2585 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2586 if (! ps_i
->prev_in_row
)
2588 if (ps_i
!= ps
->rows
[row
])
2591 ps
->rows
[row
] = ps_i
->next_in_row
;
2593 ps
->rows
[row
]->prev_in_row
= NULL
;
2597 ps_i
->prev_in_row
->next_in_row
= ps_i
->next_in_row
;
2598 if (ps_i
->next_in_row
)
2599 ps_i
->next_in_row
->prev_in_row
= ps_i
->prev_in_row
;
2602 ps
->rows_length
[row
] -= 1;
2607 /* Unlike what literature describes for modulo scheduling (which focuses
2608 on VLIW machines) the order of the instructions inside a cycle is
2609 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2610 where the current instruction should go relative to the already
2611 scheduled instructions in the given cycle. Go over these
2612 instructions and find the first possible column to put it in. */
2614 ps_insn_find_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2615 sbitmap must_precede
, sbitmap must_follow
)
2617 ps_insn_ptr next_ps_i
;
2618 ps_insn_ptr first_must_follow
= NULL
;
2619 ps_insn_ptr last_must_precede
= NULL
;
2620 ps_insn_ptr last_in_row
= NULL
;
2626 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2628 /* Find the first must follow and the last must precede
2629 and insert the node immediately after the must precede
2630 but make sure that it there is no must follow after it. */
2631 for (next_ps_i
= ps
->rows
[row
];
2633 next_ps_i
= next_ps_i
->next_in_row
)
2635 if (must_follow
&& TEST_BIT (must_follow
, next_ps_i
->node
->cuid
)
2636 && ! first_must_follow
)
2637 first_must_follow
= next_ps_i
;
2638 if (must_precede
&& TEST_BIT (must_precede
, next_ps_i
->node
->cuid
))
2640 /* If we have already met a node that must follow, then
2641 there is no possible column. */
2642 if (first_must_follow
)
2645 last_must_precede
= next_ps_i
;
2647 /* The closing branch must be the last in the row. */
2649 && TEST_BIT (must_precede
, next_ps_i
->node
->cuid
)
2650 && JUMP_P (next_ps_i
->node
->insn
))
2653 last_in_row
= next_ps_i
;
2656 /* The closing branch is scheduled as well. Make sure there is no
2657 dependent instruction after it as the branch should be the last
2658 instruction in the row. */
2659 if (JUMP_P (ps_i
->node
->insn
))
2661 if (first_must_follow
)
2665 /* Make the branch the last in the row. New instructions
2666 will be inserted at the beginning of the row or after the
2667 last must_precede instruction thus the branch is guaranteed
2668 to remain the last instruction in the row. */
2669 last_in_row
->next_in_row
= ps_i
;
2670 ps_i
->prev_in_row
= last_in_row
;
2671 ps_i
->next_in_row
= NULL
;
2674 ps
->rows
[row
] = ps_i
;
2678 /* Now insert the node after INSERT_AFTER_PSI. */
2680 if (! last_must_precede
)
2682 ps_i
->next_in_row
= ps
->rows
[row
];
2683 ps_i
->prev_in_row
= NULL
;
2684 if (ps_i
->next_in_row
)
2685 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2686 ps
->rows
[row
] = ps_i
;
2690 ps_i
->next_in_row
= last_must_precede
->next_in_row
;
2691 last_must_precede
->next_in_row
= ps_i
;
2692 ps_i
->prev_in_row
= last_must_precede
;
2693 if (ps_i
->next_in_row
)
2694 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2700 /* Advances the PS_INSN one column in its current row; returns false
2701 in failure and true in success. Bit N is set in MUST_FOLLOW if
2702 the node with cuid N must be come after the node pointed to by
2703 PS_I when scheduled in the same cycle. */
2705 ps_insn_advance_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2706 sbitmap must_follow
)
2708 ps_insn_ptr prev
, next
;
2710 ddg_node_ptr next_node
;
2715 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2717 if (! ps_i
->next_in_row
)
2720 next_node
= ps_i
->next_in_row
->node
;
2722 /* Check if next_in_row is dependent on ps_i, both having same sched
2723 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2724 if (must_follow
&& TEST_BIT (must_follow
, next_node
->cuid
))
2727 /* Advance PS_I over its next_in_row in the doubly linked list. */
2728 prev
= ps_i
->prev_in_row
;
2729 next
= ps_i
->next_in_row
;
2731 if (ps_i
== ps
->rows
[row
])
2732 ps
->rows
[row
] = next
;
2734 ps_i
->next_in_row
= next
->next_in_row
;
2736 if (next
->next_in_row
)
2737 next
->next_in_row
->prev_in_row
= ps_i
;
2739 next
->next_in_row
= ps_i
;
2740 ps_i
->prev_in_row
= next
;
2742 next
->prev_in_row
= prev
;
2744 prev
->next_in_row
= next
;
2749 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2750 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2751 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2752 before/after (respectively) the node pointed to by PS_I when scheduled
2753 in the same cycle. */
2755 add_node_to_ps (partial_schedule_ptr ps
, ddg_node_ptr node
, int cycle
,
2756 sbitmap must_precede
, sbitmap must_follow
)
2759 int row
= SMODULO (cycle
, ps
->ii
);
2761 if (ps
->rows_length
[row
] >= issue_rate
)
2764 ps_i
= create_ps_insn (node
, cycle
);
2766 /* Finds and inserts PS_I according to MUST_FOLLOW and
2768 if (! ps_insn_find_column (ps
, ps_i
, must_precede
, must_follow
))
2774 ps
->rows_length
[row
] += 1;
2778 /* Advance time one cycle. Assumes DFA is being used. */
2780 advance_one_cycle (void)
2782 if (targetm
.sched
.dfa_pre_cycle_insn
)
2783 state_transition (curr_state
,
2784 targetm
.sched
.dfa_pre_cycle_insn ());
2786 state_transition (curr_state
, NULL
);
2788 if (targetm
.sched
.dfa_post_cycle_insn
)
2789 state_transition (curr_state
,
2790 targetm
.sched
.dfa_post_cycle_insn ());
2795 /* Checks if PS has resource conflicts according to DFA, starting from
2796 FROM cycle to TO cycle; returns true if there are conflicts and false
2797 if there are no conflicts. Assumes DFA is being used. */
2799 ps_has_conflicts (partial_schedule_ptr ps
, int from
, int to
)
2803 state_reset (curr_state
);
2805 for (cycle
= from
; cycle
<= to
; cycle
++)
2807 ps_insn_ptr crr_insn
;
2808 /* Holds the remaining issue slots in the current row. */
2809 int can_issue_more
= issue_rate
;
2811 /* Walk through the DFA for the current row. */
2812 for (crr_insn
= ps
->rows
[SMODULO (cycle
, ps
->ii
)];
2814 crr_insn
= crr_insn
->next_in_row
)
2816 rtx insn
= crr_insn
->node
->insn
;
2818 if (!NONDEBUG_INSN_P (insn
))
2821 /* Check if there is room for the current insn. */
2822 if (!can_issue_more
|| state_dead_lock_p (curr_state
))
2825 /* Update the DFA state and return with failure if the DFA found
2826 resource conflicts. */
2827 if (state_transition (curr_state
, insn
) >= 0)
2830 if (targetm
.sched
.variable_issue
)
2832 targetm
.sched
.variable_issue (sched_dump
, sched_verbose
,
2833 insn
, can_issue_more
);
2834 /* A naked CLOBBER or USE generates no instruction, so don't
2835 let them consume issue slots. */
2836 else if (GET_CODE (PATTERN (insn
)) != USE
2837 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
2841 /* Advance the DFA to the next cycle. */
2842 advance_one_cycle ();
2847 /* Checks if the given node causes resource conflicts when added to PS at
2848 cycle C. If not the node is added to PS and returned; otherwise zero
2849 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2850 cuid N must be come before/after (respectively) the node pointed to by
2851 PS_I when scheduled in the same cycle. */
2853 ps_add_node_check_conflicts (partial_schedule_ptr ps
, ddg_node_ptr n
,
2854 int c
, sbitmap must_precede
,
2855 sbitmap must_follow
)
2857 int has_conflicts
= 0;
2860 /* First add the node to the PS, if this succeeds check for
2861 conflicts, trying different issue slots in the same row. */
2862 if (! (ps_i
= add_node_to_ps (ps
, n
, c
, must_precede
, must_follow
)))
2863 return NULL
; /* Failed to insert the node at the given cycle. */
2865 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2867 && ps_has_conflicts (ps
,
2871 /* Try different issue slots to find one that the given node can be
2872 scheduled in without conflicts. */
2873 while (has_conflicts
)
2875 if (! ps_insn_advance_column (ps
, ps_i
, must_follow
))
2877 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2879 && ps_has_conflicts (ps
,
2886 remove_node_from_ps (ps
, ps_i
);
2890 ps
->min_cycle
= MIN (ps
->min_cycle
, c
);
2891 ps
->max_cycle
= MAX (ps
->max_cycle
, c
);
2895 /* Calculate the stage count of the partial schedule PS. The calculation
2896 takes into account the rotation to bring the closing branch to row
2899 calculate_stage_count (partial_schedule_ptr ps
)
2901 int rotation_amount
= (SCHED_TIME (ps
->g
->closing_branch
)) + 1;
2902 int new_min_cycle
= PS_MIN_CYCLE (ps
) - rotation_amount
;
2903 int new_max_cycle
= PS_MAX_CYCLE (ps
) - rotation_amount
;
2904 int stage_count
= CALC_STAGE_COUNT (-1, new_min_cycle
, ps
->ii
);
2906 /* The calculation of stage count is done adding the number of stages
2907 before cycle zero and after cycle zero. */
2908 stage_count
+= CALC_STAGE_COUNT (new_max_cycle
, 0, ps
->ii
);
2913 /* Rotate the rows of PS such that insns scheduled at time
2914 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2916 rotate_partial_schedule (partial_schedule_ptr ps
, int start_cycle
)
2918 int i
, row
, backward_rotates
;
2919 int last_row
= ps
->ii
- 1;
2921 if (start_cycle
== 0)
2924 backward_rotates
= SMODULO (start_cycle
, ps
->ii
);
2926 /* Revisit later and optimize this into a single loop. */
2927 for (i
= 0; i
< backward_rotates
; i
++)
2929 ps_insn_ptr first_row
= ps
->rows
[0];
2930 int first_row_length
= ps
->rows_length
[0];
2932 for (row
= 0; row
< last_row
; row
++)
2934 ps
->rows
[row
] = ps
->rows
[row
+ 1];
2935 ps
->rows_length
[row
] = ps
->rows_length
[row
+ 1];
2938 ps
->rows
[last_row
] = first_row
;
2939 ps
->rows_length
[last_row
] = first_row_length
;
2942 ps
->max_cycle
-= start_cycle
;
2943 ps
->min_cycle
-= start_cycle
;
2946 #endif /* INSN_SCHEDULING */
2949 gate_handle_sms (void)
2951 return (optimize
> 0 && flag_modulo_sched
);
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2980 struct rtl_opt_pass pass_sms
=
2985 gate_handle_sms
, /* gate */
2986 rest_of_handle_sms
, /* execute */
2989 0, /* static_pass_number */
2991 0, /* properties_required */
2992 0, /* properties_provided */
2993 0, /* properties_destroyed */
2994 0, /* todo_flags_start */
2997 | TODO_verify_rtl_sharing
2998 | TODO_ggc_collect
/* todo_flags_finish */