1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
39 #include "sched-int.h"
41 #include "cfglayout.h"
49 #include "tree-pass.h"
51 #ifdef INSN_SCHEDULING
53 /* This file contains the implementation of the Swing Modulo Scheduler,
54 described in the following references:
55 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
56 Lifetime--sensitive modulo scheduling in a production environment.
57 IEEE Trans. on Comps., 50(3), March 2001
58 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62 The basic structure is:
63 1. Build a data-dependence graph (DDG) for each loop.
64 2. Use the DDG to order the insns of a loop (not in topological order
65 necessarily, but rather) trying to place each insn after all its
66 predecessors _or_ after all its successors.
67 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68 4. Use the ordering to perform list-scheduling of the loop:
69 1. Set II = MII. We will try to schedule the loop within II cycles.
70 2. Try to schedule the insns one by one according to the ordering.
71 For each insn compute an interval of cycles by considering already-
72 scheduled preds and succs (and associated latencies); try to place
73 the insn in the cycles of this window checking for potential
74 resource conflicts (using the DFA interface).
75 Note: this is different from the cycle-scheduling of schedule_insns;
76 here the insns are not scheduled monotonically top-down (nor bottom-up).
78 3. If failed in scheduling all insns - bump II++ and try again, unless
79 II reaches an upper bound MaxII, in which case report failure.
80 5. If we succeeded in scheduling the loop within II cycles, we now
81 generate prolog and epilog, decrease the counter of the loop, and
82 perform modulo variable expansion for live ranges that span more than
83 II cycles (i.e. use register copies to prevent a def from overwriting
84 itself before reaching the use).
88 /* This page defines partial-schedule structures and functions for
91 typedef struct partial_schedule
*partial_schedule_ptr
;
92 typedef struct ps_insn
*ps_insn_ptr
;
94 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
95 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
97 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
98 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
100 /* Perform signed modulo, always returning a non-negative value. */
101 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
103 /* The number of different iterations the nodes in ps span, assuming
104 the stage boundaries are placed efficiently. */
105 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
106 + 1 + (ps)->ii - 1) / (ps)->ii)
108 /* A single instruction in the partial schedule. */
111 /* The corresponding DDG_NODE. */
114 /* The (absolute) cycle in which the PS instruction is scheduled.
115 Same as SCHED_TIME (node). */
118 /* The next/prev PS_INSN in the same row. */
119 ps_insn_ptr next_in_row
,
122 /* The number of nodes in the same row that come after this node. */
126 /* Holds the partial schedule as an array of II rows. Each entry of the
127 array points to a linked list of PS_INSNs, which represents the
128 instructions that are scheduled for that row. */
129 struct partial_schedule
131 int ii
; /* Number of rows in the partial schedule. */
132 int history
; /* Threshold for conflict checking using DFA. */
134 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
137 /* The earliest absolute cycle of an insn in the partial schedule. */
140 /* The latest absolute cycle of an insn in the partial schedule. */
143 ddg_ptr g
; /* The DDG of the insns in the partial schedule. */
146 /* We use this to record all the register replacements we do in
147 the kernel so we can undo SMS if it is not profitable. */
148 struct undo_replace_buff_elem
153 struct undo_replace_buff_elem
*next
;
158 static partial_schedule_ptr
create_partial_schedule (int ii
, ddg_ptr
, int history
);
159 static void free_partial_schedule (partial_schedule_ptr
);
160 static void reset_partial_schedule (partial_schedule_ptr
, int new_ii
);
161 void print_partial_schedule (partial_schedule_ptr
, FILE *);
162 static ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr
,
163 ddg_node_ptr node
, int cycle
,
164 sbitmap must_precede
,
165 sbitmap must_follow
);
166 static void rotate_partial_schedule (partial_schedule_ptr
, int);
167 void set_row_column_for_ps (partial_schedule_ptr
);
168 static bool ps_unschedule_node (partial_schedule_ptr
, ddg_node_ptr
);
171 /* This page defines constants and structures for the modulo scheduling
174 /* As in haifa-sched.c: */
175 /* issue_rate is the number of insns that can be scheduled in the same
176 machine cycle. It can be defined in the config/mach/mach.h file,
177 otherwise we set it to 1. */
179 static int issue_rate
;
181 static int sms_order_nodes (ddg_ptr
, int, int * result
);
182 static void set_node_sched_params (ddg_ptr
);
183 static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr
, int, int, int *);
184 static void permute_partial_schedule (partial_schedule_ptr ps
, rtx last
);
185 static void generate_prolog_epilog (partial_schedule_ptr
,struct loop
* loop
, rtx
);
186 static void duplicate_insns_of_cycles (partial_schedule_ptr ps
,
187 int from_stage
, int to_stage
,
190 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
191 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
192 #define SCHED_FIRST_REG_MOVE(x) \
193 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
194 #define SCHED_NREG_MOVES(x) \
195 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
196 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
197 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
198 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
200 /* The scheduling parameters held for each node. */
201 typedef struct node_sched_params
203 int asap
; /* A lower-bound on the absolute scheduling cycle. */
204 int time
; /* The absolute scheduling cycle (time >= asap). */
206 /* The following field (first_reg_move) is a pointer to the first
207 register-move instruction added to handle the modulo-variable-expansion
208 of the register defined by this node. This register-move copies the
209 original register defined by the node. */
212 /* The number of register-move instructions added, immediately preceding
216 int row
; /* Holds time % ii. */
217 int stage
; /* Holds time / ii. */
219 /* The column of a node inside the ps. If nodes u, v are on the same row,
220 u will precede v if column (u) < column (v). */
222 } *node_sched_params_ptr
;
225 /* The following three functions are copied from the current scheduler
226 code in order to use sched_analyze() for computing the dependencies.
227 They are used when initializing the sched_info structure. */
229 sms_print_insn (rtx insn
, int aligned ATTRIBUTE_UNUSED
)
233 sprintf (tmp
, "i%4d", INSN_UID (insn
));
238 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED
,
239 regset cond_exec ATTRIBUTE_UNUSED
,
240 regset used ATTRIBUTE_UNUSED
,
241 regset set ATTRIBUTE_UNUSED
)
245 static struct sched_info sms_sched_info
=
254 compute_jump_reg_dependencies
,
259 NULL
, NULL
, NULL
, NULL
, NULL
,
264 /* Return the register decremented and tested in INSN,
265 or zero if it is not a decrement-and-branch insn. */
268 doloop_register_get (rtx insn ATTRIBUTE_UNUSED
)
270 #ifdef HAVE_doloop_end
271 rtx pattern
, reg
, condition
;
276 pattern
= PATTERN (insn
);
277 condition
= doloop_condition_get (pattern
);
281 if (REG_P (XEXP (condition
, 0)))
282 reg
= XEXP (condition
, 0);
283 else if (GET_CODE (XEXP (condition
, 0)) == PLUS
284 && REG_P (XEXP (XEXP (condition
, 0), 0)))
285 reg
= XEXP (XEXP (condition
, 0), 0);
295 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
296 that the number of iterations is a compile-time constant. If so,
297 return the rtx that sets COUNT_REG to a constant, and set COUNT to
298 this constant. Otherwise return 0. */
300 const_iteration_count (rtx count_reg
, basic_block pre_header
,
301 HOST_WIDEST_INT
* count
)
309 get_ebb_head_tail (pre_header
, pre_header
, &head
, &tail
);
311 for (insn
= tail
; insn
!= PREV_INSN (head
); insn
= PREV_INSN (insn
))
312 if (INSN_P (insn
) && single_set (insn
) &&
313 rtx_equal_p (count_reg
, SET_DEST (single_set (insn
))))
315 rtx pat
= single_set (insn
);
317 if (GET_CODE (SET_SRC (pat
)) == CONST_INT
)
319 *count
= INTVAL (SET_SRC (pat
));
329 /* A very simple resource-based lower bound on the initiation interval.
330 ??? Improve the accuracy of this bound by considering the
331 utilization of various units. */
335 return (g
->num_nodes
/ issue_rate
);
339 /* Points to the array that contains the sched data for each node. */
340 static node_sched_params_ptr node_sched_params
;
342 /* Allocate sched_params for each node and initialize it. Assumes that
343 the aux field of each node contain the asap bound (computed earlier),
344 and copies it into the sched_params field. */
346 set_node_sched_params (ddg_ptr g
)
350 /* Allocate for each node in the DDG a place to hold the "sched_data". */
351 /* Initialize ASAP/ALAP/HIGHT to zero. */
352 node_sched_params
= (node_sched_params_ptr
)
353 xcalloc (g
->num_nodes
,
354 sizeof (struct node_sched_params
));
356 /* Set the pointer of the general data of the node to point to the
357 appropriate sched_params structure. */
358 for (i
= 0; i
< g
->num_nodes
; i
++)
360 /* Watch out for aliasing problems? */
361 node_sched_params
[i
].asap
= g
->nodes
[i
].aux
.count
;
362 g
->nodes
[i
].aux
.info
= &node_sched_params
[i
];
367 print_node_sched_params (FILE *file
, int num_nodes
, ddg_ptr g
)
373 for (i
= 0; i
< num_nodes
; i
++)
375 node_sched_params_ptr nsp
= &node_sched_params
[i
];
376 rtx reg_move
= nsp
->first_reg_move
;
379 fprintf (file
, "Node = %d; INSN = %d\n", i
,
380 (INSN_UID (g
->nodes
[i
].insn
)));
381 fprintf (file
, " asap = %d:\n", nsp
->asap
);
382 fprintf (file
, " time = %d:\n", nsp
->time
);
383 fprintf (file
, " nreg_moves = %d:\n", nsp
->nreg_moves
);
384 for (j
= 0; j
< nsp
->nreg_moves
; j
++)
386 fprintf (file
, " reg_move = ");
387 print_rtl_single (file
, reg_move
);
388 reg_move
= PREV_INSN (reg_move
);
394 Breaking intra-loop register anti-dependences:
395 Each intra-loop register anti-dependence implies a cross-iteration true
396 dependence of distance 1. Therefore, we can remove such false dependencies
397 and figure out if the partial schedule broke them by checking if (for a
398 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
399 if so generate a register move. The number of such moves is equal to:
400 SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
401 nreg_moves = ----------------------------------- + 1 - {   dependence.
                                  ii                        { 1 if not.
404 static struct undo_replace_buff_elem
*
405 generate_reg_moves (partial_schedule_ptr ps
, bool rescan
)
410 struct undo_replace_buff_elem
*reg_move_replaces
= NULL
;
412 for (i
= 0; i
< g
->num_nodes
; i
++)
414 ddg_node_ptr u
= &g
->nodes
[i
];
416 int nreg_moves
= 0, i_reg_move
;
417 sbitmap
*uses_of_defs
;
419 rtx prev_reg
, old_reg
;
421 /* Compute the number of reg_moves needed for u, by looking at life
422 ranges started at u (excluding self-loops). */
423 for (e
= u
->out
; e
; e
= e
->next_out
)
424 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
426 int nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
428 if (e
->distance
== 1)
429 nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
431 /* If dest precedes src in the schedule of the kernel, then dest
432 will read before src writes and we can save one reg_copy. */
433 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
434 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
437 nreg_moves
= MAX (nreg_moves
, nreg_moves4e
);
443 /* Every use of the register defined by node may require a different
444 copy of this register, depending on the time the use is scheduled.
445 Set a bitmap vector, telling which nodes use each copy of this
447 uses_of_defs
= sbitmap_vector_alloc (nreg_moves
, g
->num_nodes
);
448 sbitmap_vector_zero (uses_of_defs
, nreg_moves
);
449 for (e
= u
->out
; e
; e
= e
->next_out
)
450 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
452 int dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
454 if (e
->distance
== 1)
455 dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
457 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
458 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
462 SET_BIT (uses_of_defs
[dest_copy
- 1], e
->dest
->cuid
);
465 /* Now generate the reg_moves, attaching relevant uses to them. */
466 SCHED_NREG_MOVES (u
) = nreg_moves
;
467 old_reg
= prev_reg
= copy_rtx (SET_DEST (single_set (u
->insn
)));
468 last_reg_move
= u
->insn
;
470 for (i_reg_move
= 0; i_reg_move
< nreg_moves
; i_reg_move
++)
472 unsigned int i_use
= 0;
473 rtx new_reg
= gen_reg_rtx (GET_MODE (prev_reg
));
474 rtx reg_move
= gen_move_insn (new_reg
, prev_reg
);
475 sbitmap_iterator sbi
;
477 add_insn_before (reg_move
, last_reg_move
, NULL
);
478 last_reg_move
= reg_move
;
480 if (!SCHED_FIRST_REG_MOVE (u
))
481 SCHED_FIRST_REG_MOVE (u
) = reg_move
;
483 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs
[i_reg_move
], 0, i_use
, sbi
)
485 struct undo_replace_buff_elem
*rep
;
487 rep
= (struct undo_replace_buff_elem
*)
488 xcalloc (1, sizeof (struct undo_replace_buff_elem
));
489 rep
->insn
= g
->nodes
[i_use
].insn
;
490 rep
->orig_reg
= old_reg
;
491 rep
->new_reg
= new_reg
;
493 if (! reg_move_replaces
)
494 reg_move_replaces
= rep
;
497 rep
->next
= reg_move_replaces
;
498 reg_move_replaces
= rep
;
501 replace_rtx (g
->nodes
[i_use
].insn
, old_reg
, new_reg
);
503 df_insn_rescan (g
->nodes
[i_use
].insn
);
508 sbitmap_vector_free (uses_of_defs
);
510 return reg_move_replaces
;
513 /* Free memory allocated for the undo buffer. */
515 free_undo_replace_buff (struct undo_replace_buff_elem
*reg_move_replaces
)
518 while (reg_move_replaces
)
520 struct undo_replace_buff_elem
*rep
= reg_move_replaces
;
522 reg_move_replaces
= reg_move_replaces
->next
;
527 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
528 of SCHED_ROW and SCHED_STAGE. */
530 normalize_sched_times (partial_schedule_ptr ps
)
534 int amount
= PS_MIN_CYCLE (ps
);
537 /* Don't include the closing branch assuming that it is the last node. */
538 for (i
= 0; i
< g
->num_nodes
- 1; i
++)
540 ddg_node_ptr u
= &g
->nodes
[i
];
541 int normalized_time
= SCHED_TIME (u
) - amount
;
543 gcc_assert (normalized_time
>= 0);
545 SCHED_TIME (u
) = normalized_time
;
546 SCHED_ROW (u
) = normalized_time
% ii
;
547 SCHED_STAGE (u
) = normalized_time
/ ii
;
551 /* Set SCHED_COLUMN of each node according to its position in PS. */
553 set_columns_for_ps (partial_schedule_ptr ps
)
557 for (row
= 0; row
< ps
->ii
; row
++)
559 ps_insn_ptr cur_insn
= ps
->rows
[row
];
562 for (; cur_insn
; cur_insn
= cur_insn
->next_in_row
)
563 SCHED_COLUMN (cur_insn
->node
) = column
++;
567 /* Permute the insns according to their order in PS, from row 0 to
568 row ii-1, and position them right before LAST. This schedules
569 the insns of the loop kernel. */
571 permute_partial_schedule (partial_schedule_ptr ps
, rtx last
)
577 for (row
= 0; row
< ii
; row
++)
578 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
579 if (PREV_INSN (last
) != ps_ij
->node
->insn
)
580 reorder_insns_nobb (ps_ij
->node
->first_note
, ps_ij
->node
->insn
,
585 duplicate_insns_of_cycles (partial_schedule_ptr ps
, int from_stage
,
586 int to_stage
, int for_prolog
)
591 for (row
= 0; row
< ps
->ii
; row
++)
592 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
594 ddg_node_ptr u_node
= ps_ij
->node
;
596 rtx reg_move
= NULL_RTX
;
600 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
601 number of reg_moves starting with the second occurrence of
602 u_node, which is generated if its SCHED_STAGE <= to_stage. */
603 i_reg_moves
= to_stage
- SCHED_STAGE (u_node
) + 1;
604 i_reg_moves
= MAX (i_reg_moves
, 0);
605 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
607 /* The reg_moves start from the *first* reg_move backwards. */
610 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
611 for (j
= 1; j
< i_reg_moves
; j
++)
612 reg_move
= PREV_INSN (reg_move
);
615 else /* It's for the epilog. */
617 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
618 starting to decrease one stage after u_node no longer occurs;
619 that is, generate all reg_moves until
620 SCHED_STAGE (u_node) == from_stage - 1. */
621 i_reg_moves
= SCHED_NREG_MOVES (u_node
)
622 - (from_stage
- SCHED_STAGE (u_node
) - 1);
623 i_reg_moves
= MAX (i_reg_moves
, 0);
624 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
626 /* The reg_moves start from the *last* reg_move forwards. */
629 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
630 for (j
= 1; j
< SCHED_NREG_MOVES (u_node
); j
++)
631 reg_move
= PREV_INSN (reg_move
);
635 for (j
= 0; j
< i_reg_moves
; j
++, reg_move
= NEXT_INSN (reg_move
))
636 emit_insn (copy_rtx (PATTERN (reg_move
)));
637 if (SCHED_STAGE (u_node
) >= from_stage
638 && SCHED_STAGE (u_node
) <= to_stage
)
639 duplicate_insn_chain (u_node
->first_note
, u_node
->insn
);
644 /* Generate the instructions (including reg_moves) for prolog & epilog. */
646 generate_prolog_epilog (partial_schedule_ptr ps
, struct loop
* loop
, rtx count_reg
)
649 int last_stage
= PS_STAGE_COUNT (ps
) - 1;
652 /* Generate the prolog, inserting its insns on the loop-entry edge. */
656 /* Generate a subtract instruction at the beginning of the prolog to
657 adjust the loop count by STAGE_COUNT. */
658 emit_insn (gen_sub2_insn (count_reg
, GEN_INT (last_stage
)));
660 for (i
= 0; i
< last_stage
; i
++)
661 duplicate_insns_of_cycles (ps
, 0, i
, 1);
663 /* Put the prolog on the entry edge. */
664 e
= loop_preheader_edge (loop
);
665 split_edge_and_insert (e
, get_insns ());
669 /* Generate the epilog, inserting its insns on the loop-exit edge. */
672 for (i
= 0; i
< last_stage
; i
++)
673 duplicate_insns_of_cycles (ps
, i
+ 1, last_stage
, 0);
675 /* Put the epilogue on the exit edge. */
676 gcc_assert (single_exit (loop
));
677 e
= single_exit (loop
);
678 split_edge_and_insert (e
, get_insns ());
682 /* Return true if all the BBs of the loop are empty except the loop header.  */
685 loop_single_full_bb_p (struct loop
*loop
)
688 basic_block
*bbs
= get_loop_body (loop
);
690 for (i
= 0; i
< loop
->num_nodes
; i
++)
693 bool empty_bb
= true;
695 if (bbs
[i
] == loop
->header
)
698 /* Make sure that basic blocks other than the header
699 have only notes labels or jumps. */
700 get_ebb_head_tail (bbs
[i
], bbs
[i
], &head
, &tail
);
701 for (; head
!= NEXT_INSN (tail
); head
= NEXT_INSN (head
))
703 if (NOTE_P (head
) || LABEL_P (head
)
704 || (INSN_P (head
) && JUMP_P (head
)))
720 /* A simple loop from SMS point of view; it is a loop that is composed of
721 either a single basic block or two BBs - a header and a latch. */
722 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
723 && (EDGE_COUNT (loop->latch->preds) == 1) \
724 && (EDGE_COUNT (loop->latch->succs) == 1))
726 /* Return true if the loop is in its canonical form and false if not.
727 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
729 loop_canon_p (struct loop
*loop
)
732 if (loop
->inner
|| !loop_outer (loop
))
735 if (!single_exit (loop
))
739 rtx insn
= BB_END (loop
->header
);
741 fprintf (dump_file
, "SMS loop many exits ");
742 fprintf (dump_file
, " %s %d (file, line)\n",
743 insn_file (insn
), insn_line (insn
));
748 if (! SIMPLE_SMS_LOOP_P (loop
) && ! loop_single_full_bb_p (loop
))
752 rtx insn
= BB_END (loop
->header
);
754 fprintf (dump_file
, "SMS loop many BBs. ");
755 fprintf (dump_file
, " %s %d (file, line)\n",
756 insn_file (insn
), insn_line (insn
));
764 /* If there are more than one entry for the loop,
765 make it one by splitting the first entry edge and
766 redirecting the others to the new BB. */
768 canon_loop (struct loop
*loop
)
773 /* Avoid annoying special cases of edges going to exit
775 FOR_EACH_EDGE (e
, i
, EXIT_BLOCK_PTR
->preds
)
776 if ((e
->flags
& EDGE_FALLTHRU
) && (EDGE_COUNT (e
->src
->succs
) > 1))
779 if (loop
->latch
== loop
->header
780 || EDGE_COUNT (loop
->latch
->succs
) > 1)
782 FOR_EACH_EDGE (e
, i
, loop
->header
->preds
)
783 if (e
->src
== loop
->latch
)
789 /* Probability in % that the sms-ed loop rolls enough so that optimized
790 version may be entered. Just a guess. */
791 #define PROB_SMS_ENOUGH_ITERATIONS 80
793 /* Used to calculate the upper bound of ii. */
794 #define MAXII_FACTOR 2
796 /* Main entry point, perform SMS scheduling on the loops of the function
797 that consist of single basic blocks. */
801 static int passes
= 0;
807 partial_schedule_ptr ps
;
808 basic_block bb
= NULL
;
810 basic_block condition_bb
= NULL
;
812 gcov_type trip_count
= 0;
814 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
815 | LOOPS_HAVE_RECORDED_EXITS
);
816 if (number_of_loops () <= 1)
818 loop_optimizer_finalize ();
819 return; /* There are no loops to schedule. */
822 /* Initialize issue_rate. */
823 if (targetm
.sched
.issue_rate
)
825 int temp
= reload_completed
;
827 reload_completed
= 1;
828 issue_rate
= targetm
.sched
.issue_rate ();
829 reload_completed
= temp
;
834 /* Initialize the scheduler. */
835 current_sched_info
= &sms_sched_info
;
837 /* Init Data Flow analysis, to be used in interloop dep calculation. */
838 df_set_flags (DF_LR_RUN_DCE
);
839 df_rd_add_problem ();
840 df_note_add_problem ();
841 df_chain_add_problem (DF_DU_CHAIN
);
843 regstat_compute_calls_crossed ();
846 /* Allocate memory to hold the DDG array one entry for each loop.
847 We use loop->num as index into this array. */
848 g_arr
= XCNEWVEC (ddg_ptr
, number_of_loops ());
850 /* Build DDGs for all the relevant loops and hold them in G_ARR
851 indexed by the loop index. */
852 FOR_EACH_LOOP (li
, loop
, 0)
858 if ((passes
++ > MAX_SMS_LOOP_NUMBER
) && (MAX_SMS_LOOP_NUMBER
!= -1))
861 fprintf (dump_file
, "SMS reached MAX_PASSES... \n");
866 if (! loop_canon_p (loop
))
869 if (! loop_single_full_bb_p (loop
))
874 get_ebb_head_tail (bb
, bb
, &head
, &tail
);
875 latch_edge
= loop_latch_edge (loop
);
876 gcc_assert (single_exit (loop
));
877 if (single_exit (loop
)->count
)
878 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
880 /* Perfrom SMS only on loops that their average count is above threshold. */
882 if ( latch_edge
->count
883 && (latch_edge
->count
< single_exit (loop
)->count
* SMS_LOOP_AVERAGE_COUNT_THRESHOLD
))
887 fprintf (dump_file
, " %s %d (file, line)\n",
888 insn_file (tail
), insn_line (tail
));
889 fprintf (dump_file
, "SMS single-bb-loop\n");
890 if (profile_info
&& flag_branch_probabilities
)
892 fprintf (dump_file
, "SMS loop-count ");
893 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
894 (HOST_WIDEST_INT
) bb
->count
);
895 fprintf (dump_file
, "\n");
896 fprintf (dump_file
, "SMS trip-count ");
897 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
898 (HOST_WIDEST_INT
) trip_count
);
899 fprintf (dump_file
, "\n");
900 fprintf (dump_file
, "SMS profile-sum-max ");
901 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
902 (HOST_WIDEST_INT
) profile_info
->sum_max
);
903 fprintf (dump_file
, "\n");
909 /* Make sure this is a doloop. */
910 if ( !(count_reg
= doloop_register_get (tail
)))
913 /* Don't handle BBs with calls or barriers, or !single_set insns,
914 or auto-increment insns (to avoid creating invalid reg-moves
915 for the auto-increment insns).
916 ??? Should handle auto-increment insns. */
917 for (insn
= head
; insn
!= NEXT_INSN (tail
); insn
= NEXT_INSN (insn
))
920 || (INSN_P (insn
) && !JUMP_P (insn
)
921 && !single_set (insn
) && GET_CODE (PATTERN (insn
)) != USE
)
922 || (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0))
925 if (insn
!= NEXT_INSN (tail
))
930 fprintf (dump_file
, "SMS loop-with-call\n");
931 else if (BARRIER_P (insn
))
932 fprintf (dump_file
, "SMS loop-with-barrier\n");
933 else if (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0)
934 fprintf (dump_file
, "SMS reg inc\n");
936 fprintf (dump_file
, "SMS loop-with-not-single-set\n");
937 print_rtl_single (dump_file
, insn
);
943 if (! (g
= create_ddg (bb
, 0)))
946 fprintf (dump_file
, "SMS doloop\n");
950 g_arr
[loop
->num
] = g
;
953 /* We don't want to perform SMS on new loops - created by versioning. */
954 FOR_EACH_LOOP (li
, loop
, 0)
957 rtx count_reg
, count_init
;
959 unsigned stage_count
= 0;
960 HOST_WIDEST_INT loop_count
= 0;
962 if (! (g
= g_arr
[loop
->num
]))
966 print_ddg (dump_file
, g
);
968 get_ebb_head_tail (loop
->header
, loop
->header
, &head
, &tail
);
970 latch_edge
= loop_latch_edge (loop
);
971 gcc_assert (single_exit (loop
));
972 if (single_exit (loop
)->count
)
973 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
977 fprintf (dump_file
, " %s %d (file, line)\n",
978 insn_file (tail
), insn_line (tail
));
979 fprintf (dump_file
, "SMS single-bb-loop\n");
980 if (profile_info
&& flag_branch_probabilities
)
982 fprintf (dump_file
, "SMS loop-count ");
983 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
984 (HOST_WIDEST_INT
) bb
->count
);
985 fprintf (dump_file
, "\n");
986 fprintf (dump_file
, "SMS profile-sum-max ");
987 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
988 (HOST_WIDEST_INT
) profile_info
->sum_max
);
989 fprintf (dump_file
, "\n");
991 fprintf (dump_file
, "SMS doloop\n");
992 fprintf (dump_file
, "SMS built-ddg %d\n", g
->num_nodes
);
993 fprintf (dump_file
, "SMS num-loads %d\n", g
->num_loads
);
994 fprintf (dump_file
, "SMS num-stores %d\n", g
->num_stores
);
998 /* In case of th loop have doloop register it gets special
1000 count_init
= NULL_RTX
;
1001 if ((count_reg
= doloop_register_get (tail
)))
1003 basic_block pre_header
;
1005 pre_header
= loop_preheader_edge (loop
)->src
;
1006 count_init
= const_iteration_count (count_reg
, pre_header
,
1009 gcc_assert (count_reg
);
1011 if (dump_file
&& count_init
)
1013 fprintf (dump_file
, "SMS const-doloop ");
1014 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1016 fprintf (dump_file
, "\n");
1019 node_order
= XNEWVEC (int, g
->num_nodes
);
1021 mii
= 1; /* Need to pass some estimate of mii. */
1022 rec_mii
= sms_order_nodes (g
, mii
, node_order
);
1023 mii
= MAX (res_MII (g
), rec_mii
);
1024 maxii
= MAXII_FACTOR
* mii
;
1027 fprintf (dump_file
, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1028 rec_mii
, mii
, maxii
);
1030 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1032 set_node_sched_params (g
);
1034 ps
= sms_schedule_by_order (g
, mii
, maxii
, node_order
);
1037 stage_count
= PS_STAGE_COUNT (ps
);
1039 /* Stage count of 1 means that there is no interleaving between
1040 iterations, let the scheduling passes do the job. */
1042 || (count_init
&& (loop_count
<= stage_count
))
1043 || (flag_branch_probabilities
&& (trip_count
<= stage_count
)))
1047 fprintf (dump_file
, "SMS failed... \n");
1048 fprintf (dump_file
, "SMS sched-failed (stage-count=%d, loop-count=", stage_count
);
1049 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, loop_count
);
1050 fprintf (dump_file
, ", trip-count=");
1051 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, trip_count
);
1052 fprintf (dump_file
, ")\n");
1058 struct undo_replace_buff_elem
*reg_move_replaces
;
1063 "SMS succeeded %d %d (with ii, sc)\n", ps
->ii
,
1065 print_partial_schedule (ps
, dump_file
);
1067 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1068 g
->closing_branch
->cuid
, PS_MIN_CYCLE (ps
) - 1);
1071 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1072 the closing_branch was scheduled and should appear in the last (ii-1)
1073 row. Otherwise, we are free to schedule the branch, and we let nodes
1074 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1075 row; this should reduce stage_count to minimum. */
1076 normalize_sched_times (ps
);
1077 rotate_partial_schedule (ps
, PS_MIN_CYCLE (ps
));
1078 set_columns_for_ps (ps
);
1082 /* case the BCT count is not known , Do loop-versioning */
1083 if (count_reg
&& ! count_init
)
1085 rtx comp_rtx
= gen_rtx_fmt_ee (GT
, VOIDmode
, count_reg
,
1086 GEN_INT(stage_count
));
1087 unsigned prob
= (PROB_SMS_ENOUGH_ITERATIONS
1088 * REG_BR_PROB_BASE
) / 100;
1090 loop_version (loop
, comp_rtx
, &condition_bb
,
1091 prob
, prob
, REG_BR_PROB_BASE
- prob
,
1095 /* Set new iteration count of loop kernel. */
1096 if (count_reg
&& count_init
)
1097 SET_SRC (single_set (count_init
)) = GEN_INT (loop_count
1100 /* Now apply the scheduled kernel to the RTL of the loop. */
1101 permute_partial_schedule (ps
, g
->closing_branch
->first_note
);
1103 /* Mark this loop as software pipelined so the later
1104 scheduling passes doesn't touch it. */
1105 if (! flag_resched_modulo_sched
)
1106 g
->bb
->flags
|= BB_DISABLE_SCHEDULE
;
1107 /* The life-info is not valid any more. */
1108 df_set_bb_dirty (g
->bb
);
1110 reg_move_replaces
= generate_reg_moves (ps
, true);
1112 print_node_sched_params (dump_file
, g
->num_nodes
, g
);
1113 /* Generate prolog and epilog. */
1114 if (count_reg
&& !count_init
)
1115 generate_prolog_epilog (ps
, loop
, count_reg
);
1117 generate_prolog_epilog (ps
, loop
, NULL_RTX
);
1119 free_undo_replace_buff (reg_move_replaces
);
1122 free_partial_schedule (ps
);
1123 free (node_sched_params
);
1128 regstat_free_calls_crossed ();
1131 /* Release scheduler data, needed until now because of DFA. */
1133 loop_optimizer_finalize ();
1136 /* The SMS scheduling algorithm itself
1137 -----------------------------------
1138 Input: 'O' an ordered list of insns of a loop.
1139 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1141 'Q' is the empty Set
1142 'PS' is the partial schedule; it holds the currently scheduled nodes with
1144 'PSP' previously scheduled predecessors.
1145 'PSS' previously scheduled successors.
1146 't(u)' the cycle where u is scheduled.
1147 'l(u)' is the latency of u.
1148 'd(v,u)' is the dependence distance from v to u.
1149 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1150 the node ordering phase.
1151 'check_hardware_resources_conflicts(u, PS, c)'
1152 run a trace around cycle/slot through DFA model
1153 to check resource conflicts involving instruction u
1154 at cycle c given the partial schedule PS.
1155 'add_to_partial_schedule_at_time(u, PS, c)'
1156 Add the node/instruction u to the partial schedule
1158 'calculate_register_pressure(PS)'
1159 Given a schedule of instructions, calculate the register
1160 pressure it implies. One implementation could be the
1161 maximum number of overlapping live ranges.
1162 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1163 registers available in the hardware.
1167 3. for each node u in O in pre-computed order
1168 4. if (PSP(u) != Q && PSS(u) == Q) then
1169 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1170 6. start = Early_start; end = Early_start + II - 1; step = 1
1171 11. else if (PSP(u) == Q && PSS(u) != Q) then
1172 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1173 13. start = Late_start; end = Late_start - II + 1; step = -1
1174 14. else if (PSP(u) != Q && PSS(u) != Q) then
1175 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1176 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1177 17. start = Early_start;
1178 18. end = min(Early_start + II - 1 , Late_start);
1180 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1181 21. start = ASAP(u); end = start + II - 1; step = 1
1185 24. for (c = start ; c != end ; c += step)
1186 25. if check_hardware_resources_conflicts(u, PS, c) then
1187 26. add_to_partial_schedule_at_time(u, PS, c)
1192 31. if (success == false) then
1194 33. if (II > maxII) then
1195 34. finish - failed to schedule
1200 39. if (calculate_register_pressure(PS) > maxRP) then
1203 42. compute epilogue & prologue
1204 43. finish - succeeded to schedule
1207 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1208 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1209 set to 0 to save compile time. */
1210 #define DFA_HISTORY SMS_DFA_HISTORY
1212 /* Given the partial schedule PS, this function calculates and returns the
1213 cycles in which we can schedule the node with the given index I.
1214 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1215 noticed that there are several cases in which we fail to SMS the loop
1216 because the sched window of a node is empty due to tight data-deps. In
1217 such cases we want to unschedule some of the predecessors/successors
1218 until we get non-empty scheduling window. It returns -1 if the
1219 scheduling window is empty and zero otherwise. */
1222 get_sched_window (partial_schedule_ptr ps
, int *nodes_order
, int i
,
1223 sbitmap sched_nodes
, int ii
, int *start_p
, int *step_p
, int *end_p
)
1225 int start
, step
, end
;
1227 int u
= nodes_order
[i
];
1228 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1229 sbitmap psp
= sbitmap_alloc (ps
->g
->num_nodes
);
1230 sbitmap pss
= sbitmap_alloc (ps
->g
->num_nodes
);
1231 sbitmap u_node_preds
= NODE_PREDECESSORS (u_node
);
1232 sbitmap u_node_succs
= NODE_SUCCESSORS (u_node
);
1236 /* 1. compute sched window for u (start, end, step). */
1239 psp_not_empty
= sbitmap_a_and_b_cg (psp
, u_node_preds
, sched_nodes
);
1240 pss_not_empty
= sbitmap_a_and_b_cg (pss
, u_node_succs
, sched_nodes
);
1242 if (psp_not_empty
&& !pss_not_empty
)
1244 int early_start
= INT_MIN
;
1247 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1249 ddg_node_ptr v_node
= e
->src
;
1250 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1252 int node_st
= SCHED_TIME (v_node
)
1253 + e
->latency
- (e
->distance
* ii
);
1255 early_start
= MAX (early_start
, node_st
);
1257 if (e
->data_type
== MEM_DEP
)
1258 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1261 start
= early_start
;
1262 end
= MIN (end
, early_start
+ ii
);
1266 else if (!psp_not_empty
&& pss_not_empty
)
1268 int late_start
= INT_MAX
;
1271 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1273 ddg_node_ptr v_node
= e
->dest
;
1274 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1276 late_start
= MIN (late_start
,
1277 SCHED_TIME (v_node
) - e
->latency
1278 + (e
->distance
* ii
));
1279 if (e
->data_type
== MEM_DEP
)
1280 end
= MAX (end
, SCHED_TIME (v_node
) - ii
+ 1);
1284 end
= MAX (end
, late_start
- ii
);
1288 else if (psp_not_empty
&& pss_not_empty
)
1290 int early_start
= INT_MIN
;
1291 int late_start
= INT_MAX
;
1295 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1297 ddg_node_ptr v_node
= e
->src
;
1299 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1301 early_start
= MAX (early_start
,
1302 SCHED_TIME (v_node
) + e
->latency
1303 - (e
->distance
* ii
));
1304 if (e
->data_type
== MEM_DEP
)
1305 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1308 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1310 ddg_node_ptr v_node
= e
->dest
;
1312 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1314 late_start
= MIN (late_start
,
1315 SCHED_TIME (v_node
) - e
->latency
1316 + (e
->distance
* ii
));
1317 if (e
->data_type
== MEM_DEP
)
1318 start
= MAX (start
, SCHED_TIME (v_node
) - ii
+ 1);
1321 start
= MAX (start
, early_start
);
1322 end
= MIN (end
, MIN (early_start
+ ii
, late_start
+ 1));
1325 else /* psp is empty && pss is empty. */
1327 start
= SCHED_ASAP (u_node
);
1338 if ((start
>= end
&& step
== 1) || (start
<= end
&& step
== -1))
1344 /* This function implements the scheduling algorithm for SMS according to the
1346 static partial_schedule_ptr
1347 sms_schedule_by_order (ddg_ptr g
, int mii
, int maxii
, int *nodes_order
)
1351 int try_again_with_larger_ii
= true;
1352 int num_nodes
= g
->num_nodes
;
1354 int start
, end
, step
; /* Place together into one struct? */
1355 sbitmap sched_nodes
= sbitmap_alloc (num_nodes
);
1356 sbitmap must_precede
= sbitmap_alloc (num_nodes
);
1357 sbitmap must_follow
= sbitmap_alloc (num_nodes
);
1358 sbitmap tobe_scheduled
= sbitmap_alloc (num_nodes
);
1360 partial_schedule_ptr ps
= create_partial_schedule (ii
, g
, DFA_HISTORY
);
1362 sbitmap_ones (tobe_scheduled
);
1363 sbitmap_zero (sched_nodes
);
1365 while ((! sbitmap_equal (tobe_scheduled
, sched_nodes
)
1366 || try_again_with_larger_ii
) && ii
< maxii
)
1369 bool unscheduled_nodes
= false;
1372 fprintf (dump_file
, "Starting with ii=%d\n", ii
);
1373 if (try_again_with_larger_ii
)
1375 try_again_with_larger_ii
= false;
1376 sbitmap_zero (sched_nodes
);
1379 for (i
= 0; i
< num_nodes
; i
++)
1381 int u
= nodes_order
[i
];
1382 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1383 rtx insn
= u_node
->insn
;
1387 RESET_BIT (tobe_scheduled
, u
);
1391 if (JUMP_P (insn
)) /* Closing branch handled later. */
1393 RESET_BIT (tobe_scheduled
, u
);
1397 if (TEST_BIT (sched_nodes
, u
))
1400 /* Try to get non-empty scheduling window. */
1402 while (get_sched_window (ps
, nodes_order
, i
, sched_nodes
, ii
, &start
, &step
, &end
) < 0
1405 unscheduled_nodes
= true;
1406 if (TEST_BIT (NODE_PREDECESSORS (u_node
), nodes_order
[j
- 1])
1407 || TEST_BIT (NODE_SUCCESSORS (u_node
), nodes_order
[j
- 1]))
1409 ps_unschedule_node (ps
, &ps
->g
->nodes
[nodes_order
[j
- 1]]);
1410 RESET_BIT (sched_nodes
, nodes_order
[j
- 1]);
1416 /* ??? Try backtracking instead of immediately ii++? */
1418 try_again_with_larger_ii
= true;
1419 reset_partial_schedule (ps
, ii
);
1422 /* 2. Try scheduling u in window. */
1425 "Trying to schedule node %d in (%d .. %d) step %d\n",
1426 u
, start
, end
, step
);
1428 /* use must_follow & must_precede bitmaps to determine order
1429 of nodes within the cycle. */
1430 sbitmap_zero (must_precede
);
1431 sbitmap_zero (must_follow
);
1432 /* TODO: We can add an insn to the must_precede or must_follow
1433 bitmaps only if it has tight dependence to U and they
1434 both scheduled in the same row. The current check is less
1435 conservative and content with the fact that both U and the
1436 insn are scheduled in the same row. */
1437 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1438 if (TEST_BIT (sched_nodes
, e
->src
->cuid
)
1439 && (SMODULO (SCHED_TIME (e
->src
), ii
) == SMODULO (start
, ii
)))
1440 SET_BIT (must_precede
, e
->src
->cuid
);
1442 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1443 if (TEST_BIT (sched_nodes
, e
->dest
->cuid
)
1444 && (SMODULO (SCHED_TIME (e
->dest
), ii
) ==
1445 SMODULO ((end
- step
), ii
)))
1446 SET_BIT (must_follow
, e
->dest
->cuid
);
1449 if ((step
> 0 && start
< end
) || (step
< 0 && start
> end
))
1450 for (c
= start
; c
!= end
; c
+= step
)
1454 psi
= ps_add_node_check_conflicts (ps
, u_node
, c
,
1460 SCHED_TIME (u_node
) = c
;
1461 SET_BIT (sched_nodes
, u
);
1464 fprintf (dump_file
, "Schedule in %d\n", c
);
1470 /* ??? Try backtracking instead of immediately ii++? */
1472 try_again_with_larger_ii
= true;
1473 reset_partial_schedule (ps
, ii
);
1476 if (unscheduled_nodes
)
1479 /* ??? If (success), check register pressure estimates. */
1480 } /* Continue with next node. */
1481 } /* While try_again_with_larger_ii. */
1483 sbitmap_free (sched_nodes
);
1484 sbitmap_free (must_precede
);
1485 sbitmap_free (must_follow
);
1486 sbitmap_free (tobe_scheduled
);
1490 free_partial_schedule (ps
);
1497 /* This page implements the algorithm for ordering the nodes of a DDG
1498 for modulo scheduling, activated through the
1499 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1501 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1502 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1503 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1504 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1505 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1506 #define DEPTH(x) (ASAP ((x)))
1508 typedef struct node_order_params
* nopa
;
1510 static void order_nodes_of_sccs (ddg_all_sccs_ptr
, int * result
);
1511 static int order_nodes_in_scc (ddg_ptr
, sbitmap
, sbitmap
, int*, int);
1512 static nopa
calculate_order_params (ddg_ptr
, int mii
);
1513 static int find_max_asap (ddg_ptr
, sbitmap
);
1514 static int find_max_hv_min_mob (ddg_ptr
, sbitmap
);
1515 static int find_max_dv_min_mob (ddg_ptr
, sbitmap
);
1517 enum sms_direction
{BOTTOMUP
, TOPDOWN
};
1519 struct node_order_params
1526 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1528 check_nodes_order (int *node_order
, int num_nodes
)
1531 sbitmap tmp
= sbitmap_alloc (num_nodes
);
1535 for (i
= 0; i
< num_nodes
; i
++)
1537 int u
= node_order
[i
];
1539 gcc_assert (u
< num_nodes
&& u
>= 0 && !TEST_BIT (tmp
, u
));
1547 /* Order the nodes of G for scheduling and pass the result in
1548 NODE_ORDER. Also set aux.count of each node to ASAP.
1549 Return the recMII for the given DDG. */
1551 sms_order_nodes (ddg_ptr g
, int mii
, int * node_order
)
1555 ddg_all_sccs_ptr sccs
= create_ddg_all_sccs (g
);
1557 nopa nops
= calculate_order_params (g
, mii
);
1560 print_sccs (dump_file
, sccs
, g
);
1562 order_nodes_of_sccs (sccs
, node_order
);
1564 if (sccs
->num_sccs
> 0)
1565 /* First SCC has the largest recurrence_length. */
1566 rec_mii
= sccs
->sccs
[0]->recurrence_length
;
1568 /* Save ASAP before destroying node_order_params. */
1569 for (i
= 0; i
< g
->num_nodes
; i
++)
1571 ddg_node_ptr v
= &g
->nodes
[i
];
1572 v
->aux
.count
= ASAP (v
);
1576 free_ddg_all_sccs (sccs
);
1577 check_nodes_order (node_order
, g
->num_nodes
);
1583 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs
, int * node_order
)
1586 ddg_ptr g
= all_sccs
->ddg
;
1587 int num_nodes
= g
->num_nodes
;
1588 sbitmap prev_sccs
= sbitmap_alloc (num_nodes
);
1589 sbitmap on_path
= sbitmap_alloc (num_nodes
);
1590 sbitmap tmp
= sbitmap_alloc (num_nodes
);
1591 sbitmap ones
= sbitmap_alloc (num_nodes
);
1593 sbitmap_zero (prev_sccs
);
1594 sbitmap_ones (ones
);
1596 /* Perfrom the node ordering starting from the SCC with the highest recMII.
1597 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1598 for (i
= 0; i
< all_sccs
->num_sccs
; i
++)
1600 ddg_scc_ptr scc
= all_sccs
->sccs
[i
];
1602 /* Add nodes on paths from previous SCCs to the current SCC. */
1603 find_nodes_on_paths (on_path
, g
, prev_sccs
, scc
->nodes
);
1604 sbitmap_a_or_b (tmp
, scc
->nodes
, on_path
);
1606 /* Add nodes on paths from the current SCC to previous SCCs. */
1607 find_nodes_on_paths (on_path
, g
, scc
->nodes
, prev_sccs
);
1608 sbitmap_a_or_b (tmp
, tmp
, on_path
);
1610 /* Remove nodes of previous SCCs from current extended SCC. */
1611 sbitmap_difference (tmp
, tmp
, prev_sccs
);
1613 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
1614 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1617 /* Handle the remaining nodes that do not belong to any scc. Each call
1618 to order_nodes_in_scc handles a single connected component. */
1619 while (pos
< g
->num_nodes
)
1621 sbitmap_difference (tmp
, ones
, prev_sccs
);
1622 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
1624 sbitmap_free (prev_sccs
);
1625 sbitmap_free (on_path
);
1627 sbitmap_free (ones
);
1630 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
1631 static struct node_order_params
*
1632 calculate_order_params (ddg_ptr g
, int mii ATTRIBUTE_UNUSED
)
1636 int num_nodes
= g
->num_nodes
;
1638 /* Allocate a place to hold ordering params for each node in the DDG. */
1639 nopa node_order_params_arr
;
1641 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1642 node_order_params_arr
= (nopa
) xcalloc (num_nodes
,
1643 sizeof (struct node_order_params
));
1645 /* Set the aux pointer of each node to point to its order_params structure. */
1646 for (u
= 0; u
< num_nodes
; u
++)
1647 g
->nodes
[u
].aux
.info
= &node_order_params_arr
[u
];
1649 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1650 calculate ASAP, ALAP, mobility, distance, and height for each node
1651 in the dependence (direct acyclic) graph. */
1653 /* We assume that the nodes in the array are in topological order. */
1656 for (u
= 0; u
< num_nodes
; u
++)
1658 ddg_node_ptr u_node
= &g
->nodes
[u
];
1661 for (e
= u_node
->in
; e
; e
= e
->next_in
)
1662 if (e
->distance
== 0)
1663 ASAP (u_node
) = MAX (ASAP (u_node
),
1664 ASAP (e
->src
) + e
->latency
);
1665 max_asap
= MAX (max_asap
, ASAP (u_node
));
1668 for (u
= num_nodes
- 1; u
> -1; u
--)
1670 ddg_node_ptr u_node
= &g
->nodes
[u
];
1672 ALAP (u_node
) = max_asap
;
1673 HEIGHT (u_node
) = 0;
1674 for (e
= u_node
->out
; e
; e
= e
->next_out
)
1675 if (e
->distance
== 0)
1677 ALAP (u_node
) = MIN (ALAP (u_node
),
1678 ALAP (e
->dest
) - e
->latency
);
1679 HEIGHT (u_node
) = MAX (HEIGHT (u_node
),
1680 HEIGHT (e
->dest
) + e
->latency
);
1684 return node_order_params_arr
;
1688 find_max_asap (ddg_ptr g
, sbitmap nodes
)
1693 sbitmap_iterator sbi
;
1695 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
1697 ddg_node_ptr u_node
= &g
->nodes
[u
];
1699 if (max_asap
< ASAP (u_node
))
1701 max_asap
= ASAP (u_node
);
1709 find_max_hv_min_mob (ddg_ptr g
, sbitmap nodes
)
1713 int min_mob
= INT_MAX
;
1715 sbitmap_iterator sbi
;
1717 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
1719 ddg_node_ptr u_node
= &g
->nodes
[u
];
1721 if (max_hv
< HEIGHT (u_node
))
1723 max_hv
= HEIGHT (u_node
);
1724 min_mob
= MOB (u_node
);
1727 else if ((max_hv
== HEIGHT (u_node
))
1728 && (min_mob
> MOB (u_node
)))
1730 min_mob
= MOB (u_node
);
1738 find_max_dv_min_mob (ddg_ptr g
, sbitmap nodes
)
1742 int min_mob
= INT_MAX
;
1744 sbitmap_iterator sbi
;
1746 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
1748 ddg_node_ptr u_node
= &g
->nodes
[u
];
1750 if (max_dv
< DEPTH (u_node
))
1752 max_dv
= DEPTH (u_node
);
1753 min_mob
= MOB (u_node
);
1756 else if ((max_dv
== DEPTH (u_node
))
1757 && (min_mob
> MOB (u_node
)))
1759 min_mob
= MOB (u_node
);
1766 /* Places the nodes of SCC into the NODE_ORDER array starting
1767 at position POS, according to the SMS ordering algorithm.
1768 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1769 the NODE_ORDER array, starting from position zero. */
1771 order_nodes_in_scc (ddg_ptr g
, sbitmap nodes_ordered
, sbitmap scc
,
1772 int * node_order
, int pos
)
1774 enum sms_direction dir
;
1775 int num_nodes
= g
->num_nodes
;
1776 sbitmap workset
= sbitmap_alloc (num_nodes
);
1777 sbitmap tmp
= sbitmap_alloc (num_nodes
);
1778 sbitmap zero_bitmap
= sbitmap_alloc (num_nodes
);
1779 sbitmap predecessors
= sbitmap_alloc (num_nodes
);
1780 sbitmap successors
= sbitmap_alloc (num_nodes
);
1782 sbitmap_zero (predecessors
);
1783 find_predecessors (predecessors
, g
, nodes_ordered
);
1785 sbitmap_zero (successors
);
1786 find_successors (successors
, g
, nodes_ordered
);
1789 if (sbitmap_a_and_b_cg (tmp
, predecessors
, scc
))
1791 sbitmap_copy (workset
, tmp
);
1794 else if (sbitmap_a_and_b_cg (tmp
, successors
, scc
))
1796 sbitmap_copy (workset
, tmp
);
1803 sbitmap_zero (workset
);
1804 if ((u
= find_max_asap (g
, scc
)) >= 0)
1805 SET_BIT (workset
, u
);
1809 sbitmap_zero (zero_bitmap
);
1810 while (!sbitmap_equal (workset
, zero_bitmap
))
1813 ddg_node_ptr v_node
;
1814 sbitmap v_node_preds
;
1815 sbitmap v_node_succs
;
1819 while (!sbitmap_equal (workset
, zero_bitmap
))
1821 v
= find_max_hv_min_mob (g
, workset
);
1822 v_node
= &g
->nodes
[v
];
1823 node_order
[pos
++] = v
;
1824 v_node_succs
= NODE_SUCCESSORS (v_node
);
1825 sbitmap_a_and_b (tmp
, v_node_succs
, scc
);
1827 /* Don't consider the already ordered successors again. */
1828 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
1829 sbitmap_a_or_b (workset
, workset
, tmp
);
1830 RESET_BIT (workset
, v
);
1831 SET_BIT (nodes_ordered
, v
);
1834 sbitmap_zero (predecessors
);
1835 find_predecessors (predecessors
, g
, nodes_ordered
);
1836 sbitmap_a_and_b (workset
, predecessors
, scc
);
1840 while (!sbitmap_equal (workset
, zero_bitmap
))
1842 v
= find_max_dv_min_mob (g
, workset
);
1843 v_node
= &g
->nodes
[v
];
1844 node_order
[pos
++] = v
;
1845 v_node_preds
= NODE_PREDECESSORS (v_node
);
1846 sbitmap_a_and_b (tmp
, v_node_preds
, scc
);
1848 /* Don't consider the already ordered predecessors again. */
1849 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
1850 sbitmap_a_or_b (workset
, workset
, tmp
);
1851 RESET_BIT (workset
, v
);
1852 SET_BIT (nodes_ordered
, v
);
1855 sbitmap_zero (successors
);
1856 find_successors (successors
, g
, nodes_ordered
);
1857 sbitmap_a_and_b (workset
, successors
, scc
);
1861 sbitmap_free (workset
);
1862 sbitmap_free (zero_bitmap
);
1863 sbitmap_free (predecessors
);
1864 sbitmap_free (successors
);
1869 /* This page contains functions for manipulating partial-schedules during
1870 modulo scheduling. */
1872 /* Create a partial schedule and allocate a memory to hold II rows. */
1874 static partial_schedule_ptr
1875 create_partial_schedule (int ii
, ddg_ptr g
, int history
)
1877 partial_schedule_ptr ps
= XNEW (struct partial_schedule
);
1878 ps
->rows
= (ps_insn_ptr
*) xcalloc (ii
, sizeof (ps_insn_ptr
));
1880 ps
->history
= history
;
1881 ps
->min_cycle
= INT_MAX
;
1882 ps
->max_cycle
= INT_MIN
;
1888 /* Free the PS_INSNs in rows array of the given partial schedule.
1889 ??? Consider caching the PS_INSN's. */
1891 free_ps_insns (partial_schedule_ptr ps
)
1895 for (i
= 0; i
< ps
->ii
; i
++)
1899 ps_insn_ptr ps_insn
= ps
->rows
[i
]->next_in_row
;
1902 ps
->rows
[i
] = ps_insn
;
1908 /* Free all the memory allocated to the partial schedule. */
1911 free_partial_schedule (partial_schedule_ptr ps
)
1920 /* Clear the rows array with its PS_INSNs, and create a new one with
1924 reset_partial_schedule (partial_schedule_ptr ps
, int new_ii
)
1929 if (new_ii
== ps
->ii
)
1931 ps
->rows
= (ps_insn_ptr
*) xrealloc (ps
->rows
, new_ii
1932 * sizeof (ps_insn_ptr
));
1933 memset (ps
->rows
, 0, new_ii
* sizeof (ps_insn_ptr
));
1935 ps
->min_cycle
= INT_MAX
;
1936 ps
->max_cycle
= INT_MIN
;
1939 /* Prints the partial schedule as an ii rows array, for each rows
1940 print the ids of the insns in it. */
1942 print_partial_schedule (partial_schedule_ptr ps
, FILE *dump
)
1946 for (i
= 0; i
< ps
->ii
; i
++)
1948 ps_insn_ptr ps_i
= ps
->rows
[i
];
1950 fprintf (dump
, "\n[CYCLE %d ]: ", i
);
1953 fprintf (dump
, "%d, ",
1954 INSN_UID (ps_i
->node
->insn
));
1955 ps_i
= ps_i
->next_in_row
;
1960 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1962 create_ps_insn (ddg_node_ptr node
, int rest_count
, int cycle
)
1964 ps_insn_ptr ps_i
= XNEW (struct ps_insn
);
1967 ps_i
->next_in_row
= NULL
;
1968 ps_i
->prev_in_row
= NULL
;
1969 ps_i
->row_rest_count
= rest_count
;
1970 ps_i
->cycle
= cycle
;
1976 /* Removes the given PS_INSN from the partial schedule. Returns false if the
1977 node is not found in the partial schedule, else returns true. */
1979 remove_node_from_ps (partial_schedule_ptr ps
, ps_insn_ptr ps_i
)
1986 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
1987 if (! ps_i
->prev_in_row
)
1989 if (ps_i
!= ps
->rows
[row
])
1992 ps
->rows
[row
] = ps_i
->next_in_row
;
1994 ps
->rows
[row
]->prev_in_row
= NULL
;
1998 ps_i
->prev_in_row
->next_in_row
= ps_i
->next_in_row
;
1999 if (ps_i
->next_in_row
)
2000 ps_i
->next_in_row
->prev_in_row
= ps_i
->prev_in_row
;
2006 /* Unlike what literature describes for modulo scheduling (which focuses
2007 on VLIW machines) the order of the instructions inside a cycle is
2008 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2009 where the current instruction should go relative to the already
2010 scheduled instructions in the given cycle. Go over these
2011 instructions and find the first possible column to put it in. */
2013 ps_insn_find_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2014 sbitmap must_precede
, sbitmap must_follow
)
2016 ps_insn_ptr next_ps_i
;
2017 ps_insn_ptr first_must_follow
= NULL
;
2018 ps_insn_ptr last_must_precede
= NULL
;
2024 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2026 /* Find the first must follow and the last must precede
2027 and insert the node immediately after the must precede
2028 but make sure that it there is no must follow after it. */
2029 for (next_ps_i
= ps
->rows
[row
];
2031 next_ps_i
= next_ps_i
->next_in_row
)
2033 if (TEST_BIT (must_follow
, next_ps_i
->node
->cuid
)
2034 && ! first_must_follow
)
2035 first_must_follow
= next_ps_i
;
2036 if (TEST_BIT (must_precede
, next_ps_i
->node
->cuid
))
2038 /* If we have already met a node that must follow, then
2039 there is no possible column. */
2040 if (first_must_follow
)
2043 last_must_precede
= next_ps_i
;
2047 /* Now insert the node after INSERT_AFTER_PSI. */
2049 if (! last_must_precede
)
2051 ps_i
->next_in_row
= ps
->rows
[row
];
2052 ps_i
->prev_in_row
= NULL
;
2053 if (ps_i
->next_in_row
)
2054 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2055 ps
->rows
[row
] = ps_i
;
2059 ps_i
->next_in_row
= last_must_precede
->next_in_row
;
2060 last_must_precede
->next_in_row
= ps_i
;
2061 ps_i
->prev_in_row
= last_must_precede
;
2062 if (ps_i
->next_in_row
)
2063 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2069 /* Advances the PS_INSN one column in its current row; returns false
2070 in failure and true in success. Bit N is set in MUST_FOLLOW if
2071 the node with cuid N must be come after the node pointed to by
2072 PS_I when scheduled in the same cycle. */
2074 ps_insn_advance_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2075 sbitmap must_follow
)
2077 ps_insn_ptr prev
, next
;
2079 ddg_node_ptr next_node
;
2084 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2086 if (! ps_i
->next_in_row
)
2089 next_node
= ps_i
->next_in_row
->node
;
2091 /* Check if next_in_row is dependent on ps_i, both having same sched
2092 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2093 if (TEST_BIT (must_follow
, next_node
->cuid
))
2096 /* Advance PS_I over its next_in_row in the doubly linked list. */
2097 prev
= ps_i
->prev_in_row
;
2098 next
= ps_i
->next_in_row
;
2100 if (ps_i
== ps
->rows
[row
])
2101 ps
->rows
[row
] = next
;
2103 ps_i
->next_in_row
= next
->next_in_row
;
2105 if (next
->next_in_row
)
2106 next
->next_in_row
->prev_in_row
= ps_i
;
2108 next
->next_in_row
= ps_i
;
2109 ps_i
->prev_in_row
= next
;
2111 next
->prev_in_row
= prev
;
2113 prev
->next_in_row
= next
;
2118 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2119 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2120 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2121 before/after (respectively) the node pointed to by PS_I when scheduled
2122 in the same cycle. */
2124 add_node_to_ps (partial_schedule_ptr ps
, ddg_node_ptr node
, int cycle
,
2125 sbitmap must_precede
, sbitmap must_follow
)
2129 int row
= SMODULO (cycle
, ps
->ii
);
2132 && ps
->rows
[row
]->row_rest_count
>= issue_rate
)
2136 rest_count
+= ps
->rows
[row
]->row_rest_count
;
2138 ps_i
= create_ps_insn (node
, rest_count
, cycle
);
2140 /* Finds and inserts PS_I according to MUST_FOLLOW and
2142 if (! ps_insn_find_column (ps
, ps_i
, must_precede
, must_follow
))
2151 /* Advance time one cycle. Assumes DFA is being used. */
2153 advance_one_cycle (void)
2155 if (targetm
.sched
.dfa_pre_cycle_insn
)
2156 state_transition (curr_state
,
2157 targetm
.sched
.dfa_pre_cycle_insn ());
2159 state_transition (curr_state
, NULL
);
2161 if (targetm
.sched
.dfa_post_cycle_insn
)
2162 state_transition (curr_state
,
2163 targetm
.sched
.dfa_post_cycle_insn ());
2168 /* Checks if PS has resource conflicts according to DFA, starting from
2169 FROM cycle to TO cycle; returns true if there are conflicts and false
2170 if there are no conflicts. Assumes DFA is being used. */
2172 ps_has_conflicts (partial_schedule_ptr ps
, int from
, int to
)
2176 state_reset (curr_state
);
2178 for (cycle
= from
; cycle
<= to
; cycle
++)
2180 ps_insn_ptr crr_insn
;
2181 /* Holds the remaining issue slots in the current row. */
2182 int can_issue_more
= issue_rate
;
2184 /* Walk through the DFA for the current row. */
2185 for (crr_insn
= ps
->rows
[SMODULO (cycle
, ps
->ii
)];
2187 crr_insn
= crr_insn
->next_in_row
)
2189 rtx insn
= crr_insn
->node
->insn
;
2194 /* Check if there is room for the current insn. */
2195 if (!can_issue_more
|| state_dead_lock_p (curr_state
))
2198 /* Update the DFA state and return with failure if the DFA found
2199 recource conflicts. */
2200 if (state_transition (curr_state
, insn
) >= 0)
2203 if (targetm
.sched
.variable_issue
)
2205 targetm
.sched
.variable_issue (sched_dump
, sched_verbose
,
2206 insn
, can_issue_more
);
2207 /* A naked CLOBBER or USE generates no instruction, so don't
2208 let them consume issue slots. */
2209 else if (GET_CODE (PATTERN (insn
)) != USE
2210 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
2214 /* Advance the DFA to the next cycle. */
2215 advance_one_cycle ();
2220 /* Checks if the given node causes resource conflicts when added to PS at
2221 cycle C. If not the node is added to PS and returned; otherwise zero
2222 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2223 cuid N must be come before/after (respectively) the node pointed to by
2224 PS_I when scheduled in the same cycle. */
2226 ps_add_node_check_conflicts (partial_schedule_ptr ps
, ddg_node_ptr n
,
2227 int c
, sbitmap must_precede
,
2228 sbitmap must_follow
)
2230 int has_conflicts
= 0;
2233 /* First add the node to the PS, if this succeeds check for
2234 conflicts, trying different issue slots in the same row. */
2235 if (! (ps_i
= add_node_to_ps (ps
, n
, c
, must_precede
, must_follow
)))
2236 return NULL
; /* Failed to insert the node at the given cycle. */
2238 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2240 && ps_has_conflicts (ps
,
2244 /* Try different issue slots to find one that the given node can be
2245 scheduled in without conflicts. */
2246 while (has_conflicts
)
2248 if (! ps_insn_advance_column (ps
, ps_i
, must_follow
))
2250 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2252 && ps_has_conflicts (ps
,
2259 remove_node_from_ps (ps
, ps_i
);
2263 ps
->min_cycle
= MIN (ps
->min_cycle
, c
);
2264 ps
->max_cycle
= MAX (ps
->max_cycle
, c
);
2268 /* Rotate the rows of PS such that insns scheduled at time
2269 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2271 rotate_partial_schedule (partial_schedule_ptr ps
, int start_cycle
)
2273 int i
, row
, backward_rotates
;
2274 int last_row
= ps
->ii
- 1;
2276 if (start_cycle
== 0)
2279 backward_rotates
= SMODULO (start_cycle
, ps
->ii
);
2281 /* Revisit later and optimize this into a single loop. */
2282 for (i
= 0; i
< backward_rotates
; i
++)
2284 ps_insn_ptr first_row
= ps
->rows
[0];
2286 for (row
= 0; row
< last_row
; row
++)
2287 ps
->rows
[row
] = ps
->rows
[row
+1];
2289 ps
->rows
[last_row
] = first_row
;
2292 ps
->max_cycle
-= start_cycle
;
2293 ps
->min_cycle
-= start_cycle
;
2296 /* Remove the node N from the partial schedule PS; because we restart the DFA
2297 each time we want to check for resource conflicts; this is equivalent to
2298 unscheduling the node N. */
2300 ps_unschedule_node (partial_schedule_ptr ps
, ddg_node_ptr n
)
2303 int row
= SMODULO (SCHED_TIME (n
), ps
->ii
);
2305 if (row
< 0 || row
> ps
->ii
)
2308 for (ps_i
= ps
->rows
[row
];
2309 ps_i
&& ps_i
->node
!= n
;
2310 ps_i
= ps_i
->next_in_row
);
2314 return remove_node_from_ps (ps
, ps_i
);
2316 #endif /* INSN_SCHEDULING */
2319 gate_handle_sms (void)
2321 return (optimize
> 0 && flag_modulo_sched
);
2325 /* Run instruction scheduler. */
2326 /* Perform SMS module scheduling. */
2328 rest_of_handle_sms (void)
2330 #ifdef INSN_SCHEDULING
2333 /* Collect loop information to be used in SMS. */
2334 cfg_layout_initialize (0);
2337 /* Update the life information, because we add pseudos. */
2338 max_regno
= max_reg_num ();
2340 /* Finalize layout changes. */
2342 if (bb
->next_bb
!= EXIT_BLOCK_PTR
)
2343 bb
->aux
= bb
->next_bb
;
2344 free_dominance_info (CDI_DOMINATORS
);
2345 cfg_layout_finalize ();
2346 #endif /* INSN_SCHEDULING */
2350 struct tree_opt_pass pass_sms
=
2353 gate_handle_sms
, /* gate */
2354 rest_of_handle_sms
, /* execute */
2357 0, /* static_pass_number */
2359 0, /* properties_required */
2360 0, /* properties_provided */
2361 0, /* properties_destroyed */
2362 TODO_dump_func
, /* todo_flags_start */
2365 TODO_ggc_collect
, /* todo_flags_finish */