1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
39 #include "sched-int.h"
41 #include "cfglayout.h"
49 #include "tree-pass.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
91 emitting the control part, and a kernel which is to iterate that
92 constant number of iterations less than the original loop. So the
93 control part should be a set of insns clearly identified and having
94 its own iv, not otherwise used in the loop (at-least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
97 where (1) the control part comprises of all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
102 /* This page defines partial-schedule structures and functions for
103 modulo scheduling. */
105 typedef struct partial_schedule
*partial_schedule_ptr
;
106 typedef struct ps_insn
*ps_insn_ptr
;
108 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
111 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
114 /* Perform signed modulo, always returning a non-negative value. */
115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
117 /* The number of different iterations the nodes in ps span, assuming
118 the stage boundaries are placed efficiently. */
119 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
120 + 1 + (ps)->ii - 1) / (ps)->ii)
122 /* A single instruction in the partial schedule. */
125 /* The corresponding DDG_NODE. */
128 /* The (absolute) cycle in which the PS instruction is scheduled.
129 Same as SCHED_TIME (node). */
132 /* The next/prev PS_INSN in the same row. */
133 ps_insn_ptr next_in_row
,
136 /* The number of nodes in the same row that come after this node. */
140 /* Holds the partial schedule as an array of II rows. Each entry of the
141 array points to a linked list of PS_INSNs, which represents the
142 instructions that are scheduled for that row. */
143 struct partial_schedule
145 int ii
; /* Number of rows in the partial schedule. */
146 int history
; /* Threshold for conflict checking using DFA. */
148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
151 /* The earliest absolute cycle of an insn in the partial schedule. */
154 /* The latest absolute cycle of an insn in the partial schedule. */
157 ddg_ptr g
; /* The DDG of the insns in the partial schedule. */
160 /* We use this to record all the register replacements we do in
161 the kernel so we can undo SMS if it is not profitable. */
162 struct undo_replace_buff_elem
167 struct undo_replace_buff_elem
*next
;
172 static partial_schedule_ptr
create_partial_schedule (int ii
, ddg_ptr
, int history
);
173 static void free_partial_schedule (partial_schedule_ptr
);
174 static void reset_partial_schedule (partial_schedule_ptr
, int new_ii
);
175 void print_partial_schedule (partial_schedule_ptr
, FILE *);
176 static void verify_partial_schedule (partial_schedule_ptr
, sbitmap
);
177 static ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr
,
178 ddg_node_ptr node
, int cycle
,
179 sbitmap must_precede
,
180 sbitmap must_follow
);
181 static void rotate_partial_schedule (partial_schedule_ptr
, int);
182 void set_row_column_for_ps (partial_schedule_ptr
);
183 static void ps_insert_empty_row (partial_schedule_ptr
, int, sbitmap
);
184 static int compute_split_row (sbitmap
, int, int, int, ddg_node_ptr
);
187 /* This page defines constants and structures for the modulo scheduling
190 /* As in haifa-sched.c: */
191 /* issue_rate is the number of insns that can be scheduled in the same
192 machine cycle. It can be defined in the config/mach/mach.h file,
193 otherwise we set it to 1. */
195 static int issue_rate
;
197 static int sms_order_nodes (ddg_ptr
, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr
);
199 static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr
, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr
, rtx
);
201 static void generate_prolog_epilog (partial_schedule_ptr
, struct loop
*,
203 static void duplicate_insns_of_cycles (partial_schedule_ptr
,
206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
208 #define SCHED_FIRST_REG_MOVE(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
210 #define SCHED_NREG_MOVES(x) \
211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
217 typedef struct node_sched_params
219 int asap
; /* A lower-bound on the absolute scheduling cycle. */
220 int time
; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
228 /* The number of register-move instructions added, immediately preceding
232 int row
; /* Holds time % ii. */
233 int stage
; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
238 } *node_sched_params_ptr
;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
245 sms_print_insn (rtx insn
, int aligned ATTRIBUTE_UNUSED
)
249 sprintf (tmp
, "i%4d", INSN_UID (insn
));
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED
,
255 regset cond_exec ATTRIBUTE_UNUSED
,
256 regset used ATTRIBUTE_UNUSED
,
257 regset set ATTRIBUTE_UNUSED
)
261 static struct sched_info sms_sched_info
=
270 compute_jump_reg_dependencies
,
275 NULL
, NULL
, NULL
, NULL
, NULL
,
280 /* Given HEAD and TAIL which are the first and last insns in a loop;
281 return the register which controls the loop. Return zero if it has
282 more than one occurrence in the loop besides the control part or the
283 do-loop pattern is not of the form we expect. */
285 doloop_register_get (rtx head ATTRIBUTE_UNUSED
, rtx tail ATTRIBUTE_UNUSED
)
287 #ifdef HAVE_doloop_end
288 rtx reg
, condition
, insn
, first_insn_not_to_check
;
293 /* TODO: Free SMS's dependence on doloop_condition_get. */
294 condition
= doloop_condition_get (tail
);
298 if (REG_P (XEXP (condition
, 0)))
299 reg
= XEXP (condition
, 0);
300 else if (GET_CODE (XEXP (condition
, 0)) == PLUS
301 && REG_P (XEXP (XEXP (condition
, 0), 0)))
302 reg
= XEXP (XEXP (condition
, 0), 0);
306 /* Check that the COUNT_REG has no other occurrences in the loop
307 until the decrement. We assume the control part consists of
308 either a single (parallel) branch-on-count or a (non-parallel)
309 branch immediately preceded by a single (decrement) insn. */
310 first_insn_not_to_check
= (GET_CODE (PATTERN (tail
)) == PARALLEL
? tail
313 for (insn
= head
; insn
!= first_insn_not_to_check
; insn
= NEXT_INSN (insn
))
314 if (reg_mentioned_p (reg
, insn
))
318 fprintf (dump_file
, "SMS count_reg found ");
319 print_rtl_single (dump_file
, reg
);
320 fprintf (dump_file
, " outside control in insn:\n");
321 print_rtl_single (dump_file
, insn
);
333 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
334 that the number of iterations is a compile-time constant. If so,
335 return the rtx that sets COUNT_REG to a constant, and set COUNT to
336 this constant. Otherwise return 0. */
338 const_iteration_count (rtx count_reg
, basic_block pre_header
,
339 HOST_WIDEST_INT
* count
)
347 get_ebb_head_tail (pre_header
, pre_header
, &head
, &tail
);
349 for (insn
= tail
; insn
!= PREV_INSN (head
); insn
= PREV_INSN (insn
))
350 if (INSN_P (insn
) && single_set (insn
) &&
351 rtx_equal_p (count_reg
, SET_DEST (single_set (insn
))))
353 rtx pat
= single_set (insn
);
355 if (GET_CODE (SET_SRC (pat
)) == CONST_INT
)
357 *count
= INTVAL (SET_SRC (pat
));
367 /* A very simple resource-based lower bound on the initiation interval.
368 ??? Improve the accuracy of this bound by considering the
369 utilization of various units. */
373 if (targetm
.sched
.sms_res_mii
)
374 return targetm
.sched
.sms_res_mii (g
);
376 return (g
->num_nodes
/ issue_rate
);
380 /* Points to the array that contains the sched data for each node. */
381 static node_sched_params_ptr node_sched_params
;
383 /* Allocate sched_params for each node and initialize it. Assumes that
384 the aux field of each node contain the asap bound (computed earlier),
385 and copies it into the sched_params field. */
387 set_node_sched_params (ddg_ptr g
)
391 /* Allocate for each node in the DDG a place to hold the "sched_data". */
392 /* Initialize ASAP/ALAP/HIGHT to zero. */
393 node_sched_params
= (node_sched_params_ptr
)
394 xcalloc (g
->num_nodes
,
395 sizeof (struct node_sched_params
));
397 /* Set the pointer of the general data of the node to point to the
398 appropriate sched_params structure. */
399 for (i
= 0; i
< g
->num_nodes
; i
++)
401 /* Watch out for aliasing problems? */
402 node_sched_params
[i
].asap
= g
->nodes
[i
].aux
.count
;
403 g
->nodes
[i
].aux
.info
= &node_sched_params
[i
];
408 print_node_sched_params (FILE *file
, int num_nodes
, ddg_ptr g
)
414 for (i
= 0; i
< num_nodes
; i
++)
416 node_sched_params_ptr nsp
= &node_sched_params
[i
];
417 rtx reg_move
= nsp
->first_reg_move
;
420 fprintf (file
, "Node = %d; INSN = %d\n", i
,
421 (INSN_UID (g
->nodes
[i
].insn
)));
422 fprintf (file
, " asap = %d:\n", nsp
->asap
);
423 fprintf (file
, " time = %d:\n", nsp
->time
);
424 fprintf (file
, " nreg_moves = %d:\n", nsp
->nreg_moves
);
425 for (j
= 0; j
< nsp
->nreg_moves
; j
++)
427 fprintf (file
, " reg_move = ");
428 print_rtl_single (file
, reg_move
);
429 reg_move
= PREV_INSN (reg_move
);
435 Breaking intra-loop register anti-dependences:
436 Each intra-loop register anti-dependence implies a cross-iteration true
437 dependence of distance 1. Therefore, we can remove such false dependencies
438 and figure out if the partial schedule broke them by checking if (for a
439 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
440 if so generate a register move. The number of such moves is equal to:
441 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
442 nreg_moves = ----------------------------------- + 1 - { dependence.
445 static struct undo_replace_buff_elem
*
446 generate_reg_moves (partial_schedule_ptr ps
, bool rescan
)
451 struct undo_replace_buff_elem
*reg_move_replaces
= NULL
;
453 for (i
= 0; i
< g
->num_nodes
; i
++)
455 ddg_node_ptr u
= &g
->nodes
[i
];
457 int nreg_moves
= 0, i_reg_move
;
458 sbitmap
*uses_of_defs
;
460 rtx prev_reg
, old_reg
;
462 /* Compute the number of reg_moves needed for u, by looking at life
463 ranges started at u (excluding self-loops). */
464 for (e
= u
->out
; e
; e
= e
->next_out
)
465 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
467 int nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
469 if (e
->distance
== 1)
470 nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
472 /* If dest precedes src in the schedule of the kernel, then dest
473 will read before src writes and we can save one reg_copy. */
474 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
475 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
478 nreg_moves
= MAX (nreg_moves
, nreg_moves4e
);
484 /* Every use of the register defined by node may require a different
485 copy of this register, depending on the time the use is scheduled.
486 Set a bitmap vector, telling which nodes use each copy of this
488 uses_of_defs
= sbitmap_vector_alloc (nreg_moves
, g
->num_nodes
);
489 sbitmap_vector_zero (uses_of_defs
, nreg_moves
);
490 for (e
= u
->out
; e
; e
= e
->next_out
)
491 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
493 int dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
495 if (e
->distance
== 1)
496 dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
498 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
499 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
503 SET_BIT (uses_of_defs
[dest_copy
- 1], e
->dest
->cuid
);
506 /* Now generate the reg_moves, attaching relevant uses to them. */
507 SCHED_NREG_MOVES (u
) = nreg_moves
;
508 old_reg
= prev_reg
= copy_rtx (SET_DEST (single_set (u
->insn
)));
509 /* Insert the reg-moves right before the notes which precede
510 the insn they relates to. */
511 last_reg_move
= u
->first_note
;
513 for (i_reg_move
= 0; i_reg_move
< nreg_moves
; i_reg_move
++)
515 unsigned int i_use
= 0;
516 rtx new_reg
= gen_reg_rtx (GET_MODE (prev_reg
));
517 rtx reg_move
= gen_move_insn (new_reg
, prev_reg
);
518 sbitmap_iterator sbi
;
520 add_insn_before (reg_move
, last_reg_move
, NULL
);
521 last_reg_move
= reg_move
;
523 if (!SCHED_FIRST_REG_MOVE (u
))
524 SCHED_FIRST_REG_MOVE (u
) = reg_move
;
526 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs
[i_reg_move
], 0, i_use
, sbi
)
528 struct undo_replace_buff_elem
*rep
;
530 rep
= (struct undo_replace_buff_elem
*)
531 xcalloc (1, sizeof (struct undo_replace_buff_elem
));
532 rep
->insn
= g
->nodes
[i_use
].insn
;
533 rep
->orig_reg
= old_reg
;
534 rep
->new_reg
= new_reg
;
536 if (! reg_move_replaces
)
537 reg_move_replaces
= rep
;
540 rep
->next
= reg_move_replaces
;
541 reg_move_replaces
= rep
;
544 replace_rtx (g
->nodes
[i_use
].insn
, old_reg
, new_reg
);
546 df_insn_rescan (g
->nodes
[i_use
].insn
);
551 sbitmap_vector_free (uses_of_defs
);
553 return reg_move_replaces
;
556 /* Free memory allocated for the undo buffer. */
558 free_undo_replace_buff (struct undo_replace_buff_elem
*reg_move_replaces
)
561 while (reg_move_replaces
)
563 struct undo_replace_buff_elem
*rep
= reg_move_replaces
;
565 reg_move_replaces
= reg_move_replaces
->next
;
570 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
571 of SCHED_ROW and SCHED_STAGE. */
573 normalize_sched_times (partial_schedule_ptr ps
)
576 int amount
= PS_MIN_CYCLE (ps
);
578 ps_insn_ptr crr_insn
;
580 for (row
= 0; row
< ii
; row
++)
581 for (crr_insn
= ps
->rows
[row
]; crr_insn
; crr_insn
= crr_insn
->next_in_row
)
583 ddg_node_ptr u
= crr_insn
->node
;
584 int normalized_time
= SCHED_TIME (u
) - amount
;
587 fprintf (dump_file
, "crr_insn->node=%d, crr_insn->cycle=%d,\
588 min_cycle=%d\n", crr_insn
->node
->cuid
, SCHED_TIME
590 gcc_assert (SCHED_TIME (u
) >= ps
->min_cycle
);
591 gcc_assert (SCHED_TIME (u
) <= ps
->max_cycle
);
592 SCHED_TIME (u
) = normalized_time
;
593 SCHED_ROW (u
) = normalized_time
% ii
;
594 SCHED_STAGE (u
) = normalized_time
/ ii
;
598 /* Set SCHED_COLUMN of each node according to its position in PS. */
600 set_columns_for_ps (partial_schedule_ptr ps
)
604 for (row
= 0; row
< ps
->ii
; row
++)
606 ps_insn_ptr cur_insn
= ps
->rows
[row
];
609 for (; cur_insn
; cur_insn
= cur_insn
->next_in_row
)
610 SCHED_COLUMN (cur_insn
->node
) = column
++;
614 /* Permute the insns according to their order in PS, from row 0 to
615 row ii-1, and position them right before LAST. This schedules
616 the insns of the loop kernel. */
618 permute_partial_schedule (partial_schedule_ptr ps
, rtx last
)
624 for (row
= 0; row
< ii
; row
++)
625 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
626 if (PREV_INSN (last
) != ps_ij
->node
->insn
)
627 reorder_insns_nobb (ps_ij
->node
->first_note
, ps_ij
->node
->insn
,
632 duplicate_insns_of_cycles (partial_schedule_ptr ps
, int from_stage
,
633 int to_stage
, int for_prolog
, rtx count_reg
)
638 for (row
= 0; row
< ps
->ii
; row
++)
639 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
641 ddg_node_ptr u_node
= ps_ij
->node
;
643 rtx reg_move
= NULL_RTX
;
645 /* Do not duplicate any insn which refers to count_reg as it
646 belongs to the control part.
647 TODO: This should be done by analyzing the control part of
649 if (reg_mentioned_p (count_reg
, u_node
->insn
))
654 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
655 number of reg_moves starting with the second occurrence of
656 u_node, which is generated if its SCHED_STAGE <= to_stage. */
657 i_reg_moves
= to_stage
- SCHED_STAGE (u_node
) + 1;
658 i_reg_moves
= MAX (i_reg_moves
, 0);
659 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
661 /* The reg_moves start from the *first* reg_move backwards. */
664 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
665 for (j
= 1; j
< i_reg_moves
; j
++)
666 reg_move
= PREV_INSN (reg_move
);
669 else /* It's for the epilog. */
671 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
672 starting to decrease one stage after u_node no longer occurs;
673 that is, generate all reg_moves until
674 SCHED_STAGE (u_node) == from_stage - 1. */
675 i_reg_moves
= SCHED_NREG_MOVES (u_node
)
676 - (from_stage
- SCHED_STAGE (u_node
) - 1);
677 i_reg_moves
= MAX (i_reg_moves
, 0);
678 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
680 /* The reg_moves start from the *last* reg_move forwards. */
683 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
684 for (j
= 1; j
< SCHED_NREG_MOVES (u_node
); j
++)
685 reg_move
= PREV_INSN (reg_move
);
689 for (j
= 0; j
< i_reg_moves
; j
++, reg_move
= NEXT_INSN (reg_move
))
690 emit_insn (copy_rtx (PATTERN (reg_move
)));
691 if (SCHED_STAGE (u_node
) >= from_stage
692 && SCHED_STAGE (u_node
) <= to_stage
)
693 duplicate_insn_chain (u_node
->first_note
, u_node
->insn
);
698 /* Generate the instructions (including reg_moves) for prolog & epilog. */
700 generate_prolog_epilog (partial_schedule_ptr ps
, struct loop
*loop
,
701 rtx count_reg
, rtx count_init
)
704 int last_stage
= PS_STAGE_COUNT (ps
) - 1;
707 /* Generate the prolog, inserting its insns on the loop-entry edge. */
712 /* Generate instructions at the beginning of the prolog to
713 adjust the loop count by STAGE_COUNT. If loop count is constant
714 (count_init), this constant is adjusted by STAGE_COUNT in
715 generate_prolog_epilog function. */
716 rtx sub_reg
= NULL_RTX
;
718 sub_reg
= expand_simple_binop (GET_MODE (count_reg
), MINUS
,
719 count_reg
, GEN_INT (last_stage
),
720 count_reg
, 1, OPTAB_DIRECT
);
721 gcc_assert (REG_P (sub_reg
));
722 if (REGNO (sub_reg
) != REGNO (count_reg
))
723 emit_move_insn (count_reg
, sub_reg
);
726 for (i
= 0; i
< last_stage
; i
++)
727 duplicate_insns_of_cycles (ps
, 0, i
, 1, count_reg
);
729 /* Put the prolog on the entry edge. */
730 e
= loop_preheader_edge (loop
);
731 split_edge_and_insert (e
, get_insns ());
735 /* Generate the epilog, inserting its insns on the loop-exit edge. */
738 for (i
= 0; i
< last_stage
; i
++)
739 duplicate_insns_of_cycles (ps
, i
+ 1, last_stage
, 0, count_reg
);
741 /* Put the epilogue on the exit edge. */
742 gcc_assert (single_exit (loop
));
743 e
= single_exit (loop
);
744 split_edge_and_insert (e
, get_insns ());
748 /* Return true if all the BBs of the loop are empty except the
751 loop_single_full_bb_p (struct loop
*loop
)
754 basic_block
*bbs
= get_loop_body (loop
);
756 for (i
= 0; i
< loop
->num_nodes
; i
++)
759 bool empty_bb
= true;
761 if (bbs
[i
] == loop
->header
)
764 /* Make sure that basic blocks other than the header
765 have only notes labels or jumps. */
766 get_ebb_head_tail (bbs
[i
], bbs
[i
], &head
, &tail
);
767 for (; head
!= NEXT_INSN (tail
); head
= NEXT_INSN (head
))
769 if (NOTE_P (head
) || LABEL_P (head
)
770 || (INSN_P (head
) && JUMP_P (head
)))
786 /* A simple loop from SMS point of view; it is a loop that is composed of
787 either a single basic block or two BBs - a header and a latch. */
788 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
789 && (EDGE_COUNT (loop->latch->preds) == 1) \
790 && (EDGE_COUNT (loop->latch->succs) == 1))
792 /* Return true if the loop is in its canonical form and false if not.
793 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
795 loop_canon_p (struct loop
*loop
)
798 if (loop
->inner
|| !loop_outer (loop
))
801 fprintf (dump_file
, "SMS loop inner or !loop_outer\n");
805 if (!single_exit (loop
))
809 rtx insn
= BB_END (loop
->header
);
811 fprintf (dump_file
, "SMS loop many exits ");
812 fprintf (dump_file
, " %s %d (file, line)\n",
813 insn_file (insn
), insn_line (insn
));
818 if (! SIMPLE_SMS_LOOP_P (loop
) && ! loop_single_full_bb_p (loop
))
822 rtx insn
= BB_END (loop
->header
);
824 fprintf (dump_file
, "SMS loop many BBs. ");
825 fprintf (dump_file
, " %s %d (file, line)\n",
826 insn_file (insn
), insn_line (insn
));
834 /* If there are more than one entry for the loop,
835 make it one by splitting the first entry edge and
836 redirecting the others to the new BB. */
838 canon_loop (struct loop
*loop
)
843 /* Avoid annoying special cases of edges going to exit
845 FOR_EACH_EDGE (e
, i
, EXIT_BLOCK_PTR
->preds
)
846 if ((e
->flags
& EDGE_FALLTHRU
) && (EDGE_COUNT (e
->src
->succs
) > 1))
849 if (loop
->latch
== loop
->header
850 || EDGE_COUNT (loop
->latch
->succs
) > 1)
852 FOR_EACH_EDGE (e
, i
, loop
->header
->preds
)
853 if (e
->src
== loop
->latch
)
859 /* Probability in % that the sms-ed loop rolls enough so that optimized
860 version may be entered. Just a guess. */
861 #define PROB_SMS_ENOUGH_ITERATIONS 80
863 /* Used to calculate the upper bound of ii. */
864 #define MAXII_FACTOR 2
866 /* Main entry point, perform SMS scheduling on the loops of the function
867 that consist of single basic blocks. */
876 partial_schedule_ptr ps
;
877 basic_block bb
= NULL
;
879 basic_block condition_bb
= NULL
;
881 gcov_type trip_count
= 0;
883 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
884 | LOOPS_HAVE_RECORDED_EXITS
);
885 if (number_of_loops () <= 1)
887 loop_optimizer_finalize ();
888 return; /* There are no loops to schedule. */
891 /* Initialize issue_rate. */
892 if (targetm
.sched
.issue_rate
)
894 int temp
= reload_completed
;
896 reload_completed
= 1;
897 issue_rate
= targetm
.sched
.issue_rate ();
898 reload_completed
= temp
;
903 /* Initialize the scheduler. */
904 current_sched_info
= &sms_sched_info
;
906 /* Init Data Flow analysis, to be used in interloop dep calculation. */
907 df_set_flags (DF_LR_RUN_DCE
);
908 df_rd_add_problem ();
909 df_note_add_problem ();
910 df_chain_add_problem (DF_DU_CHAIN
+ DF_UD_CHAIN
);
912 regstat_compute_calls_crossed ();
915 /* Allocate memory to hold the DDG array one entry for each loop.
916 We use loop->num as index into this array. */
917 g_arr
= XCNEWVEC (ddg_ptr
, number_of_loops ());
921 fprintf (dump_file
, "\n\nSMS analysis phase\n");
922 fprintf (dump_file
, "===================\n\n");
925 /* Build DDGs for all the relevant loops and hold them in G_ARR
926 indexed by the loop index. */
927 FOR_EACH_LOOP (li
, loop
, 0)
933 if (dbg_cnt (sms_sched_loop
) == false)
936 fprintf (dump_file
, "SMS reached max limit... \n");
943 rtx insn
= BB_END (loop
->header
);
945 fprintf (dump_file
, "SMS loop num: %d, file: %s, line: %d\n",
946 loop
->num
, insn_file (insn
), insn_line (insn
));
950 if (! loop_canon_p (loop
))
953 if (! loop_single_full_bb_p (loop
))
956 fprintf (dump_file
, "SMS not loop_single_full_bb_p\n");
962 get_ebb_head_tail (bb
, bb
, &head
, &tail
);
963 latch_edge
= loop_latch_edge (loop
);
964 gcc_assert (single_exit (loop
));
965 if (single_exit (loop
)->count
)
966 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
968 /* Perfrom SMS only on loops that their average count is above threshold. */
970 if ( latch_edge
->count
971 && (latch_edge
->count
< single_exit (loop
)->count
* SMS_LOOP_AVERAGE_COUNT_THRESHOLD
))
975 fprintf (dump_file
, " %s %d (file, line)\n",
976 insn_file (tail
), insn_line (tail
));
977 fprintf (dump_file
, "SMS single-bb-loop\n");
978 if (profile_info
&& flag_branch_probabilities
)
980 fprintf (dump_file
, "SMS loop-count ");
981 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
982 (HOST_WIDEST_INT
) bb
->count
);
983 fprintf (dump_file
, "\n");
984 fprintf (dump_file
, "SMS trip-count ");
985 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
986 (HOST_WIDEST_INT
) trip_count
);
987 fprintf (dump_file
, "\n");
988 fprintf (dump_file
, "SMS profile-sum-max ");
989 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
990 (HOST_WIDEST_INT
) profile_info
->sum_max
);
991 fprintf (dump_file
, "\n");
997 /* Make sure this is a doloop. */
998 if ( !(count_reg
= doloop_register_get (head
, tail
)))
1001 fprintf (dump_file
, "SMS doloop_register_get failed\n");
1005 /* Don't handle BBs with calls or barriers, or !single_set insns,
1006 or auto-increment insns (to avoid creating invalid reg-moves
1007 for the auto-increment insns).
1008 ??? Should handle auto-increment insns.
1009 ??? Should handle insns defining subregs. */
1010 for (insn
= head
; insn
!= NEXT_INSN (tail
); insn
= NEXT_INSN (insn
))
1016 || (INSN_P (insn
) && !JUMP_P (insn
)
1017 && !single_set (insn
) && GET_CODE (PATTERN (insn
)) != USE
)
1018 || (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0)
1019 || (INSN_P (insn
) && (set
= single_set (insn
))
1020 && GET_CODE (SET_DEST (set
)) == SUBREG
))
1024 if (insn
!= NEXT_INSN (tail
))
1029 fprintf (dump_file
, "SMS loop-with-call\n");
1030 else if (BARRIER_P (insn
))
1031 fprintf (dump_file
, "SMS loop-with-barrier\n");
1032 else if (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0)
1033 fprintf (dump_file
, "SMS reg inc\n");
1034 else if ((INSN_P (insn
) && !JUMP_P (insn
)
1035 && !single_set (insn
) && GET_CODE (PATTERN (insn
)) != USE
))
1036 fprintf (dump_file
, "SMS loop-with-not-single-set\n");
1038 fprintf (dump_file
, "SMS loop with subreg in lhs\n");
1039 print_rtl_single (dump_file
, insn
);
1045 if (! (g
= create_ddg (bb
, 0)))
1048 fprintf (dump_file
, "SMS doloop\n");
1052 g_arr
[loop
->num
] = g
;
1054 fprintf (dump_file
, "...OK\n");
1059 fprintf (dump_file
, "\nSMS transformation phase\n");
1060 fprintf (dump_file
, "=========================\n\n");
1063 /* We don't want to perform SMS on new loops - created by versioning. */
1064 FOR_EACH_LOOP (li
, loop
, 0)
1067 rtx count_reg
, count_init
;
1069 unsigned stage_count
= 0;
1070 HOST_WIDEST_INT loop_count
= 0;
1072 if (! (g
= g_arr
[loop
->num
]))
1077 rtx insn
= BB_END (loop
->header
);
1079 fprintf (dump_file
, "SMS loop num: %d, file: %s, line: %d\n",
1080 loop
->num
, insn_file (insn
), insn_line (insn
));
1082 print_ddg (dump_file
, g
);
1085 get_ebb_head_tail (loop
->header
, loop
->header
, &head
, &tail
);
1087 latch_edge
= loop_latch_edge (loop
);
1088 gcc_assert (single_exit (loop
));
1089 if (single_exit (loop
)->count
)
1090 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
1094 fprintf (dump_file
, " %s %d (file, line)\n",
1095 insn_file (tail
), insn_line (tail
));
1096 fprintf (dump_file
, "SMS single-bb-loop\n");
1097 if (profile_info
&& flag_branch_probabilities
)
1099 fprintf (dump_file
, "SMS loop-count ");
1100 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1101 (HOST_WIDEST_INT
) bb
->count
);
1102 fprintf (dump_file
, "\n");
1103 fprintf (dump_file
, "SMS profile-sum-max ");
1104 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1105 (HOST_WIDEST_INT
) profile_info
->sum_max
);
1106 fprintf (dump_file
, "\n");
1108 fprintf (dump_file
, "SMS doloop\n");
1109 fprintf (dump_file
, "SMS built-ddg %d\n", g
->num_nodes
);
1110 fprintf (dump_file
, "SMS num-loads %d\n", g
->num_loads
);
1111 fprintf (dump_file
, "SMS num-stores %d\n", g
->num_stores
);
1115 /* In case of th loop have doloop register it gets special
1117 count_init
= NULL_RTX
;
1118 if ((count_reg
= doloop_register_get (head
, tail
)))
1120 basic_block pre_header
;
1122 pre_header
= loop_preheader_edge (loop
)->src
;
1123 count_init
= const_iteration_count (count_reg
, pre_header
,
1126 gcc_assert (count_reg
);
1128 if (dump_file
&& count_init
)
1130 fprintf (dump_file
, "SMS const-doloop ");
1131 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1133 fprintf (dump_file
, "\n");
1136 node_order
= XNEWVEC (int, g
->num_nodes
);
1138 mii
= 1; /* Need to pass some estimate of mii. */
1139 rec_mii
= sms_order_nodes (g
, mii
, node_order
, &max_asap
);
1140 mii
= MAX (res_MII (g
), rec_mii
);
1141 maxii
= MAX (max_asap
, MAXII_FACTOR
* mii
);
1144 fprintf (dump_file
, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1145 rec_mii
, mii
, maxii
);
1147 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1149 set_node_sched_params (g
);
1151 ps
= sms_schedule_by_order (g
, mii
, maxii
, node_order
);
1154 stage_count
= PS_STAGE_COUNT (ps
);
1156 /* Stage count of 1 means that there is no interleaving between
1157 iterations, let the scheduling passes do the job. */
1159 || (count_init
&& (loop_count
<= stage_count
))
1160 || (flag_branch_probabilities
&& (trip_count
<= stage_count
)))
1164 fprintf (dump_file
, "SMS failed... \n");
1165 fprintf (dump_file
, "SMS sched-failed (stage-count=%d, loop-count=", stage_count
);
1166 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, loop_count
);
1167 fprintf (dump_file
, ", trip-count=");
1168 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, trip_count
);
1169 fprintf (dump_file
, ")\n");
1175 struct undo_replace_buff_elem
*reg_move_replaces
;
1180 "SMS succeeded %d %d (with ii, sc)\n", ps
->ii
,
1182 print_partial_schedule (ps
, dump_file
);
1184 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1185 g
->closing_branch
->cuid
, PS_MIN_CYCLE (ps
) - 1);
1188 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1189 the closing_branch was scheduled and should appear in the last (ii-1)
1190 row. Otherwise, we are free to schedule the branch, and we let nodes
1191 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1192 row; this should reduce stage_count to minimum.
1193 TODO: Revisit the issue of scheduling the insns of the
1194 control part relative to the branch when the control part
1195 has more than one insn. */
1196 normalize_sched_times (ps
);
1197 rotate_partial_schedule (ps
, PS_MIN_CYCLE (ps
));
1198 set_columns_for_ps (ps
);
1202 /* case the BCT count is not known , Do loop-versioning */
1203 if (count_reg
&& ! count_init
)
1205 rtx comp_rtx
= gen_rtx_fmt_ee (GT
, VOIDmode
, count_reg
,
1206 GEN_INT(stage_count
));
1207 unsigned prob
= (PROB_SMS_ENOUGH_ITERATIONS
1208 * REG_BR_PROB_BASE
) / 100;
1210 loop_version (loop
, comp_rtx
, &condition_bb
,
1211 prob
, prob
, REG_BR_PROB_BASE
- prob
,
1215 /* Set new iteration count of loop kernel. */
1216 if (count_reg
&& count_init
)
1217 SET_SRC (single_set (count_init
)) = GEN_INT (loop_count
1220 /* Now apply the scheduled kernel to the RTL of the loop. */
1221 permute_partial_schedule (ps
, g
->closing_branch
->first_note
);
1223 /* Mark this loop as software pipelined so the later
1224 scheduling passes doesn't touch it. */
1225 if (! flag_resched_modulo_sched
)
1226 g
->bb
->flags
|= BB_DISABLE_SCHEDULE
;
1227 /* The life-info is not valid any more. */
1228 df_set_bb_dirty (g
->bb
);
1230 reg_move_replaces
= generate_reg_moves (ps
, true);
1232 print_node_sched_params (dump_file
, g
->num_nodes
, g
);
1233 /* Generate prolog and epilog. */
1234 generate_prolog_epilog (ps
, loop
, count_reg
, count_init
);
1236 free_undo_replace_buff (reg_move_replaces
);
1239 free_partial_schedule (ps
);
1240 free (node_sched_params
);
1245 regstat_free_calls_crossed ();
1248 /* Release scheduler data, needed until now because of DFA. */
1250 loop_optimizer_finalize ();
1253 /* The SMS scheduling algorithm itself
1254 -----------------------------------
1255 Input: 'O' an ordered list of insns of a loop.
1256 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1258 'Q' is the empty Set
1259 'PS' is the partial schedule; it holds the currently scheduled nodes with
1261 'PSP' previously scheduled predecessors.
1262 'PSS' previously scheduled successors.
1263 't(u)' the cycle where u is scheduled.
1264 'l(u)' is the latency of u.
1265 'd(v,u)' is the dependence distance from v to u.
1266 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1267 the node ordering phase.
1268 'check_hardware_resources_conflicts(u, PS, c)'
1269 run a trace around cycle/slot through DFA model
1270 to check resource conflicts involving instruction u
1271 at cycle c given the partial schedule PS.
1272 'add_to_partial_schedule_at_time(u, PS, c)'
1273 Add the node/instruction u to the partial schedule
1275 'calculate_register_pressure(PS)'
1276 Given a schedule of instructions, calculate the register
1277 pressure it implies. One implementation could be the
1278 maximum number of overlapping live ranges.
1279 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1280 registers available in the hardware.
1284 3. for each node u in O in pre-computed order
1285 4. if (PSP(u) != Q && PSS(u) == Q) then
1286 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1287 6. start = Early_start; end = Early_start + II - 1; step = 1
1288 11. else if (PSP(u) == Q && PSS(u) != Q) then
1289 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1290 13. start = Late_start; end = Late_start - II + 1; step = -1
1291 14. else if (PSP(u) != Q && PSS(u) != Q) then
1292 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1293 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1294 17. start = Early_start;
1295 18. end = min(Early_start + II - 1 , Late_start);
1297 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1298 21. start = ASAP(u); end = start + II - 1; step = 1
1302 24. for (c = start ; c != end ; c += step)
1303 25. if check_hardware_resources_conflicts(u, PS, c) then
1304 26. add_to_partial_schedule_at_time(u, PS, c)
1309 31. if (success == false) then
1311 33. if (II > maxII) then
1312 34. finish - failed to schedule
1317 39. if (calculate_register_pressure(PS) > maxRP) then
1320 42. compute epilogue & prologue
1321 43. finish - succeeded to schedule
1324 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1325 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1326 set to 0 to save compile time. */
1327 #define DFA_HISTORY SMS_DFA_HISTORY
1329 /* A threshold for the number of repeated unsuccessful attempts to insert
1330 an empty row, before we flush the partial schedule and start over. */
1331 #define MAX_SPLIT_NUM 10
1332 /* Given the partial schedule PS, this function calculates and returns the
1333 cycles in which we can schedule the node with the given index I.
1334 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1335 noticed that there are several cases in which we fail to SMS the loop
1336 because the sched window of a node is empty due to tight data-deps. In
1337 such cases we want to unschedule some of the predecessors/successors
1338 until we get non-empty scheduling window. It returns -1 if the
1339 scheduling window is empty and zero otherwise. */
1342 get_sched_window (partial_schedule_ptr ps
, int *nodes_order
, int i
,
1343 sbitmap sched_nodes
, int ii
, int *start_p
, int *step_p
, int *end_p
)
1345 int start
, step
, end
;
1347 int u
= nodes_order
[i
];
1348 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1349 sbitmap psp
= sbitmap_alloc (ps
->g
->num_nodes
);
1350 sbitmap pss
= sbitmap_alloc (ps
->g
->num_nodes
);
1351 sbitmap u_node_preds
= NODE_PREDECESSORS (u_node
);
1352 sbitmap u_node_succs
= NODE_SUCCESSORS (u_node
);
1356 /* 1. compute sched window for u (start, end, step). */
1359 psp_not_empty
= sbitmap_a_and_b_cg (psp
, u_node_preds
, sched_nodes
);
1360 pss_not_empty
= sbitmap_a_and_b_cg (pss
, u_node_succs
, sched_nodes
);
1362 if (psp_not_empty
&& !pss_not_empty
)
1364 int early_start
= INT_MIN
;
1367 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1369 ddg_node_ptr v_node
= e
->src
;
1373 fprintf (dump_file
, "\nProcessing edge: ");
1374 print_ddg_edge (dump_file
, e
);
1376 "\nScheduling %d (%d) in psp_not_empty,"
1377 " checking p %d (%d): ", u_node
->cuid
,
1378 INSN_UID (u_node
->insn
), v_node
->cuid
, INSN_UID
1382 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1384 int p_st
= SCHED_TIME (v_node
);
1387 MAX (early_start
, p_st
+ e
->latency
- (e
->distance
* ii
));
1391 "pred st = %d; early_start = %d; latency: %d",
1392 p_st
, early_start
, e
->latency
);
1394 if (e
->data_type
== MEM_DEP
)
1395 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1398 fprintf (dump_file
, "the node is not scheduled\n");
1400 start
= early_start
;
1401 end
= MIN (end
, early_start
+ ii
);
1402 /* Schedule the node close to it's predecessors. */
1407 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1408 u_node
->cuid
, INSN_UID (u_node
->insn
), start
, end
, step
);
1411 else if (!psp_not_empty
&& pss_not_empty
)
1413 int late_start
= INT_MAX
;
1416 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1418 ddg_node_ptr v_node
= e
->dest
;
1422 fprintf (dump_file
, "\nProcessing edge:");
1423 print_ddg_edge (dump_file
, e
);
1425 "\nScheduling %d (%d) in pss_not_empty,"
1426 " checking s %d (%d): ", u_node
->cuid
,
1427 INSN_UID (u_node
->insn
), v_node
->cuid
, INSN_UID
1431 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1433 int s_st
= SCHED_TIME (v_node
);
1435 late_start
= MIN (late_start
,
1436 s_st
- e
->latency
+ (e
->distance
* ii
));
1440 "succ st = %d; late_start = %d; latency = %d",
1441 s_st
, late_start
, e
->latency
);
1443 if (e
->data_type
== MEM_DEP
)
1444 end
= MAX (end
, SCHED_TIME (v_node
) - ii
+ 1);
1446 fprintf (dump_file
, "end = %d\n", end
);
1450 fprintf (dump_file
, "the node is not scheduled\n");
1454 end
= MAX (end
, late_start
- ii
);
1455 /* Schedule the node close to it's successors. */
1460 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1461 u_node
->cuid
, INSN_UID (u_node
->insn
), start
, end
, step
);
1465 else if (psp_not_empty
&& pss_not_empty
)
1467 int early_start
= INT_MIN
;
1468 int late_start
= INT_MAX
;
1469 int count_preds
= 0;
1470 int count_succs
= 0;
1474 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1476 ddg_node_ptr v_node
= e
->src
;
1480 fprintf (dump_file
, "\nProcessing edge:");
1481 print_ddg_edge (dump_file
, e
);
1483 "\nScheduling %d (%d) in psp_pss_not_empty,"
1484 " checking p %d (%d): ", u_node
->cuid
, INSN_UID
1485 (u_node
->insn
), v_node
->cuid
, INSN_UID
1489 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1491 int p_st
= SCHED_TIME (v_node
);
1493 early_start
= MAX (early_start
,
1495 - (e
->distance
* ii
));
1499 "pred st = %d; early_start = %d; latency = %d",
1500 p_st
, early_start
, e
->latency
);
1502 if (e
->type
== TRUE_DEP
&& e
->data_type
== REG_DEP
)
1505 if (e
->data_type
== MEM_DEP
)
1506 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1509 fprintf (dump_file
, "the node is not scheduled\n");
1512 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1514 ddg_node_ptr v_node
= e
->dest
;
1518 fprintf (dump_file
, "\nProcessing edge:");
1519 print_ddg_edge (dump_file
, e
);
1521 "\nScheduling %d (%d) in psp_pss_not_empty,"
1522 " checking s %d (%d): ", u_node
->cuid
, INSN_UID
1523 (u_node
->insn
), v_node
->cuid
, INSN_UID
1527 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1529 int s_st
= SCHED_TIME (v_node
);
1531 late_start
= MIN (late_start
,
1533 + (e
->distance
* ii
));
1537 "succ st = %d; late_start = %d; latency = %d",
1538 s_st
, late_start
, e
->latency
);
1540 if (e
->type
== TRUE_DEP
&& e
->data_type
== REG_DEP
)
1543 if (e
->data_type
== MEM_DEP
)
1544 start
= MAX (start
, SCHED_TIME (v_node
) - ii
+ 1);
1547 fprintf (dump_file
, "the node is not scheduled\n");
1550 start
= MAX (start
, early_start
);
1551 end
= MIN (end
, MIN (early_start
+ ii
, late_start
+ 1));
1553 /* If there are more successors than predecessors schedule the
1554 node close to it's successors. */
1555 if (count_succs
>= count_preds
)
1557 int old_start
= start
;
1560 end
= old_start
- 1;
1564 else /* psp is empty && pss is empty. */
1566 start
= SCHED_ASAP (u_node
);
1577 if ((start
>= end
&& step
== 1) || (start
<= end
&& step
== -1))
1580 fprintf (dump_file
, "\nEmpty window: start=%d, end=%d, step=%d\n",
1588 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1589 node currently been scheduled. At the end of the calculation
1590 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of U_NODE
1591 which are in SCHED_NODES (already scheduled nodes) and scheduled at
1592 the same row as the first/last row of U_NODE's scheduling window.
1593 The first and last rows are calculated using the following parameters:
1594 START/END rows - The cycles that begins/ends the traversal on the window;
1595 searching for an empty cycle to schedule U_NODE.
1596 STEP - The direction in which we traverse the window.
1597 II - The initiation interval.
1598 TODO: We can add an insn to the must_precede/must_follow bitmap only
1599 if it has tight dependence to U and they are both scheduled in the
1600 same row. The current check is more conservative and content with
1601 the fact that both U and the insn are scheduled in the same row. */
1604 calculate_must_precede_follow (ddg_node_ptr u_node
, int start
, int end
,
1605 int step
, int ii
, sbitmap sched_nodes
,
1606 sbitmap must_precede
, sbitmap must_follow
)
1609 int first_cycle_in_window
, last_cycle_in_window
;
1610 int first_row_in_window
, last_row_in_window
;
1612 gcc_assert (must_precede
&& must_follow
);
1614 /* Consider the following scheduling window:
1615 {first_cycle_in_window, first_cycle_in_window+1, ...,
1616 last_cycle_in_window}. If step is 1 then the following will be
1617 the order we traverse the window: {start=first_cycle_in_window,
1618 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1619 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1620 end=first_cycle_in_window-1} if step is -1. */
1621 first_cycle_in_window
= (step
== 1) ? start
: end
- step
;
1622 last_cycle_in_window
= (step
== 1) ? end
- step
: start
;
1624 first_row_in_window
= SMODULO (first_cycle_in_window
, ii
);
1625 last_row_in_window
= SMODULO (last_cycle_in_window
, ii
);
1627 sbitmap_zero (must_precede
);
1628 sbitmap_zero (must_follow
);
1631 fprintf (dump_file
, "\nmust_precede: ");
1633 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1634 if (TEST_BIT (sched_nodes
, e
->src
->cuid
)
1635 && (SMODULO (SCHED_TIME (e
->src
), ii
) == first_row_in_window
))
1638 fprintf (dump_file
, "%d ", e
->src
->cuid
);
1640 SET_BIT (must_precede
, e
->src
->cuid
);
1644 fprintf (dump_file
, "\nmust_follow: ");
1646 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1647 if (TEST_BIT (sched_nodes
, e
->dest
->cuid
)
1648 && (SMODULO (SCHED_TIME (e
->dest
), ii
) == last_row_in_window
))
1651 fprintf (dump_file
, "%d ", e
->dest
->cuid
);
1653 SET_BIT (must_follow
, e
->dest
->cuid
);
1657 fprintf (dump_file
, "\n");
1660 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1661 parameters to decide if that's possible:
1662 PS - The partial schedule.
1663 U - The serial number of U_NODE.
1664 NUM_SPLITS - The number of row spilts made so far.
1665 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1666 the first row of the scheduling window)
1667 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1668 last row of the scheduling window) */
1671 try_scheduling_node_in_cycle (partial_schedule_ptr ps
, ddg_node_ptr u_node
,
1672 int u
, int row
, sbitmap sched_nodes
,
1673 int *num_splits
, sbitmap must_precede
,
1674 sbitmap must_follow
)
1679 verify_partial_schedule (ps
, sched_nodes
);
1680 psi
= ps_add_node_check_conflicts (ps
, u_node
, row
,
1681 must_precede
, must_follow
);
1684 SCHED_TIME (u_node
) = row
;
1685 SET_BIT (sched_nodes
, u
);
1689 fprintf (dump_file
, "Scheduled w/o split in %d\n", row
);
1696 /* This function implements the scheduling algorithm for SMS according to the
1698 static partial_schedule_ptr
1699 sms_schedule_by_order (ddg_ptr g
, int mii
, int maxii
, int *nodes_order
)
1702 int i
, c
, success
, num_splits
= 0;
1703 int flush_and_start_over
= true;
1704 int num_nodes
= g
->num_nodes
;
1705 int start
, end
, step
; /* Place together into one struct? */
1706 sbitmap sched_nodes
= sbitmap_alloc (num_nodes
);
1707 sbitmap must_precede
= sbitmap_alloc (num_nodes
);
1708 sbitmap must_follow
= sbitmap_alloc (num_nodes
);
1709 sbitmap tobe_scheduled
= sbitmap_alloc (num_nodes
);
1711 partial_schedule_ptr ps
= create_partial_schedule (ii
, g
, DFA_HISTORY
);
1713 sbitmap_ones (tobe_scheduled
);
1714 sbitmap_zero (sched_nodes
);
1716 while (flush_and_start_over
&& (ii
< maxii
))
1720 fprintf (dump_file
, "Starting with ii=%d\n", ii
);
1721 flush_and_start_over
= false;
1722 sbitmap_zero (sched_nodes
);
1724 for (i
= 0; i
< num_nodes
; i
++)
1726 int u
= nodes_order
[i
];
1727 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1728 rtx insn
= u_node
->insn
;
1732 RESET_BIT (tobe_scheduled
, u
);
1736 if (JUMP_P (insn
)) /* Closing branch handled later. */
1738 RESET_BIT (tobe_scheduled
, u
);
1742 if (TEST_BIT (sched_nodes
, u
))
1745 /* Try to get non-empty scheduling window. */
1747 if (get_sched_window (ps
, nodes_order
, i
, sched_nodes
, ii
, &start
,
1751 fprintf (dump_file
, "\nTrying to schedule node %d \
1752 INSN = %d in (%d .. %d) step %d\n", u
, (INSN_UID
1753 (g
->nodes
[u
].insn
)), start
, end
, step
);
1755 gcc_assert ((step
> 0 && start
< end
)
1756 || (step
< 0 && start
> end
));
1758 calculate_must_precede_follow (u_node
, start
, end
, step
, ii
,
1759 sched_nodes
, must_precede
,
1762 for (c
= start
; c
!= end
; c
+= step
)
1764 sbitmap tmp_precede
= NULL
;
1765 sbitmap tmp_follow
= NULL
;
1770 tmp_precede
= must_precede
;
1771 else /* step == -1. */
1772 tmp_follow
= must_follow
;
1774 if (c
== end
- step
)
1777 tmp_follow
= must_follow
;
1778 else /* step == -1. */
1779 tmp_precede
= must_precede
;
1783 try_scheduling_node_in_cycle (ps
, u_node
, u
, c
,
1785 &num_splits
, tmp_precede
,
1791 verify_partial_schedule (ps
, sched_nodes
);
1800 if (num_splits
>= MAX_SPLIT_NUM
)
1803 flush_and_start_over
= true;
1804 verify_partial_schedule (ps
, sched_nodes
);
1805 reset_partial_schedule (ps
, ii
);
1806 verify_partial_schedule (ps
, sched_nodes
);
1812 split_row
= compute_split_row (sched_nodes
, start
, end
,
1815 split_row
= compute_split_row (sched_nodes
, end
, start
,
1818 ps_insert_empty_row (ps
, split_row
, sched_nodes
);
1819 i
--; /* Go back and retry node i. */
1822 fprintf (dump_file
, "num_splits=%d\n", num_splits
);
1825 /* ??? If (success), check register pressure estimates. */
1826 } /* Continue with next node. */
1827 } /* While flush_and_start_over. */
1830 free_partial_schedule (ps
);
1834 gcc_assert (sbitmap_equal (tobe_scheduled
, sched_nodes
));
1836 sbitmap_free (sched_nodes
);
1837 sbitmap_free (must_precede
);
1838 sbitmap_free (must_follow
);
1839 sbitmap_free (tobe_scheduled
);
1844 /* This function inserts a new empty row into PS at the position
1845 according to SPLITROW, keeping all already scheduled instructions
1846 intact and updating their SCHED_TIME and cycle accordingly. */
1848 ps_insert_empty_row (partial_schedule_ptr ps
, int split_row
,
1849 sbitmap sched_nodes
)
1851 ps_insn_ptr crr_insn
;
1852 ps_insn_ptr
*rows_new
;
1854 int new_ii
= ii
+ 1;
1857 verify_partial_schedule (ps
, sched_nodes
);
1859 /* We normalize sched_time and rotate ps to have only non-negative sched
1860 times, for simplicity of updating cycles after inserting new row. */
1861 split_row
-= ps
->min_cycle
;
1862 split_row
= SMODULO (split_row
, ii
);
1864 fprintf (dump_file
, "split_row=%d\n", split_row
);
1866 normalize_sched_times (ps
);
1867 rotate_partial_schedule (ps
, ps
->min_cycle
);
1869 rows_new
= (ps_insn_ptr
*) xcalloc (new_ii
, sizeof (ps_insn_ptr
));
1870 for (row
= 0; row
< split_row
; row
++)
1872 rows_new
[row
] = ps
->rows
[row
];
1873 ps
->rows
[row
] = NULL
;
1874 for (crr_insn
= rows_new
[row
];
1875 crr_insn
; crr_insn
= crr_insn
->next_in_row
)
1877 ddg_node_ptr u
= crr_insn
->node
;
1878 int new_time
= SCHED_TIME (u
) + (SCHED_TIME (u
) / ii
);
1880 SCHED_TIME (u
) = new_time
;
1881 crr_insn
->cycle
= new_time
;
1882 SCHED_ROW (u
) = new_time
% new_ii
;
1883 SCHED_STAGE (u
) = new_time
/ new_ii
;
1888 rows_new
[split_row
] = NULL
;
1890 for (row
= split_row
; row
< ii
; row
++)
1892 rows_new
[row
+ 1] = ps
->rows
[row
];
1893 ps
->rows
[row
] = NULL
;
1894 for (crr_insn
= rows_new
[row
+ 1];
1895 crr_insn
; crr_insn
= crr_insn
->next_in_row
)
1897 ddg_node_ptr u
= crr_insn
->node
;
1898 int new_time
= SCHED_TIME (u
) + (SCHED_TIME (u
) / ii
) + 1;
1900 SCHED_TIME (u
) = new_time
;
1901 crr_insn
->cycle
= new_time
;
1902 SCHED_ROW (u
) = new_time
% new_ii
;
1903 SCHED_STAGE (u
) = new_time
/ new_ii
;
1908 ps
->min_cycle
= ps
->min_cycle
+ ps
->min_cycle
/ ii
1909 + (SMODULO (ps
->min_cycle
, ii
) >= split_row
? 1 : 0);
1910 ps
->max_cycle
= ps
->max_cycle
+ ps
->max_cycle
/ ii
1911 + (SMODULO (ps
->max_cycle
, ii
) >= split_row
? 1 : 0);
1913 ps
->rows
= rows_new
;
1915 gcc_assert (ps
->min_cycle
>= 0);
1917 verify_partial_schedule (ps
, sched_nodes
);
1920 fprintf (dump_file
, "min_cycle=%d, max_cycle=%d\n", ps
->min_cycle
,
1924 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1925 UP which are the boundaries of it's scheduling window; compute using
1926 SCHED_NODES and II a row in the partial schedule that can be split
1927 which will separate a critical predecessor from a critical successor
1928 thereby expanding the window, and return it. */
1930 compute_split_row (sbitmap sched_nodes
, int low
, int up
, int ii
,
1931 ddg_node_ptr u_node
)
1934 int lower
= INT_MIN
, upper
= INT_MAX
;
1935 ddg_node_ptr crit_pred
= NULL
;
1936 ddg_node_ptr crit_succ
= NULL
;
1939 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1941 ddg_node_ptr v_node
= e
->src
;
1943 if (TEST_BIT (sched_nodes
, v_node
->cuid
)
1944 && (low
== SCHED_TIME (v_node
) + e
->latency
- (e
->distance
* ii
)))
1945 if (SCHED_TIME (v_node
) > lower
)
1948 lower
= SCHED_TIME (v_node
);
1952 if (crit_pred
!= NULL
)
1954 crit_cycle
= SCHED_TIME (crit_pred
) + 1;
1955 return SMODULO (crit_cycle
, ii
);
1958 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1960 ddg_node_ptr v_node
= e
->dest
;
1961 if (TEST_BIT (sched_nodes
, v_node
->cuid
)
1962 && (up
== SCHED_TIME (v_node
) - e
->latency
+ (e
->distance
* ii
)))
1963 if (SCHED_TIME (v_node
) < upper
)
1966 upper
= SCHED_TIME (v_node
);
1970 if (crit_succ
!= NULL
)
1972 crit_cycle
= SCHED_TIME (crit_succ
);
1973 return SMODULO (crit_cycle
, ii
);
1977 fprintf (dump_file
, "Both crit_pred and crit_succ are NULL\n");
1979 return SMODULO ((low
+ up
+ 1) / 2, ii
);
1983 verify_partial_schedule (partial_schedule_ptr ps
, sbitmap sched_nodes
)
1986 ps_insn_ptr crr_insn
;
1988 for (row
= 0; row
< ps
->ii
; row
++)
1989 for (crr_insn
= ps
->rows
[row
]; crr_insn
; crr_insn
= crr_insn
->next_in_row
)
1991 ddg_node_ptr u
= crr_insn
->node
;
1993 gcc_assert (TEST_BIT (sched_nodes
, u
->cuid
));
1994 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1995 popcount (sched_nodes) == number of insns in ps. */
1996 gcc_assert (SCHED_TIME (u
) >= ps
->min_cycle
);
1997 gcc_assert (SCHED_TIME (u
) <= ps
->max_cycle
);
2002 /* This page implements the algorithm for ordering the nodes of a DDG
2003 for modulo scheduling, activated through the
2004 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2006 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2007 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2008 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2009 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2010 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2011 #define DEPTH(x) (ASAP ((x)))
2013 typedef struct node_order_params
* nopa
;
2015 static void order_nodes_of_sccs (ddg_all_sccs_ptr
, int * result
);
2016 static int order_nodes_in_scc (ddg_ptr
, sbitmap
, sbitmap
, int*, int);
2017 static nopa
calculate_order_params (ddg_ptr
, int, int *);
2018 static int find_max_asap (ddg_ptr
, sbitmap
);
2019 static int find_max_hv_min_mob (ddg_ptr
, sbitmap
);
2020 static int find_max_dv_min_mob (ddg_ptr
, sbitmap
);
2022 enum sms_direction
{BOTTOMUP
, TOPDOWN
};
2024 struct node_order_params
2031 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2033 check_nodes_order (int *node_order
, int num_nodes
)
2036 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2041 fprintf (dump_file
, "SMS final nodes order: \n");
2043 for (i
= 0; i
< num_nodes
; i
++)
2045 int u
= node_order
[i
];
2048 fprintf (dump_file
, "%d ", u
);
2049 gcc_assert (u
< num_nodes
&& u
>= 0 && !TEST_BIT (tmp
, u
));
2055 fprintf (dump_file
, "\n");
2060 /* Order the nodes of G for scheduling and pass the result in
2061 NODE_ORDER. Also set aux.count of each node to ASAP.
2062 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2064 sms_order_nodes (ddg_ptr g
, int mii
, int * node_order
, int *pmax_asap
)
2068 ddg_all_sccs_ptr sccs
= create_ddg_all_sccs (g
);
2070 nopa nops
= calculate_order_params (g
, mii
, pmax_asap
);
2073 print_sccs (dump_file
, sccs
, g
);
2075 order_nodes_of_sccs (sccs
, node_order
);
2077 if (sccs
->num_sccs
> 0)
2078 /* First SCC has the largest recurrence_length. */
2079 rec_mii
= sccs
->sccs
[0]->recurrence_length
;
2081 /* Save ASAP before destroying node_order_params. */
2082 for (i
= 0; i
< g
->num_nodes
; i
++)
2084 ddg_node_ptr v
= &g
->nodes
[i
];
2085 v
->aux
.count
= ASAP (v
);
2089 free_ddg_all_sccs (sccs
);
2090 check_nodes_order (node_order
, g
->num_nodes
);
2096 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs
, int * node_order
)
2099 ddg_ptr g
= all_sccs
->ddg
;
2100 int num_nodes
= g
->num_nodes
;
2101 sbitmap prev_sccs
= sbitmap_alloc (num_nodes
);
2102 sbitmap on_path
= sbitmap_alloc (num_nodes
);
2103 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2104 sbitmap ones
= sbitmap_alloc (num_nodes
);
2106 sbitmap_zero (prev_sccs
);
2107 sbitmap_ones (ones
);
2109 /* Perfrom the node ordering starting from the SCC with the highest recMII.
2110 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2111 for (i
= 0; i
< all_sccs
->num_sccs
; i
++)
2113 ddg_scc_ptr scc
= all_sccs
->sccs
[i
];
2115 /* Add nodes on paths from previous SCCs to the current SCC. */
2116 find_nodes_on_paths (on_path
, g
, prev_sccs
, scc
->nodes
);
2117 sbitmap_a_or_b (tmp
, scc
->nodes
, on_path
);
2119 /* Add nodes on paths from the current SCC to previous SCCs. */
2120 find_nodes_on_paths (on_path
, g
, scc
->nodes
, prev_sccs
);
2121 sbitmap_a_or_b (tmp
, tmp
, on_path
);
2123 /* Remove nodes of previous SCCs from current extended SCC. */
2124 sbitmap_difference (tmp
, tmp
, prev_sccs
);
2126 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
2127 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2130 /* Handle the remaining nodes that do not belong to any scc. Each call
2131 to order_nodes_in_scc handles a single connected component. */
2132 while (pos
< g
->num_nodes
)
2134 sbitmap_difference (tmp
, ones
, prev_sccs
);
2135 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
2137 sbitmap_free (prev_sccs
);
2138 sbitmap_free (on_path
);
2140 sbitmap_free (ones
);
2143 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2144 static struct node_order_params
*
2145 calculate_order_params (ddg_ptr g
, int mii ATTRIBUTE_UNUSED
, int *pmax_asap
)
2149 int num_nodes
= g
->num_nodes
;
2151 /* Allocate a place to hold ordering params for each node in the DDG. */
2152 nopa node_order_params_arr
;
2154 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2155 node_order_params_arr
= (nopa
) xcalloc (num_nodes
,
2156 sizeof (struct node_order_params
));
2158 /* Set the aux pointer of each node to point to its order_params structure. */
2159 for (u
= 0; u
< num_nodes
; u
++)
2160 g
->nodes
[u
].aux
.info
= &node_order_params_arr
[u
];
2162 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2163 calculate ASAP, ALAP, mobility, distance, and height for each node
2164 in the dependence (direct acyclic) graph. */
2166 /* We assume that the nodes in the array are in topological order. */
2169 for (u
= 0; u
< num_nodes
; u
++)
2171 ddg_node_ptr u_node
= &g
->nodes
[u
];
2174 for (e
= u_node
->in
; e
; e
= e
->next_in
)
2175 if (e
->distance
== 0)
2176 ASAP (u_node
) = MAX (ASAP (u_node
),
2177 ASAP (e
->src
) + e
->latency
);
2178 max_asap
= MAX (max_asap
, ASAP (u_node
));
2181 for (u
= num_nodes
- 1; u
> -1; u
--)
2183 ddg_node_ptr u_node
= &g
->nodes
[u
];
2185 ALAP (u_node
) = max_asap
;
2186 HEIGHT (u_node
) = 0;
2187 for (e
= u_node
->out
; e
; e
= e
->next_out
)
2188 if (e
->distance
== 0)
2190 ALAP (u_node
) = MIN (ALAP (u_node
),
2191 ALAP (e
->dest
) - e
->latency
);
2192 HEIGHT (u_node
) = MAX (HEIGHT (u_node
),
2193 HEIGHT (e
->dest
) + e
->latency
);
2198 fprintf (dump_file
, "\nOrder params\n");
2199 for (u
= 0; u
< num_nodes
; u
++)
2201 ddg_node_ptr u_node
= &g
->nodes
[u
];
2203 fprintf (dump_file
, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u
,
2204 ASAP (u_node
), ALAP (u_node
), HEIGHT (u_node
));
2208 *pmax_asap
= max_asap
;
2209 return node_order_params_arr
;
2213 find_max_asap (ddg_ptr g
, sbitmap nodes
)
2218 sbitmap_iterator sbi
;
2220 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2222 ddg_node_ptr u_node
= &g
->nodes
[u
];
2224 if (max_asap
< ASAP (u_node
))
2226 max_asap
= ASAP (u_node
);
2234 find_max_hv_min_mob (ddg_ptr g
, sbitmap nodes
)
2238 int min_mob
= INT_MAX
;
2240 sbitmap_iterator sbi
;
2242 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2244 ddg_node_ptr u_node
= &g
->nodes
[u
];
2246 if (max_hv
< HEIGHT (u_node
))
2248 max_hv
= HEIGHT (u_node
);
2249 min_mob
= MOB (u_node
);
2252 else if ((max_hv
== HEIGHT (u_node
))
2253 && (min_mob
> MOB (u_node
)))
2255 min_mob
= MOB (u_node
);
2263 find_max_dv_min_mob (ddg_ptr g
, sbitmap nodes
)
2267 int min_mob
= INT_MAX
;
2269 sbitmap_iterator sbi
;
2271 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2273 ddg_node_ptr u_node
= &g
->nodes
[u
];
2275 if (max_dv
< DEPTH (u_node
))
2277 max_dv
= DEPTH (u_node
);
2278 min_mob
= MOB (u_node
);
2281 else if ((max_dv
== DEPTH (u_node
))
2282 && (min_mob
> MOB (u_node
)))
2284 min_mob
= MOB (u_node
);
2291 /* Places the nodes of SCC into the NODE_ORDER array starting
2292 at position POS, according to the SMS ordering algorithm.
2293 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2294 the NODE_ORDER array, starting from position zero. */
2296 order_nodes_in_scc (ddg_ptr g
, sbitmap nodes_ordered
, sbitmap scc
,
2297 int * node_order
, int pos
)
2299 enum sms_direction dir
;
2300 int num_nodes
= g
->num_nodes
;
2301 sbitmap workset
= sbitmap_alloc (num_nodes
);
2302 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2303 sbitmap zero_bitmap
= sbitmap_alloc (num_nodes
);
2304 sbitmap predecessors
= sbitmap_alloc (num_nodes
);
2305 sbitmap successors
= sbitmap_alloc (num_nodes
);
2307 sbitmap_zero (predecessors
);
2308 find_predecessors (predecessors
, g
, nodes_ordered
);
2310 sbitmap_zero (successors
);
2311 find_successors (successors
, g
, nodes_ordered
);
2314 if (sbitmap_a_and_b_cg (tmp
, predecessors
, scc
))
2316 sbitmap_copy (workset
, tmp
);
2319 else if (sbitmap_a_and_b_cg (tmp
, successors
, scc
))
2321 sbitmap_copy (workset
, tmp
);
2328 sbitmap_zero (workset
);
2329 if ((u
= find_max_asap (g
, scc
)) >= 0)
2330 SET_BIT (workset
, u
);
2334 sbitmap_zero (zero_bitmap
);
2335 while (!sbitmap_equal (workset
, zero_bitmap
))
2338 ddg_node_ptr v_node
;
2339 sbitmap v_node_preds
;
2340 sbitmap v_node_succs
;
2344 while (!sbitmap_equal (workset
, zero_bitmap
))
2346 v
= find_max_hv_min_mob (g
, workset
);
2347 v_node
= &g
->nodes
[v
];
2348 node_order
[pos
++] = v
;
2349 v_node_succs
= NODE_SUCCESSORS (v_node
);
2350 sbitmap_a_and_b (tmp
, v_node_succs
, scc
);
2352 /* Don't consider the already ordered successors again. */
2353 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
2354 sbitmap_a_or_b (workset
, workset
, tmp
);
2355 RESET_BIT (workset
, v
);
2356 SET_BIT (nodes_ordered
, v
);
2359 sbitmap_zero (predecessors
);
2360 find_predecessors (predecessors
, g
, nodes_ordered
);
2361 sbitmap_a_and_b (workset
, predecessors
, scc
);
2365 while (!sbitmap_equal (workset
, zero_bitmap
))
2367 v
= find_max_dv_min_mob (g
, workset
);
2368 v_node
= &g
->nodes
[v
];
2369 node_order
[pos
++] = v
;
2370 v_node_preds
= NODE_PREDECESSORS (v_node
);
2371 sbitmap_a_and_b (tmp
, v_node_preds
, scc
);
2373 /* Don't consider the already ordered predecessors again. */
2374 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
2375 sbitmap_a_or_b (workset
, workset
, tmp
);
2376 RESET_BIT (workset
, v
);
2377 SET_BIT (nodes_ordered
, v
);
2380 sbitmap_zero (successors
);
2381 find_successors (successors
, g
, nodes_ordered
);
2382 sbitmap_a_and_b (workset
, successors
, scc
);
2386 sbitmap_free (workset
);
2387 sbitmap_free (zero_bitmap
);
2388 sbitmap_free (predecessors
);
2389 sbitmap_free (successors
);
2394 /* This page contains functions for manipulating partial-schedules during
2395 modulo scheduling. */
2397 /* Create a partial schedule and allocate a memory to hold II rows. */
2399 static partial_schedule_ptr
2400 create_partial_schedule (int ii
, ddg_ptr g
, int history
)
2402 partial_schedule_ptr ps
= XNEW (struct partial_schedule
);
2403 ps
->rows
= (ps_insn_ptr
*) xcalloc (ii
, sizeof (ps_insn_ptr
));
2405 ps
->history
= history
;
2406 ps
->min_cycle
= INT_MAX
;
2407 ps
->max_cycle
= INT_MIN
;
2413 /* Free the PS_INSNs in rows array of the given partial schedule.
2414 ??? Consider caching the PS_INSN's. */
2416 free_ps_insns (partial_schedule_ptr ps
)
2420 for (i
= 0; i
< ps
->ii
; i
++)
2424 ps_insn_ptr ps_insn
= ps
->rows
[i
]->next_in_row
;
2427 ps
->rows
[i
] = ps_insn
;
2433 /* Free all the memory allocated to the partial schedule. */
2436 free_partial_schedule (partial_schedule_ptr ps
)
2445 /* Clear the rows array with its PS_INSNs, and create a new one with
2449 reset_partial_schedule (partial_schedule_ptr ps
, int new_ii
)
2454 if (new_ii
== ps
->ii
)
2456 ps
->rows
= (ps_insn_ptr
*) xrealloc (ps
->rows
, new_ii
2457 * sizeof (ps_insn_ptr
));
2458 memset (ps
->rows
, 0, new_ii
* sizeof (ps_insn_ptr
));
2460 ps
->min_cycle
= INT_MAX
;
2461 ps
->max_cycle
= INT_MIN
;
2464 /* Prints the partial schedule as an ii rows array, for each rows
2465 print the ids of the insns in it. */
2467 print_partial_schedule (partial_schedule_ptr ps
, FILE *dump
)
2471 for (i
= 0; i
< ps
->ii
; i
++)
2473 ps_insn_ptr ps_i
= ps
->rows
[i
];
2475 fprintf (dump
, "\n[CYCLE %d ]: ", i
);
2478 fprintf (dump
, "%d, ",
2479 INSN_UID (ps_i
->node
->insn
));
2480 ps_i
= ps_i
->next_in_row
;
2485 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2487 create_ps_insn (ddg_node_ptr node
, int rest_count
, int cycle
)
2489 ps_insn_ptr ps_i
= XNEW (struct ps_insn
);
2492 ps_i
->next_in_row
= NULL
;
2493 ps_i
->prev_in_row
= NULL
;
2494 ps_i
->row_rest_count
= rest_count
;
2495 ps_i
->cycle
= cycle
;
2501 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2502 node is not found in the partial schedule, else returns true. */
2504 remove_node_from_ps (partial_schedule_ptr ps
, ps_insn_ptr ps_i
)
2511 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2512 if (! ps_i
->prev_in_row
)
2514 if (ps_i
!= ps
->rows
[row
])
2517 ps
->rows
[row
] = ps_i
->next_in_row
;
2519 ps
->rows
[row
]->prev_in_row
= NULL
;
2523 ps_i
->prev_in_row
->next_in_row
= ps_i
->next_in_row
;
2524 if (ps_i
->next_in_row
)
2525 ps_i
->next_in_row
->prev_in_row
= ps_i
->prev_in_row
;
2531 /* Unlike what literature describes for modulo scheduling (which focuses
2532 on VLIW machines) the order of the instructions inside a cycle is
2533 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2534 where the current instruction should go relative to the already
2535 scheduled instructions in the given cycle. Go over these
2536 instructions and find the first possible column to put it in. */
2538 ps_insn_find_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2539 sbitmap must_precede
, sbitmap must_follow
)
2541 ps_insn_ptr next_ps_i
;
2542 ps_insn_ptr first_must_follow
= NULL
;
2543 ps_insn_ptr last_must_precede
= NULL
;
2549 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2551 /* Find the first must follow and the last must precede
2552 and insert the node immediately after the must precede
2553 but make sure that it there is no must follow after it. */
2554 for (next_ps_i
= ps
->rows
[row
];
2556 next_ps_i
= next_ps_i
->next_in_row
)
2558 if (must_follow
&& TEST_BIT (must_follow
, next_ps_i
->node
->cuid
)
2559 && ! first_must_follow
)
2560 first_must_follow
= next_ps_i
;
2561 if (must_precede
&& TEST_BIT (must_precede
, next_ps_i
->node
->cuid
))
2563 /* If we have already met a node that must follow, then
2564 there is no possible column. */
2565 if (first_must_follow
)
2568 last_must_precede
= next_ps_i
;
2572 /* Now insert the node after INSERT_AFTER_PSI. */
2574 if (! last_must_precede
)
2576 ps_i
->next_in_row
= ps
->rows
[row
];
2577 ps_i
->prev_in_row
= NULL
;
2578 if (ps_i
->next_in_row
)
2579 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2580 ps
->rows
[row
] = ps_i
;
2584 ps_i
->next_in_row
= last_must_precede
->next_in_row
;
2585 last_must_precede
->next_in_row
= ps_i
;
2586 ps_i
->prev_in_row
= last_must_precede
;
2587 if (ps_i
->next_in_row
)
2588 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2594 /* Advances the PS_INSN one column in its current row; returns false
2595 in failure and true in success. Bit N is set in MUST_FOLLOW if
2596 the node with cuid N must be come after the node pointed to by
2597 PS_I when scheduled in the same cycle. */
2599 ps_insn_advance_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2600 sbitmap must_follow
)
2602 ps_insn_ptr prev
, next
;
2604 ddg_node_ptr next_node
;
2609 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2611 if (! ps_i
->next_in_row
)
2614 next_node
= ps_i
->next_in_row
->node
;
2616 /* Check if next_in_row is dependent on ps_i, both having same sched
2617 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2618 if (must_follow
&& TEST_BIT (must_follow
, next_node
->cuid
))
2621 /* Advance PS_I over its next_in_row in the doubly linked list. */
2622 prev
= ps_i
->prev_in_row
;
2623 next
= ps_i
->next_in_row
;
2625 if (ps_i
== ps
->rows
[row
])
2626 ps
->rows
[row
] = next
;
2628 ps_i
->next_in_row
= next
->next_in_row
;
2630 if (next
->next_in_row
)
2631 next
->next_in_row
->prev_in_row
= ps_i
;
2633 next
->next_in_row
= ps_i
;
2634 ps_i
->prev_in_row
= next
;
2636 next
->prev_in_row
= prev
;
2638 prev
->next_in_row
= next
;
2643 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2644 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2645 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2646 before/after (respectively) the node pointed to by PS_I when scheduled
2647 in the same cycle. */
2649 add_node_to_ps (partial_schedule_ptr ps
, ddg_node_ptr node
, int cycle
,
2650 sbitmap must_precede
, sbitmap must_follow
)
2654 int row
= SMODULO (cycle
, ps
->ii
);
2657 && ps
->rows
[row
]->row_rest_count
>= issue_rate
)
2661 rest_count
+= ps
->rows
[row
]->row_rest_count
;
2663 ps_i
= create_ps_insn (node
, rest_count
, cycle
);
2665 /* Finds and inserts PS_I according to MUST_FOLLOW and
2667 if (! ps_insn_find_column (ps
, ps_i
, must_precede
, must_follow
))
2676 /* Advance time one cycle. Assumes DFA is being used. */
2678 advance_one_cycle (void)
2680 if (targetm
.sched
.dfa_pre_cycle_insn
)
2681 state_transition (curr_state
,
2682 targetm
.sched
.dfa_pre_cycle_insn ());
2684 state_transition (curr_state
, NULL
);
2686 if (targetm
.sched
.dfa_post_cycle_insn
)
2687 state_transition (curr_state
,
2688 targetm
.sched
.dfa_post_cycle_insn ());
2693 /* Checks if PS has resource conflicts according to DFA, starting from
2694 FROM cycle to TO cycle; returns true if there are conflicts and false
2695 if there are no conflicts. Assumes DFA is being used. */
2697 ps_has_conflicts (partial_schedule_ptr ps
, int from
, int to
)
2701 state_reset (curr_state
);
2703 for (cycle
= from
; cycle
<= to
; cycle
++)
2705 ps_insn_ptr crr_insn
;
2706 /* Holds the remaining issue slots in the current row. */
2707 int can_issue_more
= issue_rate
;
2709 /* Walk through the DFA for the current row. */
2710 for (crr_insn
= ps
->rows
[SMODULO (cycle
, ps
->ii
)];
2712 crr_insn
= crr_insn
->next_in_row
)
2714 rtx insn
= crr_insn
->node
->insn
;
2719 /* Check if there is room for the current insn. */
2720 if (!can_issue_more
|| state_dead_lock_p (curr_state
))
2723 /* Update the DFA state and return with failure if the DFA found
2724 recource conflicts. */
2725 if (state_transition (curr_state
, insn
) >= 0)
2728 if (targetm
.sched
.variable_issue
)
2730 targetm
.sched
.variable_issue (sched_dump
, sched_verbose
,
2731 insn
, can_issue_more
);
2732 /* A naked CLOBBER or USE generates no instruction, so don't
2733 let them consume issue slots. */
2734 else if (GET_CODE (PATTERN (insn
)) != USE
2735 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
2739 /* Advance the DFA to the next cycle. */
2740 advance_one_cycle ();
2745 /* Checks if the given node causes resource conflicts when added to PS at
2746 cycle C. If not the node is added to PS and returned; otherwise zero
2747 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2748 cuid N must be come before/after (respectively) the node pointed to by
2749 PS_I when scheduled in the same cycle. */
2751 ps_add_node_check_conflicts (partial_schedule_ptr ps
, ddg_node_ptr n
,
2752 int c
, sbitmap must_precede
,
2753 sbitmap must_follow
)
2755 int has_conflicts
= 0;
2758 /* First add the node to the PS, if this succeeds check for
2759 conflicts, trying different issue slots in the same row. */
2760 if (! (ps_i
= add_node_to_ps (ps
, n
, c
, must_precede
, must_follow
)))
2761 return NULL
; /* Failed to insert the node at the given cycle. */
2763 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2765 && ps_has_conflicts (ps
,
2769 /* Try different issue slots to find one that the given node can be
2770 scheduled in without conflicts. */
2771 while (has_conflicts
)
2773 if (! ps_insn_advance_column (ps
, ps_i
, must_follow
))
2775 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2777 && ps_has_conflicts (ps
,
2784 remove_node_from_ps (ps
, ps_i
);
2788 ps
->min_cycle
= MIN (ps
->min_cycle
, c
);
2789 ps
->max_cycle
= MAX (ps
->max_cycle
, c
);
2793 /* Rotate the rows of PS such that insns scheduled at time
2794 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2796 rotate_partial_schedule (partial_schedule_ptr ps
, int start_cycle
)
2798 int i
, row
, backward_rotates
;
2799 int last_row
= ps
->ii
- 1;
2801 if (start_cycle
== 0)
2804 backward_rotates
= SMODULO (start_cycle
, ps
->ii
);
2806 /* Revisit later and optimize this into a single loop. */
2807 for (i
= 0; i
< backward_rotates
; i
++)
2809 ps_insn_ptr first_row
= ps
->rows
[0];
2811 for (row
= 0; row
< last_row
; row
++)
2812 ps
->rows
[row
] = ps
->rows
[row
+1];
2814 ps
->rows
[last_row
] = first_row
;
2817 ps
->max_cycle
-= start_cycle
;
2818 ps
->min_cycle
-= start_cycle
;
2821 #endif /* INSN_SCHEDULING */
2824 gate_handle_sms (void)
2826 return (optimize
> 0 && flag_modulo_sched
);
2830 /* Run instruction scheduler. */
2831 /* Perform SMS module scheduling. */
2833 rest_of_handle_sms (void)
2835 #ifdef INSN_SCHEDULING
2838 /* Collect loop information to be used in SMS. */
2839 cfg_layout_initialize (0);
2842 /* Update the life information, because we add pseudos. */
2843 max_regno
= max_reg_num ();
2845 /* Finalize layout changes. */
2847 if (bb
->next_bb
!= EXIT_BLOCK_PTR
)
2848 bb
->aux
= bb
->next_bb
;
2849 free_dominance_info (CDI_DOMINATORS
);
2850 cfg_layout_finalize ();
2851 #endif /* INSN_SCHEDULING */
2855 struct tree_opt_pass pass_sms
=
2858 gate_handle_sms
, /* gate */
2859 rest_of_handle_sms
, /* execute */
2862 0, /* static_pass_number */
2864 0, /* properties_required */
2865 0, /* properties_provided */
2866 0, /* properties_destroyed */
2867 TODO_dump_func
, /* todo_flags_start */
2868 TODO_df_finish
| TODO_verify_rtl_sharing
|
2870 TODO_ggc_collect
, /* todo_flags_finish */