/* Swing Modulo Scheduling implementation.
   Copyright (C) 2004, 2005, 2006, 2007, 2008
   Free Software Foundation, Inc.
   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
39 #include "sched-int.h"
41 #include "cfglayout.h"
49 #include "tree-pass.h"
52 #ifdef INSN_SCHEDULING
/* This file contains the implementation of the Swing Modulo Scheduler,
   described in the following references:
   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
       Lifetime--sensitive modulo scheduling in a production environment.
       IEEE Trans. on Comps., 50(3), March 2001
   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
       PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).

   The basic structure is:
   1. Build a data-dependence graph (DDG) for each loop.
   2. Use the DDG to order the insns of a loop (not in topological order
      necessarily, but rather) trying to place each insn after all its
      predecessors _or_ after all its successors.
   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
   4. Use the ordering to perform list-scheduling of the loop:
      1. Set II = MII.  We will try to schedule the loop within II cycles.
      2. Try to schedule the insns one by one according to the ordering.
	 For each insn compute an interval of cycles by considering already-
	 scheduled preds and succs (and associated latencies); try to place
	 the insn in the cycles of this window checking for potential
	 resource conflicts (using the DFA interface).
	 Note: this is different from the cycle-scheduling of schedule_insns;
	 here the insns are not scheduled monotonically top-down (nor bottom-
	 up).
      3. If failed in scheduling all insns - bump II++ and try again, unless
	 II reaches an upper bound MaxII, in which case report failure.
   5. If we succeeded in scheduling the loop within II cycles, we now
      generate prolog and epilog, decrease the counter of the loop, and
      perform modulo variable expansion for live ranges that span more than
      II cycles (i.e. use register copies to prevent a def from overwriting
      itself before reaching the use).

   SMS works with countable loops (1) whose control part can be easily
   decoupled from the rest of the loop and (2) whose loop count can
   be easily adjusted.  This is because we peel a constant number of
   iterations into a prologue and epilogue for which we want to avoid
   emitting the control part, and a kernel which is to iterate that
   constant number of iterations less than the original loop.  So the
   control part should be a set of insns clearly identified and having
   its own iv, not otherwise used in the loop (at-least for now), which
   initializes a register before the loop to the number of iterations.
   Currently SMS relies on the do-loop pattern to recognize such loops,
   where (1) the control part comprises of all insns defining and/or
   using a certain 'count' register and (2) the loop count can be
   adjusted by modifying this register prior to the loop.
   TODO: Rely on cfgloop analysis instead.  */
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */
/* Opaque handles for the partial schedule and its per-insn entries.  */
typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
122 /* A single instruction in the partial schedule. */
125 /* The corresponding DDG_NODE. */
128 /* The (absolute) cycle in which the PS instruction is scheduled.
129 Same as SCHED_TIME (node). */
132 /* The next/prev PS_INSN in the same row. */
133 ps_insn_ptr next_in_row
,
136 /* The number of nodes in the same row that come after this node. */
140 /* Holds the partial schedule as an array of II rows. Each entry of the
141 array points to a linked list of PS_INSNs, which represents the
142 instructions that are scheduled for that row. */
143 struct partial_schedule
145 int ii
; /* Number of rows in the partial schedule. */
146 int history
; /* Threshold for conflict checking using DFA. */
148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
151 /* The earliest absolute cycle of an insn in the partial schedule. */
154 /* The latest absolute cycle of an insn in the partial schedule. */
157 ddg_ptr g
; /* The DDG of the insns in the partial schedule. */
160 /* We use this to record all the register replacements we do in
161 the kernel so we can undo SMS if it is not profitable. */
162 struct undo_replace_buff_elem
167 struct undo_replace_buff_elem
*next
;
172 static partial_schedule_ptr
create_partial_schedule (int ii
, ddg_ptr
, int history
);
173 static void free_partial_schedule (partial_schedule_ptr
);
174 static void reset_partial_schedule (partial_schedule_ptr
, int new_ii
);
175 void print_partial_schedule (partial_schedule_ptr
, FILE *);
176 static void verify_partial_schedule (partial_schedule_ptr
, sbitmap
);
177 static ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr
,
178 ddg_node_ptr node
, int cycle
,
179 sbitmap must_precede
,
180 sbitmap must_follow
);
181 static void rotate_partial_schedule (partial_schedule_ptr
, int);
182 void set_row_column_for_ps (partial_schedule_ptr
);
183 static void ps_insert_empty_row (partial_schedule_ptr
, int, sbitmap
);
184 static int compute_split_row (sbitmap
, int, int, int, ddg_node_ptr
);
187 /* This page defines constants and structures for the modulo scheduling
190 /* As in haifa-sched.c: */
191 /* issue_rate is the number of insns that can be scheduled in the same
192 machine cycle. It can be defined in the config/mach/mach.h file,
193 otherwise we set it to 1. */
195 static int issue_rate
;
197 static int sms_order_nodes (ddg_ptr
, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr
);
199 static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr
, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr
, rtx
);
201 static void generate_prolog_epilog (partial_schedule_ptr
, struct loop
*,
203 static void duplicate_insns_of_cycles (partial_schedule_ptr
,
206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
208 #define SCHED_FIRST_REG_MOVE(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
210 #define SCHED_NREG_MOVES(x) \
211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
217 typedef struct node_sched_params
219 int asap
; /* A lower-bound on the absolute scheduling cycle. */
220 int time
; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
228 /* The number of register-move instructions added, immediately preceding
232 int row
; /* Holds time % ii. */
233 int stage
; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
238 } *node_sched_params_ptr
;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
245 sms_print_insn (rtx insn
, int aligned ATTRIBUTE_UNUSED
)
249 sprintf (tmp
, "i%4d", INSN_UID (insn
));
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED
,
255 regset cond_exec ATTRIBUTE_UNUSED
,
256 regset used ATTRIBUTE_UNUSED
,
257 regset set ATTRIBUTE_UNUSED
)
/* Haifa-scheduler callback table used by SMS when building dependencies;
   of the entries still visible here only compute_jump_reg_dependencies
   and a trailing run of NULLs survive.
   NOTE(review): the extraction that produced this file dropped most of
   this aggregate initializer (its opening brace, the leading entries such
   as sms_print_insn, and its closing lines).  The field order and count
   are specific to this GCC version, so they were NOT reconstructed here;
   restore the full initializer from the upstream modulo-sched.c -- as it
   stands this fragment will not compile.  No code line below was altered.  */
261 static struct sched_info sms_sched_info
=
270 compute_jump_reg_dependencies
,
275 NULL
, NULL
, NULL
, NULL
, NULL
,
280 /* Given HEAD and TAIL which are the first and last insns in a loop;
281 return the register which controls the loop. Return zero if it has
282 more than one occurrence in the loop besides the control part or the
283 do-loop pattern is not of the form we expect. */
285 doloop_register_get (rtx head ATTRIBUTE_UNUSED
, rtx tail ATTRIBUTE_UNUSED
)
287 #ifdef HAVE_doloop_end
288 rtx reg
, condition
, insn
, first_insn_not_to_check
;
293 /* TODO: Free SMS's dependence on doloop_condition_get. */
294 condition
= doloop_condition_get (tail
);
298 if (REG_P (XEXP (condition
, 0)))
299 reg
= XEXP (condition
, 0);
300 else if (GET_CODE (XEXP (condition
, 0)) == PLUS
301 && REG_P (XEXP (XEXP (condition
, 0), 0)))
302 reg
= XEXP (XEXP (condition
, 0), 0);
306 /* Check that the COUNT_REG has no other occurrences in the loop
307 until the decrement. We assume the control part consists of
308 either a single (parallel) branch-on-count or a (non-parallel)
309 branch immediately preceded by a single (decrement) insn. */
310 first_insn_not_to_check
= (GET_CODE (PATTERN (tail
)) == PARALLEL
? tail
313 for (insn
= head
; insn
!= first_insn_not_to_check
; insn
= NEXT_INSN (insn
))
314 if (reg_mentioned_p (reg
, insn
))
318 fprintf (dump_file
, "SMS count_reg found ");
319 print_rtl_single (dump_file
, reg
);
320 fprintf (dump_file
, " outside control in insn:\n");
321 print_rtl_single (dump_file
, insn
);
333 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
334 that the number of iterations is a compile-time constant. If so,
335 return the rtx that sets COUNT_REG to a constant, and set COUNT to
336 this constant. Otherwise return 0. */
338 const_iteration_count (rtx count_reg
, basic_block pre_header
,
339 HOST_WIDEST_INT
* count
)
347 get_ebb_head_tail (pre_header
, pre_header
, &head
, &tail
);
349 for (insn
= tail
; insn
!= PREV_INSN (head
); insn
= PREV_INSN (insn
))
350 if (INSN_P (insn
) && single_set (insn
) &&
351 rtx_equal_p (count_reg
, SET_DEST (single_set (insn
))))
353 rtx pat
= single_set (insn
);
355 if (GET_CODE (SET_SRC (pat
)) == CONST_INT
)
357 *count
= INTVAL (SET_SRC (pat
));
367 /* A very simple resource-based lower bound on the initiation interval.
368 ??? Improve the accuracy of this bound by considering the
369 utilization of various units. */
373 if (targetm
.sched
.sms_res_mii
)
374 return targetm
.sched
.sms_res_mii (g
);
376 return (g
->num_nodes
/ issue_rate
);
380 /* Points to the array that contains the sched data for each node. */
381 static node_sched_params_ptr node_sched_params
;
383 /* Allocate sched_params for each node and initialize it. Assumes that
384 the aux field of each node contain the asap bound (computed earlier),
385 and copies it into the sched_params field. */
387 set_node_sched_params (ddg_ptr g
)
391 /* Allocate for each node in the DDG a place to hold the "sched_data". */
392 /* Initialize ASAP/ALAP/HIGHT to zero. */
393 node_sched_params
= (node_sched_params_ptr
)
394 xcalloc (g
->num_nodes
,
395 sizeof (struct node_sched_params
));
397 /* Set the pointer of the general data of the node to point to the
398 appropriate sched_params structure. */
399 for (i
= 0; i
< g
->num_nodes
; i
++)
401 /* Watch out for aliasing problems? */
402 node_sched_params
[i
].asap
= g
->nodes
[i
].aux
.count
;
403 g
->nodes
[i
].aux
.info
= &node_sched_params
[i
];
408 print_node_sched_params (FILE *file
, int num_nodes
, ddg_ptr g
)
414 for (i
= 0; i
< num_nodes
; i
++)
416 node_sched_params_ptr nsp
= &node_sched_params
[i
];
417 rtx reg_move
= nsp
->first_reg_move
;
420 fprintf (file
, "Node = %d; INSN = %d\n", i
,
421 (INSN_UID (g
->nodes
[i
].insn
)));
422 fprintf (file
, " asap = %d:\n", nsp
->asap
);
423 fprintf (file
, " time = %d:\n", nsp
->time
);
424 fprintf (file
, " nreg_moves = %d:\n", nsp
->nreg_moves
);
425 for (j
= 0; j
< nsp
->nreg_moves
; j
++)
427 fprintf (file
, " reg_move = ");
428 print_rtl_single (file
, reg_move
);
429 reg_move
= PREV_INSN (reg_move
);
435 Breaking intra-loop register anti-dependences:
436 Each intra-loop register anti-dependence implies a cross-iteration true
437 dependence of distance 1. Therefore, we can remove such false dependencies
438 and figure out if the partial schedule broke them by checking if (for a
439 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
440 if so generate a register move. The number of such moves is equal to:
441 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
442 nreg_moves = ----------------------------------- + 1 - { dependence.
445 static struct undo_replace_buff_elem
*
446 generate_reg_moves (partial_schedule_ptr ps
, bool rescan
)
451 struct undo_replace_buff_elem
*reg_move_replaces
= NULL
;
453 for (i
= 0; i
< g
->num_nodes
; i
++)
455 ddg_node_ptr u
= &g
->nodes
[i
];
457 int nreg_moves
= 0, i_reg_move
;
458 sbitmap
*uses_of_defs
;
460 rtx prev_reg
, old_reg
;
462 /* Compute the number of reg_moves needed for u, by looking at life
463 ranges started at u (excluding self-loops). */
464 for (e
= u
->out
; e
; e
= e
->next_out
)
465 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
467 int nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
469 if (e
->distance
== 1)
470 nreg_moves4e
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
472 /* If dest precedes src in the schedule of the kernel, then dest
473 will read before src writes and we can save one reg_copy. */
474 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
475 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
478 nreg_moves
= MAX (nreg_moves
, nreg_moves4e
);
484 /* Every use of the register defined by node may require a different
485 copy of this register, depending on the time the use is scheduled.
486 Set a bitmap vector, telling which nodes use each copy of this
488 uses_of_defs
= sbitmap_vector_alloc (nreg_moves
, g
->num_nodes
);
489 sbitmap_vector_zero (uses_of_defs
, nreg_moves
);
490 for (e
= u
->out
; e
; e
= e
->next_out
)
491 if (e
->type
== TRUE_DEP
&& e
->dest
!= e
->src
)
493 int dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
)) / ii
;
495 if (e
->distance
== 1)
496 dest_copy
= (SCHED_TIME (e
->dest
) - SCHED_TIME (e
->src
) + ii
) / ii
;
498 if (SCHED_ROW (e
->dest
) == SCHED_ROW (e
->src
)
499 && SCHED_COLUMN (e
->dest
) < SCHED_COLUMN (e
->src
))
503 SET_BIT (uses_of_defs
[dest_copy
- 1], e
->dest
->cuid
);
506 /* Now generate the reg_moves, attaching relevant uses to them. */
507 SCHED_NREG_MOVES (u
) = nreg_moves
;
508 old_reg
= prev_reg
= copy_rtx (SET_DEST (single_set (u
->insn
)));
509 /* Insert the reg-moves right before the notes which precede
510 the insn they relates to. */
511 last_reg_move
= u
->first_note
;
513 for (i_reg_move
= 0; i_reg_move
< nreg_moves
; i_reg_move
++)
515 unsigned int i_use
= 0;
516 rtx new_reg
= gen_reg_rtx (GET_MODE (prev_reg
));
517 rtx reg_move
= gen_move_insn (new_reg
, prev_reg
);
518 sbitmap_iterator sbi
;
520 add_insn_before (reg_move
, last_reg_move
, NULL
);
521 last_reg_move
= reg_move
;
523 if (!SCHED_FIRST_REG_MOVE (u
))
524 SCHED_FIRST_REG_MOVE (u
) = reg_move
;
526 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs
[i_reg_move
], 0, i_use
, sbi
)
528 struct undo_replace_buff_elem
*rep
;
530 rep
= (struct undo_replace_buff_elem
*)
531 xcalloc (1, sizeof (struct undo_replace_buff_elem
));
532 rep
->insn
= g
->nodes
[i_use
].insn
;
533 rep
->orig_reg
= old_reg
;
534 rep
->new_reg
= new_reg
;
536 if (! reg_move_replaces
)
537 reg_move_replaces
= rep
;
540 rep
->next
= reg_move_replaces
;
541 reg_move_replaces
= rep
;
544 replace_rtx (g
->nodes
[i_use
].insn
, old_reg
, new_reg
);
546 df_insn_rescan (g
->nodes
[i_use
].insn
);
551 sbitmap_vector_free (uses_of_defs
);
553 return reg_move_replaces
;
556 /* Free memory allocated for the undo buffer. */
558 free_undo_replace_buff (struct undo_replace_buff_elem
*reg_move_replaces
)
561 while (reg_move_replaces
)
563 struct undo_replace_buff_elem
*rep
= reg_move_replaces
;
565 reg_move_replaces
= reg_move_replaces
->next
;
570 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
571 of SCHED_ROW and SCHED_STAGE. */
573 normalize_sched_times (partial_schedule_ptr ps
)
576 int amount
= PS_MIN_CYCLE (ps
);
578 ps_insn_ptr crr_insn
;
580 for (row
= 0; row
< ii
; row
++)
581 for (crr_insn
= ps
->rows
[row
]; crr_insn
; crr_insn
= crr_insn
->next_in_row
)
583 ddg_node_ptr u
= crr_insn
->node
;
584 int normalized_time
= SCHED_TIME (u
) - amount
;
587 fprintf (dump_file
, "crr_insn->node=%d, crr_insn->cycle=%d,\
588 min_cycle=%d\n", crr_insn
->node
->cuid
, SCHED_TIME
590 gcc_assert (SCHED_TIME (u
) >= ps
->min_cycle
);
591 gcc_assert (SCHED_TIME (u
) <= ps
->max_cycle
);
592 SCHED_TIME (u
) = normalized_time
;
593 SCHED_ROW (u
) = normalized_time
% ii
;
594 SCHED_STAGE (u
) = normalized_time
/ ii
;
598 /* Set SCHED_COLUMN of each node according to its position in PS. */
600 set_columns_for_ps (partial_schedule_ptr ps
)
604 for (row
= 0; row
< ps
->ii
; row
++)
606 ps_insn_ptr cur_insn
= ps
->rows
[row
];
609 for (; cur_insn
; cur_insn
= cur_insn
->next_in_row
)
610 SCHED_COLUMN (cur_insn
->node
) = column
++;
614 /* Permute the insns according to their order in PS, from row 0 to
615 row ii-1, and position them right before LAST. This schedules
616 the insns of the loop kernel. */
618 permute_partial_schedule (partial_schedule_ptr ps
, rtx last
)
624 for (row
= 0; row
< ii
; row
++)
625 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
626 if (PREV_INSN (last
) != ps_ij
->node
->insn
)
627 reorder_insns_nobb (ps_ij
->node
->first_note
, ps_ij
->node
->insn
,
632 duplicate_insns_of_cycles (partial_schedule_ptr ps
, int from_stage
,
633 int to_stage
, int for_prolog
, rtx count_reg
)
638 for (row
= 0; row
< ps
->ii
; row
++)
639 for (ps_ij
= ps
->rows
[row
]; ps_ij
; ps_ij
= ps_ij
->next_in_row
)
641 ddg_node_ptr u_node
= ps_ij
->node
;
643 rtx reg_move
= NULL_RTX
;
645 /* Do not duplicate any insn which refers to count_reg as it
646 belongs to the control part.
647 TODO: This should be done by analyzing the control part of
649 if (reg_mentioned_p (count_reg
, u_node
->insn
))
654 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
655 number of reg_moves starting with the second occurrence of
656 u_node, which is generated if its SCHED_STAGE <= to_stage. */
657 i_reg_moves
= to_stage
- SCHED_STAGE (u_node
) + 1;
658 i_reg_moves
= MAX (i_reg_moves
, 0);
659 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
661 /* The reg_moves start from the *first* reg_move backwards. */
664 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
665 for (j
= 1; j
< i_reg_moves
; j
++)
666 reg_move
= PREV_INSN (reg_move
);
669 else /* It's for the epilog. */
671 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
672 starting to decrease one stage after u_node no longer occurs;
673 that is, generate all reg_moves until
674 SCHED_STAGE (u_node) == from_stage - 1. */
675 i_reg_moves
= SCHED_NREG_MOVES (u_node
)
676 - (from_stage
- SCHED_STAGE (u_node
) - 1);
677 i_reg_moves
= MAX (i_reg_moves
, 0);
678 i_reg_moves
= MIN (i_reg_moves
, SCHED_NREG_MOVES (u_node
));
680 /* The reg_moves start from the *last* reg_move forwards. */
683 reg_move
= SCHED_FIRST_REG_MOVE (u_node
);
684 for (j
= 1; j
< SCHED_NREG_MOVES (u_node
); j
++)
685 reg_move
= PREV_INSN (reg_move
);
689 for (j
= 0; j
< i_reg_moves
; j
++, reg_move
= NEXT_INSN (reg_move
))
690 emit_insn (copy_rtx (PATTERN (reg_move
)));
691 if (SCHED_STAGE (u_node
) >= from_stage
692 && SCHED_STAGE (u_node
) <= to_stage
)
693 duplicate_insn_chain (u_node
->first_note
, u_node
->insn
);
698 /* Generate the instructions (including reg_moves) for prolog & epilog. */
700 generate_prolog_epilog (partial_schedule_ptr ps
, struct loop
*loop
,
701 rtx count_reg
, rtx count_init
)
704 int last_stage
= PS_STAGE_COUNT (ps
) - 1;
707 /* Generate the prolog, inserting its insns on the loop-entry edge. */
712 /* Generate instructions at the beginning of the prolog to
713 adjust the loop count by STAGE_COUNT. If loop count is constant
714 (count_init), this constant is adjusted by STAGE_COUNT in
715 generate_prolog_epilog function. */
716 rtx sub_reg
= NULL_RTX
;
718 sub_reg
= expand_simple_binop (GET_MODE (count_reg
), MINUS
,
719 count_reg
, GEN_INT (last_stage
),
720 count_reg
, 1, OPTAB_DIRECT
);
721 gcc_assert (REG_P (sub_reg
));
722 if (REGNO (sub_reg
) != REGNO (count_reg
))
723 emit_move_insn (count_reg
, sub_reg
);
726 for (i
= 0; i
< last_stage
; i
++)
727 duplicate_insns_of_cycles (ps
, 0, i
, 1, count_reg
);
729 /* Put the prolog on the entry edge. */
730 e
= loop_preheader_edge (loop
);
731 split_edge_and_insert (e
, get_insns ());
735 /* Generate the epilog, inserting its insns on the loop-exit edge. */
738 for (i
= 0; i
< last_stage
; i
++)
739 duplicate_insns_of_cycles (ps
, i
+ 1, last_stage
, 0, count_reg
);
741 /* Put the epilogue on the exit edge. */
742 gcc_assert (single_exit (loop
));
743 e
= single_exit (loop
);
744 split_edge_and_insert (e
, get_insns ());
748 /* Return true if all the BBs of the loop are empty except the
751 loop_single_full_bb_p (struct loop
*loop
)
754 basic_block
*bbs
= get_loop_body (loop
);
756 for (i
= 0; i
< loop
->num_nodes
; i
++)
759 bool empty_bb
= true;
761 if (bbs
[i
] == loop
->header
)
764 /* Make sure that basic blocks other than the header
765 have only notes labels or jumps. */
766 get_ebb_head_tail (bbs
[i
], bbs
[i
], &head
, &tail
);
767 for (; head
!= NEXT_INSN (tail
); head
= NEXT_INSN (head
))
769 if (NOTE_P (head
) || LABEL_P (head
)
770 || (INSN_P (head
) && JUMP_P (head
)))
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
				  && (EDGE_COUNT (loop->latch->preds) == 1) \
				  && (EDGE_COUNT (loop->latch->succs) == 1))
792 /* Return true if the loop is in its canonical form and false if not.
793 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
795 loop_canon_p (struct loop
*loop
)
798 if (loop
->inner
|| !loop_outer (loop
))
801 fprintf (dump_file
, "SMS loop inner or !loop_outer\n");
805 if (!single_exit (loop
))
809 rtx insn
= BB_END (loop
->header
);
811 fprintf (dump_file
, "SMS loop many exits ");
812 fprintf (dump_file
, " %s %d (file, line)\n",
813 insn_file (insn
), insn_line (insn
));
818 if (! SIMPLE_SMS_LOOP_P (loop
) && ! loop_single_full_bb_p (loop
))
822 rtx insn
= BB_END (loop
->header
);
824 fprintf (dump_file
, "SMS loop many BBs. ");
825 fprintf (dump_file
, " %s %d (file, line)\n",
826 insn_file (insn
), insn_line (insn
));
834 /* If there are more than one entry for the loop,
835 make it one by splitting the first entry edge and
836 redirecting the others to the new BB. */
838 canon_loop (struct loop
*loop
)
843 /* Avoid annoying special cases of edges going to exit
845 FOR_EACH_EDGE (e
, i
, EXIT_BLOCK_PTR
->preds
)
846 if ((e
->flags
& EDGE_FALLTHRU
) && (EDGE_COUNT (e
->src
->succs
) > 1))
849 if (loop
->latch
== loop
->header
850 || EDGE_COUNT (loop
->latch
->succs
) > 1)
852 FOR_EACH_EDGE (e
, i
, loop
->header
->preds
)
853 if (e
->src
== loop
->latch
)
/* Probability in % that the sms-ed loop rolls enough so that optimized
   version may be entered.  Just a guess.  */
#define PROB_SMS_ENOUGH_ITERATIONS 80

/* Used to calculate the upper bound of ii.  */
#define MAXII_FACTOR 2
866 /* Main entry point, perform SMS scheduling on the loops of the function
867 that consist of single basic blocks. */
876 partial_schedule_ptr ps
;
877 basic_block bb
= NULL
;
879 basic_block condition_bb
= NULL
;
881 gcov_type trip_count
= 0;
883 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
884 | LOOPS_HAVE_RECORDED_EXITS
);
885 if (number_of_loops () <= 1)
887 loop_optimizer_finalize ();
888 return; /* There are no loops to schedule. */
891 /* Initialize issue_rate. */
892 if (targetm
.sched
.issue_rate
)
894 int temp
= reload_completed
;
896 reload_completed
= 1;
897 issue_rate
= targetm
.sched
.issue_rate ();
898 reload_completed
= temp
;
903 /* Initialize the scheduler. */
904 current_sched_info
= &sms_sched_info
;
906 /* Init Data Flow analysis, to be used in interloop dep calculation. */
907 df_set_flags (DF_LR_RUN_DCE
);
908 df_rd_add_problem ();
909 df_note_add_problem ();
910 df_chain_add_problem (DF_DU_CHAIN
+ DF_UD_CHAIN
);
912 regstat_compute_calls_crossed ();
915 /* Allocate memory to hold the DDG array one entry for each loop.
916 We use loop->num as index into this array. */
917 g_arr
= XCNEWVEC (ddg_ptr
, number_of_loops ());
921 fprintf (dump_file
, "\n\nSMS analysis phase\n");
922 fprintf (dump_file
, "===================\n\n");
925 /* Build DDGs for all the relevant loops and hold them in G_ARR
926 indexed by the loop index. */
927 FOR_EACH_LOOP (li
, loop
, 0)
933 if (dbg_cnt (sms_sched_loop
) == false)
936 fprintf (dump_file
, "SMS reached max limit... \n");
943 rtx insn
= BB_END (loop
->header
);
945 fprintf (dump_file
, "SMS loop num: %d, file: %s, line: %d\n",
946 loop
->num
, insn_file (insn
), insn_line (insn
));
950 if (! loop_canon_p (loop
))
953 if (! loop_single_full_bb_p (loop
))
956 fprintf (dump_file
, "SMS not loop_single_full_bb_p\n");
962 get_ebb_head_tail (bb
, bb
, &head
, &tail
);
963 latch_edge
= loop_latch_edge (loop
);
964 gcc_assert (single_exit (loop
));
965 if (single_exit (loop
)->count
)
966 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
968 /* Perform SMS only on loops that their average count is above threshold. */
970 if ( latch_edge
->count
971 && (latch_edge
->count
< single_exit (loop
)->count
* SMS_LOOP_AVERAGE_COUNT_THRESHOLD
))
975 fprintf (dump_file
, " %s %d (file, line)\n",
976 insn_file (tail
), insn_line (tail
));
977 fprintf (dump_file
, "SMS single-bb-loop\n");
978 if (profile_info
&& flag_branch_probabilities
)
980 fprintf (dump_file
, "SMS loop-count ");
981 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
982 (HOST_WIDEST_INT
) bb
->count
);
983 fprintf (dump_file
, "\n");
984 fprintf (dump_file
, "SMS trip-count ");
985 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
986 (HOST_WIDEST_INT
) trip_count
);
987 fprintf (dump_file
, "\n");
988 fprintf (dump_file
, "SMS profile-sum-max ");
989 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
990 (HOST_WIDEST_INT
) profile_info
->sum_max
);
991 fprintf (dump_file
, "\n");
997 /* Make sure this is a doloop. */
998 if ( !(count_reg
= doloop_register_get (head
, tail
)))
1001 fprintf (dump_file
, "SMS doloop_register_get failed\n");
1005 /* Don't handle BBs with calls or barriers, or !single_set insns,
1006 or auto-increment insns (to avoid creating invalid reg-moves
1007 for the auto-increment insns).
1008 ??? Should handle auto-increment insns.
1009 ??? Should handle insns defining subregs. */
1010 for (insn
= head
; insn
!= NEXT_INSN (tail
); insn
= NEXT_INSN (insn
))
1016 || (INSN_P (insn
) && !JUMP_P (insn
)
1017 && !single_set (insn
) && GET_CODE (PATTERN (insn
)) != USE
)
1018 || (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0)
1019 || (INSN_P (insn
) && (set
= single_set (insn
))
1020 && GET_CODE (SET_DEST (set
)) == SUBREG
))
1024 if (insn
!= NEXT_INSN (tail
))
1029 fprintf (dump_file
, "SMS loop-with-call\n");
1030 else if (BARRIER_P (insn
))
1031 fprintf (dump_file
, "SMS loop-with-barrier\n");
1032 else if (FIND_REG_INC_NOTE (insn
, NULL_RTX
) != 0)
1033 fprintf (dump_file
, "SMS reg inc\n");
1034 else if ((INSN_P (insn
) && !JUMP_P (insn
)
1035 && !single_set (insn
) && GET_CODE (PATTERN (insn
)) != USE
))
1036 fprintf (dump_file
, "SMS loop-with-not-single-set\n");
1038 fprintf (dump_file
, "SMS loop with subreg in lhs\n");
1039 print_rtl_single (dump_file
, insn
);
1045 if (! (g
= create_ddg (bb
, 0)))
1048 fprintf (dump_file
, "SMS create_ddg failed\n");
1052 g_arr
[loop
->num
] = g
;
1054 fprintf (dump_file
, "...OK\n");
1059 fprintf (dump_file
, "\nSMS transformation phase\n");
1060 fprintf (dump_file
, "=========================\n\n");
1063 /* We don't want to perform SMS on new loops - created by versioning. */
1064 FOR_EACH_LOOP (li
, loop
, 0)
1067 rtx count_reg
, count_init
;
1069 unsigned stage_count
= 0;
1070 HOST_WIDEST_INT loop_count
= 0;
1072 if (! (g
= g_arr
[loop
->num
]))
1077 rtx insn
= BB_END (loop
->header
);
1079 fprintf (dump_file
, "SMS loop num: %d, file: %s, line: %d\n",
1080 loop
->num
, insn_file (insn
), insn_line (insn
));
1082 print_ddg (dump_file
, g
);
1085 get_ebb_head_tail (loop
->header
, loop
->header
, &head
, &tail
);
1087 latch_edge
= loop_latch_edge (loop
);
1088 gcc_assert (single_exit (loop
));
1089 if (single_exit (loop
)->count
)
1090 trip_count
= latch_edge
->count
/ single_exit (loop
)->count
;
1094 fprintf (dump_file
, " %s %d (file, line)\n",
1095 insn_file (tail
), insn_line (tail
));
1096 fprintf (dump_file
, "SMS single-bb-loop\n");
1097 if (profile_info
&& flag_branch_probabilities
)
1099 fprintf (dump_file
, "SMS loop-count ");
1100 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1101 (HOST_WIDEST_INT
) bb
->count
);
1102 fprintf (dump_file
, "\n");
1103 fprintf (dump_file
, "SMS profile-sum-max ");
1104 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1105 (HOST_WIDEST_INT
) profile_info
->sum_max
);
1106 fprintf (dump_file
, "\n");
1108 fprintf (dump_file
, "SMS doloop\n");
1109 fprintf (dump_file
, "SMS built-ddg %d\n", g
->num_nodes
);
1110 fprintf (dump_file
, "SMS num-loads %d\n", g
->num_loads
);
1111 fprintf (dump_file
, "SMS num-stores %d\n", g
->num_stores
);
1115 /* In case of th loop have doloop register it gets special
1117 count_init
= NULL_RTX
;
1118 if ((count_reg
= doloop_register_get (head
, tail
)))
1120 basic_block pre_header
;
1122 pre_header
= loop_preheader_edge (loop
)->src
;
1123 count_init
= const_iteration_count (count_reg
, pre_header
,
1126 gcc_assert (count_reg
);
1128 if (dump_file
&& count_init
)
1130 fprintf (dump_file
, "SMS const-doloop ");
1131 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
,
1133 fprintf (dump_file
, "\n");
1136 node_order
= XNEWVEC (int, g
->num_nodes
);
1138 mii
= 1; /* Need to pass some estimate of mii. */
1139 rec_mii
= sms_order_nodes (g
, mii
, node_order
, &max_asap
);
1140 mii
= MAX (res_MII (g
), rec_mii
);
1141 maxii
= MAX (max_asap
, MAXII_FACTOR
* mii
);
1144 fprintf (dump_file
, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1145 rec_mii
, mii
, maxii
);
1147 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1149 set_node_sched_params (g
);
1151 ps
= sms_schedule_by_order (g
, mii
, maxii
, node_order
);
1154 stage_count
= PS_STAGE_COUNT (ps
);
1156 /* Stage count of 1 means that there is no interleaving between
1157 iterations, let the scheduling passes do the job. */
1159 || (count_init
&& (loop_count
<= stage_count
))
1160 || (flag_branch_probabilities
&& (trip_count
<= stage_count
)))
1164 fprintf (dump_file
, "SMS failed... \n");
1165 fprintf (dump_file
, "SMS sched-failed (stage-count=%d, loop-count=", stage_count
);
1166 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, loop_count
);
1167 fprintf (dump_file
, ", trip-count=");
1168 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, trip_count
);
1169 fprintf (dump_file
, ")\n");
1175 struct undo_replace_buff_elem
*reg_move_replaces
;
1180 "SMS succeeded %d %d (with ii, sc)\n", ps
->ii
,
1182 print_partial_schedule (ps
, dump_file
);
1184 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1185 g
->closing_branch
->cuid
, PS_MIN_CYCLE (ps
) - 1);
1188 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1189 the closing_branch was scheduled and should appear in the last (ii-1)
1190 row. Otherwise, we are free to schedule the branch, and we let nodes
1191 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1192 row; this should reduce stage_count to minimum.
1193 TODO: Revisit the issue of scheduling the insns of the
1194 control part relative to the branch when the control part
1195 has more than one insn. */
1196 normalize_sched_times (ps
);
1197 rotate_partial_schedule (ps
, PS_MIN_CYCLE (ps
));
1198 set_columns_for_ps (ps
);
1202 /* case the BCT count is not known , Do loop-versioning */
1203 if (count_reg
&& ! count_init
)
1205 rtx comp_rtx
= gen_rtx_fmt_ee (GT
, VOIDmode
, count_reg
,
1206 GEN_INT(stage_count
));
1207 unsigned prob
= (PROB_SMS_ENOUGH_ITERATIONS
1208 * REG_BR_PROB_BASE
) / 100;
1210 loop_version (loop
, comp_rtx
, &condition_bb
,
1211 prob
, prob
, REG_BR_PROB_BASE
- prob
,
1215 /* Set new iteration count of loop kernel. */
1216 if (count_reg
&& count_init
)
1217 SET_SRC (single_set (count_init
)) = GEN_INT (loop_count
1220 /* Now apply the scheduled kernel to the RTL of the loop. */
1221 permute_partial_schedule (ps
, g
->closing_branch
->first_note
);
1223 /* Mark this loop as software pipelined so the later
1224 scheduling passes doesn't touch it. */
1225 if (! flag_resched_modulo_sched
)
1226 g
->bb
->flags
|= BB_DISABLE_SCHEDULE
;
1227 /* The life-info is not valid any more. */
1228 df_set_bb_dirty (g
->bb
);
1230 reg_move_replaces
= generate_reg_moves (ps
, true);
1232 print_node_sched_params (dump_file
, g
->num_nodes
, g
);
1233 /* Generate prolog and epilog. */
1234 generate_prolog_epilog (ps
, loop
, count_reg
, count_init
);
1236 free_undo_replace_buff (reg_move_replaces
);
1239 free_partial_schedule (ps
);
1240 free (node_sched_params
);
1245 regstat_free_calls_crossed ();
1248 /* Release scheduler data, needed until now because of DFA. */
1250 loop_optimizer_finalize ();
1253 /* The SMS scheduling algorithm itself
1254 -----------------------------------
1255 Input: 'O' an ordered list of insns of a loop.
1256 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1258 'Q' is the empty Set
1259 'PS' is the partial schedule; it holds the currently scheduled nodes with
1261 'PSP' previously scheduled predecessors.
1262 'PSS' previously scheduled successors.
1263 't(u)' the cycle where u is scheduled.
1264 'l(u)' is the latency of u.
1265 'd(v,u)' is the dependence distance from v to u.
1266 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1267 the node ordering phase.
1268 'check_hardware_resources_conflicts(u, PS, c)'
1269 run a trace around cycle/slot through DFA model
1270 to check resource conflicts involving instruction u
1271 at cycle c given the partial schedule PS.
1272 'add_to_partial_schedule_at_time(u, PS, c)'
1273 Add the node/instruction u to the partial schedule
1275 'calculate_register_pressure(PS)'
1276 Given a schedule of instructions, calculate the register
1277 pressure it implies. One implementation could be the
1278 maximum number of overlapping live ranges.
1279 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1280 registers available in the hardware.
1284 3. for each node u in O in pre-computed order
1285 4. if (PSP(u) != Q && PSS(u) == Q) then
1286 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1287 6. start = Early_start; end = Early_start + II - 1; step = 1
1288 11. else if (PSP(u) == Q && PSS(u) != Q) then
1289 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1290 13. start = Late_start; end = Late_start - II + 1; step = -1
1291 14. else if (PSP(u) != Q && PSS(u) != Q) then
1292 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1293 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1294 17. start = Early_start;
1295 18. end = min(Early_start + II - 1 , Late_start);
1297 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1298 21. start = ASAP(u); end = start + II - 1; step = 1
1302 24. for (c = start ; c != end ; c += step)
1303 25. if check_hardware_resources_conflicts(u, PS, c) then
1304 26. add_to_partial_schedule_at_time(u, PS, c)
1309 31. if (success == false) then
1311 33. if (II > maxII) then
1312 34. finish - failed to schedule
1317 39. if (calculate_register_pressure(PS) > maxRP) then
1320 42. compute epilogue & prologue
1321 43. finish - succeeded to schedule
1324 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1325 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1326 set to 0 to save compile time. */
1327 #define DFA_HISTORY SMS_DFA_HISTORY
1329 /* A threshold for the number of repeated unsuccessful attempts to insert
1330 an empty row, before we flush the partial schedule and start over. */
1331 #define MAX_SPLIT_NUM 10
1332 /* Given the partial schedule PS, this function calculates and returns the
1333 cycles in which we can schedule the node with the given index I.
1334 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1335 noticed that there are several cases in which we fail to SMS the loop
1336 because the sched window of a node is empty due to tight data-deps. In
1337 such cases we want to unschedule some of the predecessors/successors
1338 until we get non-empty scheduling window. It returns -1 if the
1339 scheduling window is empty and zero otherwise. */
1342 get_sched_window (partial_schedule_ptr ps
, int *nodes_order
, int i
,
1343 sbitmap sched_nodes
, int ii
, int *start_p
, int *step_p
, int *end_p
)
1345 int start
, step
, end
;
1347 int u
= nodes_order
[i
];
1348 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1349 sbitmap psp
= sbitmap_alloc (ps
->g
->num_nodes
);
1350 sbitmap pss
= sbitmap_alloc (ps
->g
->num_nodes
);
1351 sbitmap u_node_preds
= NODE_PREDECESSORS (u_node
);
1352 sbitmap u_node_succs
= NODE_SUCCESSORS (u_node
);
1356 /* 1. compute sched window for u (start, end, step). */
1359 psp_not_empty
= sbitmap_a_and_b_cg (psp
, u_node_preds
, sched_nodes
);
1360 pss_not_empty
= sbitmap_a_and_b_cg (pss
, u_node_succs
, sched_nodes
);
1362 if (psp_not_empty
&& !pss_not_empty
)
1364 int early_start
= INT_MIN
;
1367 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1369 ddg_node_ptr v_node
= e
->src
;
1373 fprintf (dump_file
, "\nProcessing edge: ");
1374 print_ddg_edge (dump_file
, e
);
1376 "\nScheduling %d (%d) in psp_not_empty,"
1377 " checking p %d (%d): ", u_node
->cuid
,
1378 INSN_UID (u_node
->insn
), v_node
->cuid
, INSN_UID
1382 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1384 int p_st
= SCHED_TIME (v_node
);
1387 MAX (early_start
, p_st
+ e
->latency
- (e
->distance
* ii
));
1391 "pred st = %d; early_start = %d; latency: %d",
1392 p_st
, early_start
, e
->latency
);
1394 if (e
->data_type
== MEM_DEP
)
1395 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1398 fprintf (dump_file
, "the node is not scheduled\n");
1400 start
= early_start
;
1401 end
= MIN (end
, early_start
+ ii
);
1402 /* Schedule the node close to it's predecessors. */
1407 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1408 u_node
->cuid
, INSN_UID (u_node
->insn
), start
, end
, step
);
1411 else if (!psp_not_empty
&& pss_not_empty
)
1413 int late_start
= INT_MAX
;
1416 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1418 ddg_node_ptr v_node
= e
->dest
;
1422 fprintf (dump_file
, "\nProcessing edge:");
1423 print_ddg_edge (dump_file
, e
);
1425 "\nScheduling %d (%d) in pss_not_empty,"
1426 " checking s %d (%d): ", u_node
->cuid
,
1427 INSN_UID (u_node
->insn
), v_node
->cuid
, INSN_UID
1431 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1433 int s_st
= SCHED_TIME (v_node
);
1435 late_start
= MIN (late_start
,
1436 s_st
- e
->latency
+ (e
->distance
* ii
));
1440 "succ st = %d; late_start = %d; latency = %d",
1441 s_st
, late_start
, e
->latency
);
1443 if (e
->data_type
== MEM_DEP
)
1444 end
= MAX (end
, SCHED_TIME (v_node
) - ii
+ 1);
1446 fprintf (dump_file
, "end = %d\n", end
);
1450 fprintf (dump_file
, "the node is not scheduled\n");
1454 end
= MAX (end
, late_start
- ii
);
1455 /* Schedule the node close to it's successors. */
1460 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1461 u_node
->cuid
, INSN_UID (u_node
->insn
), start
, end
, step
);
1465 else if (psp_not_empty
&& pss_not_empty
)
1467 int early_start
= INT_MIN
;
1468 int late_start
= INT_MAX
;
1469 int count_preds
= 0;
1470 int count_succs
= 0;
1474 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1476 ddg_node_ptr v_node
= e
->src
;
1480 fprintf (dump_file
, "\nProcessing edge:");
1481 print_ddg_edge (dump_file
, e
);
1483 "\nScheduling %d (%d) in psp_pss_not_empty,"
1484 " checking p %d (%d): ", u_node
->cuid
, INSN_UID
1485 (u_node
->insn
), v_node
->cuid
, INSN_UID
1489 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1491 int p_st
= SCHED_TIME (v_node
);
1493 early_start
= MAX (early_start
,
1495 - (e
->distance
* ii
));
1499 "pred st = %d; early_start = %d; latency = %d",
1500 p_st
, early_start
, e
->latency
);
1502 if (e
->type
== TRUE_DEP
&& e
->data_type
== REG_DEP
)
1505 if (e
->data_type
== MEM_DEP
)
1506 end
= MIN (end
, SCHED_TIME (v_node
) + ii
- 1);
1509 fprintf (dump_file
, "the node is not scheduled\n");
1512 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1514 ddg_node_ptr v_node
= e
->dest
;
1518 fprintf (dump_file
, "\nProcessing edge:");
1519 print_ddg_edge (dump_file
, e
);
1521 "\nScheduling %d (%d) in psp_pss_not_empty,"
1522 " checking s %d (%d): ", u_node
->cuid
, INSN_UID
1523 (u_node
->insn
), v_node
->cuid
, INSN_UID
1527 if (TEST_BIT (sched_nodes
, v_node
->cuid
))
1529 int s_st
= SCHED_TIME (v_node
);
1531 late_start
= MIN (late_start
,
1533 + (e
->distance
* ii
));
1537 "succ st = %d; late_start = %d; latency = %d",
1538 s_st
, late_start
, e
->latency
);
1540 if (e
->type
== TRUE_DEP
&& e
->data_type
== REG_DEP
)
1543 if (e
->data_type
== MEM_DEP
)
1544 start
= MAX (start
, SCHED_TIME (v_node
) - ii
+ 1);
1547 fprintf (dump_file
, "the node is not scheduled\n");
1550 start
= MAX (start
, early_start
);
1551 end
= MIN (end
, MIN (early_start
+ ii
, late_start
+ 1));
1553 /* If there are more successors than predecessors schedule the
1554 node close to it's successors. */
1555 if (count_succs
>= count_preds
)
1557 int old_start
= start
;
1560 end
= old_start
- 1;
1564 else /* psp is empty && pss is empty. */
1566 start
= SCHED_ASAP (u_node
);
1577 if ((start
>= end
&& step
== 1) || (start
<= end
&& step
== -1))
1580 fprintf (dump_file
, "\nEmpty window: start=%d, end=%d, step=%d\n",
1588 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1589 node currently been scheduled. At the end of the calculation
1590 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1591 U_NODE which are (1) already scheduled in the first/last row of
1592 U_NODE's scheduling window, (2) whose dependence inequality with U
1593 becomes an equality when U is scheduled in this same row, and (3)
1594 whose dependence latency is zero.
1596 The first and last rows are calculated using the following parameters:
1597 START/END rows - The cycles that begins/ends the traversal on the window;
1598 searching for an empty cycle to schedule U_NODE.
1599 STEP - The direction in which we traverse the window.
1600 II - The initiation interval. */
1603 calculate_must_precede_follow (ddg_node_ptr u_node
, int start
, int end
,
1604 int step
, int ii
, sbitmap sched_nodes
,
1605 sbitmap must_precede
, sbitmap must_follow
)
1608 int first_cycle_in_window
, last_cycle_in_window
;
1610 gcc_assert (must_precede
&& must_follow
);
1612 /* Consider the following scheduling window:
1613 {first_cycle_in_window, first_cycle_in_window+1, ...,
1614 last_cycle_in_window}. If step is 1 then the following will be
1615 the order we traverse the window: {start=first_cycle_in_window,
1616 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1617 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1618 end=first_cycle_in_window-1} if step is -1. */
1619 first_cycle_in_window
= (step
== 1) ? start
: end
- step
;
1620 last_cycle_in_window
= (step
== 1) ? end
- step
: start
;
1622 sbitmap_zero (must_precede
);
1623 sbitmap_zero (must_follow
);
1626 fprintf (dump_file
, "\nmust_precede: ");
1628 /* Instead of checking if:
1629 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1630 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1631 first_cycle_in_window)
1633 we use the fact that latency is non-negative:
1634 SCHED_TIME (e->src) - (e->distance * ii) <=
1635 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1636 first_cycle_in_window
1638 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1639 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1640 if (TEST_BIT (sched_nodes
, e
->src
->cuid
)
1641 && ((SCHED_TIME (e
->src
) - (e
->distance
* ii
)) ==
1642 first_cycle_in_window
))
1645 fprintf (dump_file
, "%d ", e
->src
->cuid
);
1647 SET_BIT (must_precede
, e
->src
->cuid
);
1651 fprintf (dump_file
, "\nmust_follow: ");
1653 /* Instead of checking if:
1654 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1655 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1656 last_cycle_in_window)
1658 we use the fact that latency is non-negative:
1659 SCHED_TIME (e->dest) + (e->distance * ii) >=
1660 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1661 last_cycle_in_window
1663 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1664 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1665 if (TEST_BIT (sched_nodes
, e
->dest
->cuid
)
1666 && ((SCHED_TIME (e
->dest
) + (e
->distance
* ii
)) ==
1667 last_cycle_in_window
))
1670 fprintf (dump_file
, "%d ", e
->dest
->cuid
);
1672 SET_BIT (must_follow
, e
->dest
->cuid
);
1676 fprintf (dump_file
, "\n");
1679 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1680 parameters to decide if that's possible:
1681 PS - The partial schedule.
1682 U - The serial number of U_NODE.
1683 NUM_SPLITS - The number of row splits made so far.
1684 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1685 the first row of the scheduling window)
1686 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1687 last row of the scheduling window) */
1690 try_scheduling_node_in_cycle (partial_schedule_ptr ps
, ddg_node_ptr u_node
,
1691 int u
, int cycle
, sbitmap sched_nodes
,
1692 int *num_splits
, sbitmap must_precede
,
1693 sbitmap must_follow
)
1698 verify_partial_schedule (ps
, sched_nodes
);
1699 psi
= ps_add_node_check_conflicts (ps
, u_node
, cycle
,
1700 must_precede
, must_follow
);
1703 SCHED_TIME (u_node
) = cycle
;
1704 SET_BIT (sched_nodes
, u
);
1708 fprintf (dump_file
, "Scheduled w/o split in %d\n", cycle
);
1715 /* This function implements the scheduling algorithm for SMS according to the
1717 static partial_schedule_ptr
1718 sms_schedule_by_order (ddg_ptr g
, int mii
, int maxii
, int *nodes_order
)
1721 int i
, c
, success
, num_splits
= 0;
1722 int flush_and_start_over
= true;
1723 int num_nodes
= g
->num_nodes
;
1724 int start
, end
, step
; /* Place together into one struct? */
1725 sbitmap sched_nodes
= sbitmap_alloc (num_nodes
);
1726 sbitmap must_precede
= sbitmap_alloc (num_nodes
);
1727 sbitmap must_follow
= sbitmap_alloc (num_nodes
);
1728 sbitmap tobe_scheduled
= sbitmap_alloc (num_nodes
);
1730 partial_schedule_ptr ps
= create_partial_schedule (ii
, g
, DFA_HISTORY
);
1732 sbitmap_ones (tobe_scheduled
);
1733 sbitmap_zero (sched_nodes
);
1735 while (flush_and_start_over
&& (ii
< maxii
))
1739 fprintf (dump_file
, "Starting with ii=%d\n", ii
);
1740 flush_and_start_over
= false;
1741 sbitmap_zero (sched_nodes
);
1743 for (i
= 0; i
< num_nodes
; i
++)
1745 int u
= nodes_order
[i
];
1746 ddg_node_ptr u_node
= &ps
->g
->nodes
[u
];
1747 rtx insn
= u_node
->insn
;
1751 RESET_BIT (tobe_scheduled
, u
);
1755 if (JUMP_P (insn
)) /* Closing branch handled later. */
1757 RESET_BIT (tobe_scheduled
, u
);
1761 if (TEST_BIT (sched_nodes
, u
))
1764 /* Try to get non-empty scheduling window. */
1766 if (get_sched_window (ps
, nodes_order
, i
, sched_nodes
, ii
, &start
,
1770 fprintf (dump_file
, "\nTrying to schedule node %d \
1771 INSN = %d in (%d .. %d) step %d\n", u
, (INSN_UID
1772 (g
->nodes
[u
].insn
)), start
, end
, step
);
1774 gcc_assert ((step
> 0 && start
< end
)
1775 || (step
< 0 && start
> end
));
1777 calculate_must_precede_follow (u_node
, start
, end
, step
, ii
,
1778 sched_nodes
, must_precede
,
1781 for (c
= start
; c
!= end
; c
+= step
)
1783 sbitmap tmp_precede
= NULL
;
1784 sbitmap tmp_follow
= NULL
;
1789 tmp_precede
= must_precede
;
1790 else /* step == -1. */
1791 tmp_follow
= must_follow
;
1793 if (c
== end
- step
)
1796 tmp_follow
= must_follow
;
1797 else /* step == -1. */
1798 tmp_precede
= must_precede
;
1802 try_scheduling_node_in_cycle (ps
, u_node
, u
, c
,
1804 &num_splits
, tmp_precede
,
1810 verify_partial_schedule (ps
, sched_nodes
);
1819 if (num_splits
>= MAX_SPLIT_NUM
)
1822 flush_and_start_over
= true;
1823 verify_partial_schedule (ps
, sched_nodes
);
1824 reset_partial_schedule (ps
, ii
);
1825 verify_partial_schedule (ps
, sched_nodes
);
1831 split_row
= compute_split_row (sched_nodes
, start
, end
,
1834 split_row
= compute_split_row (sched_nodes
, end
, start
,
1837 ps_insert_empty_row (ps
, split_row
, sched_nodes
);
1838 i
--; /* Go back and retry node i. */
1841 fprintf (dump_file
, "num_splits=%d\n", num_splits
);
1844 /* ??? If (success), check register pressure estimates. */
1845 } /* Continue with next node. */
1846 } /* While flush_and_start_over. */
1849 free_partial_schedule (ps
);
1853 gcc_assert (sbitmap_equal (tobe_scheduled
, sched_nodes
));
1855 sbitmap_free (sched_nodes
);
1856 sbitmap_free (must_precede
);
1857 sbitmap_free (must_follow
);
1858 sbitmap_free (tobe_scheduled
);
1863 /* This function inserts a new empty row into PS at the position
1864 according to SPLITROW, keeping all already scheduled instructions
1865 intact and updating their SCHED_TIME and cycle accordingly. */
1867 ps_insert_empty_row (partial_schedule_ptr ps
, int split_row
,
1868 sbitmap sched_nodes
)
1870 ps_insn_ptr crr_insn
;
1871 ps_insn_ptr
*rows_new
;
1873 int new_ii
= ii
+ 1;
1876 verify_partial_schedule (ps
, sched_nodes
);
1878 /* We normalize sched_time and rotate ps to have only non-negative sched
1879 times, for simplicity of updating cycles after inserting new row. */
1880 split_row
-= ps
->min_cycle
;
1881 split_row
= SMODULO (split_row
, ii
);
1883 fprintf (dump_file
, "split_row=%d\n", split_row
);
1885 normalize_sched_times (ps
);
1886 rotate_partial_schedule (ps
, ps
->min_cycle
);
1888 rows_new
= (ps_insn_ptr
*) xcalloc (new_ii
, sizeof (ps_insn_ptr
));
1889 for (row
= 0; row
< split_row
; row
++)
1891 rows_new
[row
] = ps
->rows
[row
];
1892 ps
->rows
[row
] = NULL
;
1893 for (crr_insn
= rows_new
[row
];
1894 crr_insn
; crr_insn
= crr_insn
->next_in_row
)
1896 ddg_node_ptr u
= crr_insn
->node
;
1897 int new_time
= SCHED_TIME (u
) + (SCHED_TIME (u
) / ii
);
1899 SCHED_TIME (u
) = new_time
;
1900 crr_insn
->cycle
= new_time
;
1901 SCHED_ROW (u
) = new_time
% new_ii
;
1902 SCHED_STAGE (u
) = new_time
/ new_ii
;
1907 rows_new
[split_row
] = NULL
;
1909 for (row
= split_row
; row
< ii
; row
++)
1911 rows_new
[row
+ 1] = ps
->rows
[row
];
1912 ps
->rows
[row
] = NULL
;
1913 for (crr_insn
= rows_new
[row
+ 1];
1914 crr_insn
; crr_insn
= crr_insn
->next_in_row
)
1916 ddg_node_ptr u
= crr_insn
->node
;
1917 int new_time
= SCHED_TIME (u
) + (SCHED_TIME (u
) / ii
) + 1;
1919 SCHED_TIME (u
) = new_time
;
1920 crr_insn
->cycle
= new_time
;
1921 SCHED_ROW (u
) = new_time
% new_ii
;
1922 SCHED_STAGE (u
) = new_time
/ new_ii
;
1927 ps
->min_cycle
= ps
->min_cycle
+ ps
->min_cycle
/ ii
1928 + (SMODULO (ps
->min_cycle
, ii
) >= split_row
? 1 : 0);
1929 ps
->max_cycle
= ps
->max_cycle
+ ps
->max_cycle
/ ii
1930 + (SMODULO (ps
->max_cycle
, ii
) >= split_row
? 1 : 0);
1932 ps
->rows
= rows_new
;
1934 gcc_assert (ps
->min_cycle
>= 0);
1936 verify_partial_schedule (ps
, sched_nodes
);
1939 fprintf (dump_file
, "min_cycle=%d, max_cycle=%d\n", ps
->min_cycle
,
1943 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1944 UP which are the boundaries of it's scheduling window; compute using
1945 SCHED_NODES and II a row in the partial schedule that can be split
1946 which will separate a critical predecessor from a critical successor
1947 thereby expanding the window, and return it. */
1949 compute_split_row (sbitmap sched_nodes
, int low
, int up
, int ii
,
1950 ddg_node_ptr u_node
)
1953 int lower
= INT_MIN
, upper
= INT_MAX
;
1954 ddg_node_ptr crit_pred
= NULL
;
1955 ddg_node_ptr crit_succ
= NULL
;
1958 for (e
= u_node
->in
; e
!= 0; e
= e
->next_in
)
1960 ddg_node_ptr v_node
= e
->src
;
1962 if (TEST_BIT (sched_nodes
, v_node
->cuid
)
1963 && (low
== SCHED_TIME (v_node
) + e
->latency
- (e
->distance
* ii
)))
1964 if (SCHED_TIME (v_node
) > lower
)
1967 lower
= SCHED_TIME (v_node
);
1971 if (crit_pred
!= NULL
)
1973 crit_cycle
= SCHED_TIME (crit_pred
) + 1;
1974 return SMODULO (crit_cycle
, ii
);
1977 for (e
= u_node
->out
; e
!= 0; e
= e
->next_out
)
1979 ddg_node_ptr v_node
= e
->dest
;
1980 if (TEST_BIT (sched_nodes
, v_node
->cuid
)
1981 && (up
== SCHED_TIME (v_node
) - e
->latency
+ (e
->distance
* ii
)))
1982 if (SCHED_TIME (v_node
) < upper
)
1985 upper
= SCHED_TIME (v_node
);
1989 if (crit_succ
!= NULL
)
1991 crit_cycle
= SCHED_TIME (crit_succ
);
1992 return SMODULO (crit_cycle
, ii
);
1996 fprintf (dump_file
, "Both crit_pred and crit_succ are NULL\n");
1998 return SMODULO ((low
+ up
+ 1) / 2, ii
);
2002 verify_partial_schedule (partial_schedule_ptr ps
, sbitmap sched_nodes
)
2005 ps_insn_ptr crr_insn
;
2007 for (row
= 0; row
< ps
->ii
; row
++)
2008 for (crr_insn
= ps
->rows
[row
]; crr_insn
; crr_insn
= crr_insn
->next_in_row
)
2010 ddg_node_ptr u
= crr_insn
->node
;
2012 gcc_assert (TEST_BIT (sched_nodes
, u
->cuid
));
2013 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2014 popcount (sched_nodes) == number of insns in ps. */
2015 gcc_assert (SCHED_TIME (u
) >= ps
->min_cycle
);
2016 gcc_assert (SCHED_TIME (u
) <= ps
->max_cycle
);
2021 /* This page implements the algorithm for ordering the nodes of a DDG
2022 for modulo scheduling, activated through the
2023 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2025 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2026 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2027 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2028 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2029 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2030 #define DEPTH(x) (ASAP ((x)))
2032 typedef struct node_order_params
* nopa
;
2034 static void order_nodes_of_sccs (ddg_all_sccs_ptr
, int * result
);
2035 static int order_nodes_in_scc (ddg_ptr
, sbitmap
, sbitmap
, int*, int);
2036 static nopa
calculate_order_params (ddg_ptr
, int, int *);
2037 static int find_max_asap (ddg_ptr
, sbitmap
);
2038 static int find_max_hv_min_mob (ddg_ptr
, sbitmap
);
2039 static int find_max_dv_min_mob (ddg_ptr
, sbitmap
);
2041 enum sms_direction
{BOTTOMUP
, TOPDOWN
};
2043 struct node_order_params
2050 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2052 check_nodes_order (int *node_order
, int num_nodes
)
2055 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2060 fprintf (dump_file
, "SMS final nodes order: \n");
2062 for (i
= 0; i
< num_nodes
; i
++)
2064 int u
= node_order
[i
];
2067 fprintf (dump_file
, "%d ", u
);
2068 gcc_assert (u
< num_nodes
&& u
>= 0 && !TEST_BIT (tmp
, u
));
2074 fprintf (dump_file
, "\n");
2079 /* Order the nodes of G for scheduling and pass the result in
2080 NODE_ORDER. Also set aux.count of each node to ASAP.
2081 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2083 sms_order_nodes (ddg_ptr g
, int mii
, int * node_order
, int *pmax_asap
)
2087 ddg_all_sccs_ptr sccs
= create_ddg_all_sccs (g
);
2089 nopa nops
= calculate_order_params (g
, mii
, pmax_asap
);
2092 print_sccs (dump_file
, sccs
, g
);
2094 order_nodes_of_sccs (sccs
, node_order
);
2096 if (sccs
->num_sccs
> 0)
2097 /* First SCC has the largest recurrence_length. */
2098 rec_mii
= sccs
->sccs
[0]->recurrence_length
;
2100 /* Save ASAP before destroying node_order_params. */
2101 for (i
= 0; i
< g
->num_nodes
; i
++)
2103 ddg_node_ptr v
= &g
->nodes
[i
];
2104 v
->aux
.count
= ASAP (v
);
2108 free_ddg_all_sccs (sccs
);
2109 check_nodes_order (node_order
, g
->num_nodes
);
2115 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs
, int * node_order
)
2118 ddg_ptr g
= all_sccs
->ddg
;
2119 int num_nodes
= g
->num_nodes
;
2120 sbitmap prev_sccs
= sbitmap_alloc (num_nodes
);
2121 sbitmap on_path
= sbitmap_alloc (num_nodes
);
2122 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2123 sbitmap ones
= sbitmap_alloc (num_nodes
);
2125 sbitmap_zero (prev_sccs
);
2126 sbitmap_ones (ones
);
2128 /* Perform the node ordering starting from the SCC with the highest recMII.
2129 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2130 for (i
= 0; i
< all_sccs
->num_sccs
; i
++)
2132 ddg_scc_ptr scc
= all_sccs
->sccs
[i
];
2134 /* Add nodes on paths from previous SCCs to the current SCC. */
2135 find_nodes_on_paths (on_path
, g
, prev_sccs
, scc
->nodes
);
2136 sbitmap_a_or_b (tmp
, scc
->nodes
, on_path
);
2138 /* Add nodes on paths from the current SCC to previous SCCs. */
2139 find_nodes_on_paths (on_path
, g
, scc
->nodes
, prev_sccs
);
2140 sbitmap_a_or_b (tmp
, tmp
, on_path
);
2142 /* Remove nodes of previous SCCs from current extended SCC. */
2143 sbitmap_difference (tmp
, tmp
, prev_sccs
);
2145 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
2146 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2149 /* Handle the remaining nodes that do not belong to any scc. Each call
2150 to order_nodes_in_scc handles a single connected component. */
2151 while (pos
< g
->num_nodes
)
2153 sbitmap_difference (tmp
, ones
, prev_sccs
);
2154 pos
= order_nodes_in_scc (g
, prev_sccs
, tmp
, node_order
, pos
);
2156 sbitmap_free (prev_sccs
);
2157 sbitmap_free (on_path
);
2159 sbitmap_free (ones
);
2162 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2163 static struct node_order_params
*
2164 calculate_order_params (ddg_ptr g
, int mii ATTRIBUTE_UNUSED
, int *pmax_asap
)
2168 int num_nodes
= g
->num_nodes
;
2170 /* Allocate a place to hold ordering params for each node in the DDG. */
2171 nopa node_order_params_arr
;
2173 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2174 node_order_params_arr
= (nopa
) xcalloc (num_nodes
,
2175 sizeof (struct node_order_params
));
2177 /* Set the aux pointer of each node to point to its order_params structure. */
2178 for (u
= 0; u
< num_nodes
; u
++)
2179 g
->nodes
[u
].aux
.info
= &node_order_params_arr
[u
];
2181 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2182 calculate ASAP, ALAP, mobility, distance, and height for each node
2183 in the dependence (direct acyclic) graph. */
2185 /* We assume that the nodes in the array are in topological order. */
2188 for (u
= 0; u
< num_nodes
; u
++)
2190 ddg_node_ptr u_node
= &g
->nodes
[u
];
2193 for (e
= u_node
->in
; e
; e
= e
->next_in
)
2194 if (e
->distance
== 0)
2195 ASAP (u_node
) = MAX (ASAP (u_node
),
2196 ASAP (e
->src
) + e
->latency
);
2197 max_asap
= MAX (max_asap
, ASAP (u_node
));
2200 for (u
= num_nodes
- 1; u
> -1; u
--)
2202 ddg_node_ptr u_node
= &g
->nodes
[u
];
2204 ALAP (u_node
) = max_asap
;
2205 HEIGHT (u_node
) = 0;
2206 for (e
= u_node
->out
; e
; e
= e
->next_out
)
2207 if (e
->distance
== 0)
2209 ALAP (u_node
) = MIN (ALAP (u_node
),
2210 ALAP (e
->dest
) - e
->latency
);
2211 HEIGHT (u_node
) = MAX (HEIGHT (u_node
),
2212 HEIGHT (e
->dest
) + e
->latency
);
2217 fprintf (dump_file
, "\nOrder params\n");
2218 for (u
= 0; u
< num_nodes
; u
++)
2220 ddg_node_ptr u_node
= &g
->nodes
[u
];
2222 fprintf (dump_file
, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u
,
2223 ASAP (u_node
), ALAP (u_node
), HEIGHT (u_node
));
2227 *pmax_asap
= max_asap
;
2228 return node_order_params_arr
;
2232 find_max_asap (ddg_ptr g
, sbitmap nodes
)
2237 sbitmap_iterator sbi
;
2239 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2241 ddg_node_ptr u_node
= &g
->nodes
[u
];
2243 if (max_asap
< ASAP (u_node
))
2245 max_asap
= ASAP (u_node
);
2253 find_max_hv_min_mob (ddg_ptr g
, sbitmap nodes
)
2257 int min_mob
= INT_MAX
;
2259 sbitmap_iterator sbi
;
2261 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2263 ddg_node_ptr u_node
= &g
->nodes
[u
];
2265 if (max_hv
< HEIGHT (u_node
))
2267 max_hv
= HEIGHT (u_node
);
2268 min_mob
= MOB (u_node
);
2271 else if ((max_hv
== HEIGHT (u_node
))
2272 && (min_mob
> MOB (u_node
)))
2274 min_mob
= MOB (u_node
);
2282 find_max_dv_min_mob (ddg_ptr g
, sbitmap nodes
)
2286 int min_mob
= INT_MAX
;
2288 sbitmap_iterator sbi
;
2290 EXECUTE_IF_SET_IN_SBITMAP (nodes
, 0, u
, sbi
)
2292 ddg_node_ptr u_node
= &g
->nodes
[u
];
2294 if (max_dv
< DEPTH (u_node
))
2296 max_dv
= DEPTH (u_node
);
2297 min_mob
= MOB (u_node
);
2300 else if ((max_dv
== DEPTH (u_node
))
2301 && (min_mob
> MOB (u_node
)))
2303 min_mob
= MOB (u_node
);
2310 /* Places the nodes of SCC into the NODE_ORDER array starting
2311 at position POS, according to the SMS ordering algorithm.
2312 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2313 the NODE_ORDER array, starting from position zero. */
2315 order_nodes_in_scc (ddg_ptr g
, sbitmap nodes_ordered
, sbitmap scc
,
2316 int * node_order
, int pos
)
2318 enum sms_direction dir
;
2319 int num_nodes
= g
->num_nodes
;
2320 sbitmap workset
= sbitmap_alloc (num_nodes
);
2321 sbitmap tmp
= sbitmap_alloc (num_nodes
);
2322 sbitmap zero_bitmap
= sbitmap_alloc (num_nodes
);
2323 sbitmap predecessors
= sbitmap_alloc (num_nodes
);
2324 sbitmap successors
= sbitmap_alloc (num_nodes
);
2326 sbitmap_zero (predecessors
);
2327 find_predecessors (predecessors
, g
, nodes_ordered
);
2329 sbitmap_zero (successors
);
2330 find_successors (successors
, g
, nodes_ordered
);
2333 if (sbitmap_a_and_b_cg (tmp
, predecessors
, scc
))
2335 sbitmap_copy (workset
, tmp
);
2338 else if (sbitmap_a_and_b_cg (tmp
, successors
, scc
))
2340 sbitmap_copy (workset
, tmp
);
2347 sbitmap_zero (workset
);
2348 if ((u
= find_max_asap (g
, scc
)) >= 0)
2349 SET_BIT (workset
, u
);
2353 sbitmap_zero (zero_bitmap
);
2354 while (!sbitmap_equal (workset
, zero_bitmap
))
2357 ddg_node_ptr v_node
;
2358 sbitmap v_node_preds
;
2359 sbitmap v_node_succs
;
2363 while (!sbitmap_equal (workset
, zero_bitmap
))
2365 v
= find_max_hv_min_mob (g
, workset
);
2366 v_node
= &g
->nodes
[v
];
2367 node_order
[pos
++] = v
;
2368 v_node_succs
= NODE_SUCCESSORS (v_node
);
2369 sbitmap_a_and_b (tmp
, v_node_succs
, scc
);
2371 /* Don't consider the already ordered successors again. */
2372 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
2373 sbitmap_a_or_b (workset
, workset
, tmp
);
2374 RESET_BIT (workset
, v
);
2375 SET_BIT (nodes_ordered
, v
);
2378 sbitmap_zero (predecessors
);
2379 find_predecessors (predecessors
, g
, nodes_ordered
);
2380 sbitmap_a_and_b (workset
, predecessors
, scc
);
2384 while (!sbitmap_equal (workset
, zero_bitmap
))
2386 v
= find_max_dv_min_mob (g
, workset
);
2387 v_node
= &g
->nodes
[v
];
2388 node_order
[pos
++] = v
;
2389 v_node_preds
= NODE_PREDECESSORS (v_node
);
2390 sbitmap_a_and_b (tmp
, v_node_preds
, scc
);
2392 /* Don't consider the already ordered predecessors again. */
2393 sbitmap_difference (tmp
, tmp
, nodes_ordered
);
2394 sbitmap_a_or_b (workset
, workset
, tmp
);
2395 RESET_BIT (workset
, v
);
2396 SET_BIT (nodes_ordered
, v
);
2399 sbitmap_zero (successors
);
2400 find_successors (successors
, g
, nodes_ordered
);
2401 sbitmap_a_and_b (workset
, successors
, scc
);
2405 sbitmap_free (workset
);
2406 sbitmap_free (zero_bitmap
);
2407 sbitmap_free (predecessors
);
2408 sbitmap_free (successors
);
2413 /* This page contains functions for manipulating partial-schedules during
2414 modulo scheduling. */
2416 /* Create a partial schedule and allocate a memory to hold II rows. */
2418 static partial_schedule_ptr
2419 create_partial_schedule (int ii
, ddg_ptr g
, int history
)
2421 partial_schedule_ptr ps
= XNEW (struct partial_schedule
);
2422 ps
->rows
= (ps_insn_ptr
*) xcalloc (ii
, sizeof (ps_insn_ptr
));
2424 ps
->history
= history
;
2425 ps
->min_cycle
= INT_MAX
;
2426 ps
->max_cycle
= INT_MIN
;
2432 /* Free the PS_INSNs in rows array of the given partial schedule.
2433 ??? Consider caching the PS_INSN's. */
2435 free_ps_insns (partial_schedule_ptr ps
)
2439 for (i
= 0; i
< ps
->ii
; i
++)
2443 ps_insn_ptr ps_insn
= ps
->rows
[i
]->next_in_row
;
2446 ps
->rows
[i
] = ps_insn
;
2452 /* Free all the memory allocated to the partial schedule. */
2455 free_partial_schedule (partial_schedule_ptr ps
)
2464 /* Clear the rows array with its PS_INSNs, and create a new one with
2468 reset_partial_schedule (partial_schedule_ptr ps
, int new_ii
)
2473 if (new_ii
== ps
->ii
)
2475 ps
->rows
= (ps_insn_ptr
*) xrealloc (ps
->rows
, new_ii
2476 * sizeof (ps_insn_ptr
));
2477 memset (ps
->rows
, 0, new_ii
* sizeof (ps_insn_ptr
));
2479 ps
->min_cycle
= INT_MAX
;
2480 ps
->max_cycle
= INT_MIN
;
2483 /* Prints the partial schedule as an ii rows array, for each rows
2484 print the ids of the insns in it. */
2486 print_partial_schedule (partial_schedule_ptr ps
, FILE *dump
)
2490 for (i
= 0; i
< ps
->ii
; i
++)
2492 ps_insn_ptr ps_i
= ps
->rows
[i
];
2494 fprintf (dump
, "\n[ROW %d ]: ", i
);
2497 fprintf (dump
, "%d, ",
2498 INSN_UID (ps_i
->node
->insn
));
2499 ps_i
= ps_i
->next_in_row
;
2504 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2506 create_ps_insn (ddg_node_ptr node
, int rest_count
, int cycle
)
2508 ps_insn_ptr ps_i
= XNEW (struct ps_insn
);
2511 ps_i
->next_in_row
= NULL
;
2512 ps_i
->prev_in_row
= NULL
;
2513 ps_i
->row_rest_count
= rest_count
;
2514 ps_i
->cycle
= cycle
;
2520 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2521 node is not found in the partial schedule, else returns true. */
2523 remove_node_from_ps (partial_schedule_ptr ps
, ps_insn_ptr ps_i
)
2530 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2531 if (! ps_i
->prev_in_row
)
2533 if (ps_i
!= ps
->rows
[row
])
2536 ps
->rows
[row
] = ps_i
->next_in_row
;
2538 ps
->rows
[row
]->prev_in_row
= NULL
;
2542 ps_i
->prev_in_row
->next_in_row
= ps_i
->next_in_row
;
2543 if (ps_i
->next_in_row
)
2544 ps_i
->next_in_row
->prev_in_row
= ps_i
->prev_in_row
;
2550 /* Unlike what literature describes for modulo scheduling (which focuses
2551 on VLIW machines) the order of the instructions inside a cycle is
2552 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2553 where the current instruction should go relative to the already
2554 scheduled instructions in the given cycle. Go over these
2555 instructions and find the first possible column to put it in. */
2557 ps_insn_find_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2558 sbitmap must_precede
, sbitmap must_follow
)
2560 ps_insn_ptr next_ps_i
;
2561 ps_insn_ptr first_must_follow
= NULL
;
2562 ps_insn_ptr last_must_precede
= NULL
;
2568 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2570 /* Find the first must follow and the last must precede
2571 and insert the node immediately after the must precede
2572 but make sure that it there is no must follow after it. */
2573 for (next_ps_i
= ps
->rows
[row
];
2575 next_ps_i
= next_ps_i
->next_in_row
)
2577 if (must_follow
&& TEST_BIT (must_follow
, next_ps_i
->node
->cuid
)
2578 && ! first_must_follow
)
2579 first_must_follow
= next_ps_i
;
2580 if (must_precede
&& TEST_BIT (must_precede
, next_ps_i
->node
->cuid
))
2582 /* If we have already met a node that must follow, then
2583 there is no possible column. */
2584 if (first_must_follow
)
2587 last_must_precede
= next_ps_i
;
2591 /* Now insert the node after INSERT_AFTER_PSI. */
2593 if (! last_must_precede
)
2595 ps_i
->next_in_row
= ps
->rows
[row
];
2596 ps_i
->prev_in_row
= NULL
;
2597 if (ps_i
->next_in_row
)
2598 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2599 ps
->rows
[row
] = ps_i
;
2603 ps_i
->next_in_row
= last_must_precede
->next_in_row
;
2604 last_must_precede
->next_in_row
= ps_i
;
2605 ps_i
->prev_in_row
= last_must_precede
;
2606 if (ps_i
->next_in_row
)
2607 ps_i
->next_in_row
->prev_in_row
= ps_i
;
2613 /* Advances the PS_INSN one column in its current row; returns false
2614 in failure and true in success. Bit N is set in MUST_FOLLOW if
2615 the node with cuid N must be come after the node pointed to by
2616 PS_I when scheduled in the same cycle. */
2618 ps_insn_advance_column (partial_schedule_ptr ps
, ps_insn_ptr ps_i
,
2619 sbitmap must_follow
)
2621 ps_insn_ptr prev
, next
;
2623 ddg_node_ptr next_node
;
2628 row
= SMODULO (ps_i
->cycle
, ps
->ii
);
2630 if (! ps_i
->next_in_row
)
2633 next_node
= ps_i
->next_in_row
->node
;
2635 /* Check if next_in_row is dependent on ps_i, both having same sched
2636 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2637 if (must_follow
&& TEST_BIT (must_follow
, next_node
->cuid
))
2640 /* Advance PS_I over its next_in_row in the doubly linked list. */
2641 prev
= ps_i
->prev_in_row
;
2642 next
= ps_i
->next_in_row
;
2644 if (ps_i
== ps
->rows
[row
])
2645 ps
->rows
[row
] = next
;
2647 ps_i
->next_in_row
= next
->next_in_row
;
2649 if (next
->next_in_row
)
2650 next
->next_in_row
->prev_in_row
= ps_i
;
2652 next
->next_in_row
= ps_i
;
2653 ps_i
->prev_in_row
= next
;
2655 next
->prev_in_row
= prev
;
2657 prev
->next_in_row
= next
;
2662 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2663 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2664 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2665 before/after (respectively) the node pointed to by PS_I when scheduled
2666 in the same cycle. */
2668 add_node_to_ps (partial_schedule_ptr ps
, ddg_node_ptr node
, int cycle
,
2669 sbitmap must_precede
, sbitmap must_follow
)
2673 int row
= SMODULO (cycle
, ps
->ii
);
2676 && ps
->rows
[row
]->row_rest_count
>= issue_rate
)
2680 rest_count
+= ps
->rows
[row
]->row_rest_count
;
2682 ps_i
= create_ps_insn (node
, rest_count
, cycle
);
2684 /* Finds and inserts PS_I according to MUST_FOLLOW and
2686 if (! ps_insn_find_column (ps
, ps_i
, must_precede
, must_follow
))
2695 /* Advance time one cycle. Assumes DFA is being used. */
2697 advance_one_cycle (void)
2699 if (targetm
.sched
.dfa_pre_cycle_insn
)
2700 state_transition (curr_state
,
2701 targetm
.sched
.dfa_pre_cycle_insn ());
2703 state_transition (curr_state
, NULL
);
2705 if (targetm
.sched
.dfa_post_cycle_insn
)
2706 state_transition (curr_state
,
2707 targetm
.sched
.dfa_post_cycle_insn ());
2712 /* Checks if PS has resource conflicts according to DFA, starting from
2713 FROM cycle to TO cycle; returns true if there are conflicts and false
2714 if there are no conflicts. Assumes DFA is being used. */
2716 ps_has_conflicts (partial_schedule_ptr ps
, int from
, int to
)
2720 state_reset (curr_state
);
2722 for (cycle
= from
; cycle
<= to
; cycle
++)
2724 ps_insn_ptr crr_insn
;
2725 /* Holds the remaining issue slots in the current row. */
2726 int can_issue_more
= issue_rate
;
2728 /* Walk through the DFA for the current row. */
2729 for (crr_insn
= ps
->rows
[SMODULO (cycle
, ps
->ii
)];
2731 crr_insn
= crr_insn
->next_in_row
)
2733 rtx insn
= crr_insn
->node
->insn
;
2738 /* Check if there is room for the current insn. */
2739 if (!can_issue_more
|| state_dead_lock_p (curr_state
))
2742 /* Update the DFA state and return with failure if the DFA found
2743 resource conflicts. */
2744 if (state_transition (curr_state
, insn
) >= 0)
2747 if (targetm
.sched
.variable_issue
)
2749 targetm
.sched
.variable_issue (sched_dump
, sched_verbose
,
2750 insn
, can_issue_more
);
2751 /* A naked CLOBBER or USE generates no instruction, so don't
2752 let them consume issue slots. */
2753 else if (GET_CODE (PATTERN (insn
)) != USE
2754 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
2758 /* Advance the DFA to the next cycle. */
2759 advance_one_cycle ();
2764 /* Checks if the given node causes resource conflicts when added to PS at
2765 cycle C. If not the node is added to PS and returned; otherwise zero
2766 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2767 cuid N must be come before/after (respectively) the node pointed to by
2768 PS_I when scheduled in the same cycle. */
2770 ps_add_node_check_conflicts (partial_schedule_ptr ps
, ddg_node_ptr n
,
2771 int c
, sbitmap must_precede
,
2772 sbitmap must_follow
)
2774 int has_conflicts
= 0;
2777 /* First add the node to the PS, if this succeeds check for
2778 conflicts, trying different issue slots in the same row. */
2779 if (! (ps_i
= add_node_to_ps (ps
, n
, c
, must_precede
, must_follow
)))
2780 return NULL
; /* Failed to insert the node at the given cycle. */
2782 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2784 && ps_has_conflicts (ps
,
2788 /* Try different issue slots to find one that the given node can be
2789 scheduled in without conflicts. */
2790 while (has_conflicts
)
2792 if (! ps_insn_advance_column (ps
, ps_i
, must_follow
))
2794 has_conflicts
= ps_has_conflicts (ps
, c
, c
)
2796 && ps_has_conflicts (ps
,
2803 remove_node_from_ps (ps
, ps_i
);
2807 ps
->min_cycle
= MIN (ps
->min_cycle
, c
);
2808 ps
->max_cycle
= MAX (ps
->max_cycle
, c
);
2812 /* Rotate the rows of PS such that insns scheduled at time
2813 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2815 rotate_partial_schedule (partial_schedule_ptr ps
, int start_cycle
)
2817 int i
, row
, backward_rotates
;
2818 int last_row
= ps
->ii
- 1;
2820 if (start_cycle
== 0)
2823 backward_rotates
= SMODULO (start_cycle
, ps
->ii
);
2825 /* Revisit later and optimize this into a single loop. */
2826 for (i
= 0; i
< backward_rotates
; i
++)
2828 ps_insn_ptr first_row
= ps
->rows
[0];
2830 for (row
= 0; row
< last_row
; row
++)
2831 ps
->rows
[row
] = ps
->rows
[row
+1];
2833 ps
->rows
[last_row
] = first_row
;
2836 ps
->max_cycle
-= start_cycle
;
2837 ps
->min_cycle
-= start_cycle
;
2840 #endif /* INSN_SCHEDULING */
2843 gate_handle_sms (void)
2845 return (optimize
> 0 && flag_modulo_sched
);
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2874 struct rtl_opt_pass pass_sms
=
2879 gate_handle_sms
, /* gate */
2880 rest_of_handle_sms
, /* execute */
2883 0, /* static_pass_number */
2885 0, /* properties_required */
2886 0, /* properties_provided */
2887 0, /* properties_destroyed */
2888 TODO_dump_func
, /* todo_flags_start */
2889 TODO_df_finish
| TODO_verify_rtl_sharing
|
2891 TODO_ggc_collect
/* todo_flags_finish */