* tree-ssa-loop-manip.c (split_loop_exit_edge): Return the new block.
[official-gcc.git] / gcc / modulo-sched.c
blob56df63c894c14768a4657ade6a7cc2ac180d3263
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tm_p.h"
31 #include "hard-reg-set.h"
32 #include "regs.h"
33 #include "function.h"
34 #include "flags.h"
35 #include "insn-config.h"
36 #include "insn-attr.h"
37 #include "except.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "sched-int.h"
41 #include "target.h"
42 #include "cfglayout.h"
43 #include "cfgloop.h"
44 #include "cfghooks.h"
45 #include "expr.h"
46 #include "params.h"
47 #include "gcov-io.h"
48 #include "df.h"
49 #include "ddg.h"
50 #include "timevar.h"
51 #include "tree-pass.h"
53 #ifdef INSN_SCHEDULING
55 /* This file contains the implementation of the Swing Modulo Scheduler,
56 described in the following references:
57 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
58 Lifetime--sensitive modulo scheduling in a production environment.
59 IEEE Trans. on Comps., 50(3), March 2001
60 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
61 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
62 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
64 The basic structure is:
65 1. Build a data-dependence graph (DDG) for each loop.
66 2. Use the DDG to order the insns of a loop (not in topological order
67 necessarily, but rather) trying to place each insn after all its
68 predecessors _or_ after all its successors.
69 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
70 4. Use the ordering to perform list-scheduling of the loop:
71 1. Set II = MII. We will try to schedule the loop within II cycles.
72 2. Try to schedule the insns one by one according to the ordering.
73 For each insn compute an interval of cycles by considering already-
74 scheduled preds and succs (and associated latencies); try to place
75 the insn in the cycles of this window checking for potential
76 resource conflicts (using the DFA interface).
77 Note: this is different from the cycle-scheduling of schedule_insns;
78 here the insns are not scheduled monotonically top-down (nor bottom-
79 up).
80 3. If failed in scheduling all insns - bump II++ and try again, unless
81 II reaches an upper bound MaxII, in which case report failure.
82 5. If we succeeded in scheduling the loop within II cycles, we now
83 generate prolog and epilog, decrease the counter of the loop, and
84 perform modulo variable expansion for live ranges that span more than
85 II cycles (i.e. use register copies to prevent a def from overwriting
86 itself before reaching the use).
90 /* This page defines partial-schedule structures and functions for
91 modulo scheduling. */
typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   Y must be positive (it is always the initiation interval II here).
   Written as a single expression so that X is evaluated only once;
   the previous conditional form ((x) % (y) < 0 ? ... : (x) % (y))
   evaluated X twice, a classic macro multiple-evaluation hazard.  */
#define SMODULO(x,y) ((((x) % (y)) + (y)) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
110 /* A single instruction in the partial schedule. */
111 struct ps_insn
113 /* The corresponding DDG_NODE. */
114 ddg_node_ptr node;
116 /* The (absolute) cycle in which the PS instruction is scheduled.
117 Same as SCHED_TIME (node). */
118 int cycle;
120 /* The next/prev PS_INSN in the same row. */
121 ps_insn_ptr next_in_row,
122 prev_in_row;
124 /* The number of nodes in the same row that come after this node. */
125 int row_rest_count;
128 /* Holds the partial schedule as an array of II rows. Each entry of the
129 array points to a linked list of PS_INSNs, which represents the
130 instructions that are scheduled for that row. */
131 struct partial_schedule
133 int ii; /* Number of rows in the partial schedule. */
134 int history; /* Threshold for conflict checking using DFA. */
136 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
137 ps_insn_ptr *rows;
139 /* The earliest absolute cycle of an insn in the partial schedule. */
140 int min_cycle;
142 /* The latest absolute cycle of an insn in the partial schedule. */
143 int max_cycle;
145 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
148 /* We use this to record all the register replacements we do in
149 the kernel so we can undo SMS if it is not profitable. */
150 struct undo_replace_buff_elem
152 rtx insn;
153 rtx orig_reg;
154 rtx new_reg;
155 struct undo_replace_buff_elem *next;
160 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
161 static void free_partial_schedule (partial_schedule_ptr);
162 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
163 void print_partial_schedule (partial_schedule_ptr, FILE *);
164 static int kernel_number_of_cycles (rtx first_insn, rtx last_insn);
165 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
166 ddg_node_ptr node, int cycle,
167 sbitmap must_precede,
168 sbitmap must_follow);
169 static void rotate_partial_schedule (partial_schedule_ptr, int);
170 void set_row_column_for_ps (partial_schedule_ptr);
171 static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
174 /* This page defines constants and structures for the modulo scheduling
175 driver. */
177 /* As in haifa-sched.c: */
178 /* issue_rate is the number of insns that can be scheduled in the same
179 machine cycle. It can be defined in the config/mach/mach.h file,
180 otherwise we set it to 1. */
182 static int issue_rate;
184 static int sms_order_nodes (ddg_ptr, int, int * result);
185 static void set_node_sched_params (ddg_ptr);
186 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
187 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
188 static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
189 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
190 int from_stage, int to_stage,
191 int is_prolog);
/* Accessors for the per-node scheduling data stashed in each DDG node's
   aux.info field (a node_sched_params_ptr; see set_node_sched_params).  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
203 /* The scheduling parameters held for each node. */
204 typedef struct node_sched_params
206 int asap; /* A lower-bound on the absolute scheduling cycle. */
207 int time; /* The absolute scheduling cycle (time >= asap). */
209 /* The following field (first_reg_move) is a pointer to the first
210 register-move instruction added to handle the modulo-variable-expansion
211 of the register defined by this node. This register-move copies the
212 original register defined by the node. */
213 rtx first_reg_move;
215 /* The number of register-move instructions added, immediately preceding
216 first_reg_move. */
217 int nreg_moves;
219 int row; /* Holds time % ii. */
220 int stage; /* Holds time / ii. */
222 /* The column of a node inside the ps. If nodes u, v are on the same row,
223 u will precede v if column (u) < column (v). */
224 int column;
225 } *node_sched_params_ptr;
228 /* The following three functions are copied from the current scheduler
229 code in order to use sched_analyze() for computing the dependencies.
230 They are used when initializing the sched_info structure. */
231 static const char *
232 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
234 static char tmp[80];
236 sprintf (tmp, "i%4d", INSN_UID (insn));
237 return tmp;
240 static void
241 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
242 regset cond_exec ATTRIBUTE_UNUSED,
243 regset used ATTRIBUTE_UNUSED,
244 regset set ATTRIBUTE_UNUSED)
/* Scheduler hooks used while computing dependencies via sched_analyze;
   only sms_print_insn and compute_jump_reg_dependencies are supplied --
   the remaining hooks are NULL/0 because SMS does its own scheduling.
   NOTE(review): this is a positional initializer; the field order must
   match struct sched_info in sched-int.h -- verify against that header.  */
248 static struct sched_info sms_sched_info =
250 NULL,
251 NULL,
252 NULL,
253 NULL,
254 NULL,
255 sms_print_insn,
256 NULL,
257 compute_jump_reg_dependencies,
258 NULL, NULL,
259 NULL, NULL,
260 0, 0, 0,
262 NULL, NULL, NULL, NULL, NULL,
263 #ifdef ENABLE_CHECKING
264 NULL,
265 #endif
270 /* Return the register decremented and tested in INSN,
271 or zero if it is not a decrement-and-branch insn. */
273 static rtx
274 doloop_register_get (rtx insn ATTRIBUTE_UNUSED)
276 #ifdef HAVE_doloop_end
277 rtx pattern, reg, condition;
279 if (! JUMP_P (insn))
280 return NULL_RTX;
282 pattern = PATTERN (insn);
283 condition = doloop_condition_get (pattern);
284 if (! condition)
285 return NULL_RTX;
287 if (REG_P (XEXP (condition, 0)))
288 reg = XEXP (condition, 0);
289 else if (GET_CODE (XEXP (condition, 0)) == PLUS
290 && REG_P (XEXP (XEXP (condition, 0), 0)))
291 reg = XEXP (XEXP (condition, 0), 0);
292 else
293 gcc_unreachable ();
295 return reg;
296 #else
297 return NULL_RTX;
298 #endif
301 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
302 that the number of iterations is a compile-time constant. If so,
303 return the rtx that sets COUNT_REG to a constant, and set COUNT to
304 this constant. Otherwise return 0. */
305 static rtx
306 const_iteration_count (rtx count_reg, basic_block pre_header,
307 HOST_WIDEST_INT * count)
309 rtx insn;
310 rtx head, tail;
312 if (! pre_header)
313 return NULL_RTX;
315 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
317 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
318 if (INSN_P (insn) && single_set (insn) &&
319 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
321 rtx pat = single_set (insn);
323 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
325 *count = INTVAL (SET_SRC (pat));
326 return insn;
329 return NULL_RTX;
332 return NULL_RTX;
335 /* A very simple resource-based lower bound on the initiation interval.
336 ??? Improve the accuracy of this bound by considering the
337 utilization of various units. */
338 static int
339 res_MII (ddg_ptr g)
341 return (g->num_nodes / issue_rate);
345 /* Points to the array that contains the sched data for each node. */
346 static node_sched_params_ptr node_sched_params;
348 /* Allocate sched_params for each node and initialize it. Assumes that
349 the aux field of each node contain the asap bound (computed earlier),
350 and copies it into the sched_params field. */
351 static void
352 set_node_sched_params (ddg_ptr g)
354 int i;
356 /* Allocate for each node in the DDG a place to hold the "sched_data". */
357 /* Initialize ASAP/ALAP/HIGHT to zero. */
358 node_sched_params = (node_sched_params_ptr)
359 xcalloc (g->num_nodes,
360 sizeof (struct node_sched_params));
362 /* Set the pointer of the general data of the node to point to the
363 appropriate sched_params structure. */
364 for (i = 0; i < g->num_nodes; i++)
366 /* Watch out for aliasing problems? */
367 node_sched_params[i].asap = g->nodes[i].aux.count;
368 g->nodes[i].aux.info = &node_sched_params[i];
372 static void
373 print_node_sched_params (FILE * file, int num_nodes)
375 int i;
377 if (! file)
378 return;
379 for (i = 0; i < num_nodes; i++)
381 node_sched_params_ptr nsp = &node_sched_params[i];
382 rtx reg_move = nsp->first_reg_move;
383 int j;
385 fprintf (file, "Node %d:\n", i);
386 fprintf (file, " asap = %d:\n", nsp->asap);
387 fprintf (file, " time = %d:\n", nsp->time);
388 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
389 for (j = 0; j < nsp->nreg_moves; j++)
391 fprintf (file, " reg_move = ");
392 print_rtl_single (file, reg_move);
393 reg_move = PREV_INSN (reg_move);
398 /* Calculate an upper bound for II. SMS should not schedule the loop if it
399 requires more cycles than this bound. Currently set to the sum of the
400 longest latency edge for each node. Reset based on experiments. */
401 static int
402 calculate_maxii (ddg_ptr g)
404 int i;
405 int maxii = 0;
407 for (i = 0; i < g->num_nodes; i++)
409 ddg_node_ptr u = &g->nodes[i];
410 ddg_edge_ptr e;
411 int max_edge_latency = 0;
413 for (e = u->out; e; e = e->next_out)
414 max_edge_latency = MAX (max_edge_latency, e->latency);
416 maxii += max_edge_latency;
418 return maxii;
422 Breaking intra-loop register anti-dependences:
423 Each intra-loop register anti-dependence implies a cross-iteration true
424 dependence of distance 1. Therefore, we can remove such false dependencies
425 and figure out if the partial schedule broke them by checking if (for a
426 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
427 if so generate a register move. The number of such moves is equal to:
428 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
429 nreg_moves = ----------------------------------- + 1 - { dependence.
430 ii { 1 if not.
432 static struct undo_replace_buff_elem *
433 generate_reg_moves (partial_schedule_ptr ps)
435 ddg_ptr g = ps->g;
436 int ii = ps->ii;
437 int i;
438 struct undo_replace_buff_elem *reg_move_replaces = NULL;
440 for (i = 0; i < g->num_nodes; i++)
442 ddg_node_ptr u = &g->nodes[i];
443 ddg_edge_ptr e;
444 int nreg_moves = 0, i_reg_move;
445 sbitmap *uses_of_defs;
446 rtx last_reg_move;
447 rtx prev_reg, old_reg;
449 /* Compute the number of reg_moves needed for u, by looking at life
450 ranges started at u (excluding self-loops). */
451 for (e = u->out; e; e = e->next_out)
452 if (e->type == TRUE_DEP && e->dest != e->src)
454 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
456 if (e->distance == 1)
457 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
459 /* If dest precedes src in the schedule of the kernel, then dest
460 will read before src writes and we can save one reg_copy. */
461 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
462 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
463 nreg_moves4e--;
465 nreg_moves = MAX (nreg_moves, nreg_moves4e);
468 if (nreg_moves == 0)
469 continue;
471 /* Every use of the register defined by node may require a different
472 copy of this register, depending on the time the use is scheduled.
473 Set a bitmap vector, telling which nodes use each copy of this
474 register. */
475 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
476 sbitmap_vector_zero (uses_of_defs, nreg_moves);
477 for (e = u->out; e; e = e->next_out)
478 if (e->type == TRUE_DEP && e->dest != e->src)
480 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
482 if (e->distance == 1)
483 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
485 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
486 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
487 dest_copy--;
489 if (dest_copy)
490 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
493 /* Now generate the reg_moves, attaching relevant uses to them. */
494 SCHED_NREG_MOVES (u) = nreg_moves;
495 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
496 last_reg_move = u->insn;
498 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
500 unsigned int i_use = 0;
501 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
502 rtx reg_move = gen_move_insn (new_reg, prev_reg);
503 sbitmap_iterator sbi;
505 add_insn_before (reg_move, last_reg_move);
506 last_reg_move = reg_move;
508 if (!SCHED_FIRST_REG_MOVE (u))
509 SCHED_FIRST_REG_MOVE (u) = reg_move;
511 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
513 struct undo_replace_buff_elem *rep;
515 rep = (struct undo_replace_buff_elem *)
516 xcalloc (1, sizeof (struct undo_replace_buff_elem));
517 rep->insn = g->nodes[i_use].insn;
518 rep->orig_reg = old_reg;
519 rep->new_reg = new_reg;
521 if (! reg_move_replaces)
522 reg_move_replaces = rep;
523 else
525 rep->next = reg_move_replaces;
526 reg_move_replaces = rep;
529 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
532 prev_reg = new_reg;
534 sbitmap_vector_free (uses_of_defs);
536 return reg_move_replaces;
539 /* We call this when we want to undo the SMS schedule for a given loop.
540 One of the things that we do is to delete the register moves generated
541 for the sake of SMS; this function deletes the register move instructions
542 recorded in the undo buffer. */
543 static void
544 undo_generate_reg_moves (partial_schedule_ptr ps,
545 struct undo_replace_buff_elem *reg_move_replaces)
547 int i,j;
549 for (i = 0; i < ps->g->num_nodes; i++)
551 ddg_node_ptr u = &ps->g->nodes[i];
552 rtx prev;
553 rtx crr = SCHED_FIRST_REG_MOVE (u);
555 for (j = 0; j < SCHED_NREG_MOVES (u); j++)
557 prev = PREV_INSN (crr);
558 delete_insn (crr);
559 crr = prev;
561 SCHED_FIRST_REG_MOVE (u) = NULL_RTX;
564 while (reg_move_replaces)
566 struct undo_replace_buff_elem *rep = reg_move_replaces;
568 reg_move_replaces = reg_move_replaces->next;
569 replace_rtx (rep->insn, rep->new_reg, rep->orig_reg);
573 /* Free memory allocated for the undo buffer. */
574 static void
575 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
578 while (reg_move_replaces)
580 struct undo_replace_buff_elem *rep = reg_move_replaces;
582 reg_move_replaces = reg_move_replaces->next;
583 free (rep);
587 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
588 of SCHED_ROW and SCHED_STAGE. */
589 static void
590 normalize_sched_times (partial_schedule_ptr ps)
592 int i;
593 ddg_ptr g = ps->g;
594 int amount = PS_MIN_CYCLE (ps);
595 int ii = ps->ii;
597 /* Don't include the closing branch assuming that it is the last node. */
598 for (i = 0; i < g->num_nodes - 1; i++)
600 ddg_node_ptr u = &g->nodes[i];
601 int normalized_time = SCHED_TIME (u) - amount;
603 gcc_assert (normalized_time >= 0);
605 SCHED_TIME (u) = normalized_time;
606 SCHED_ROW (u) = normalized_time % ii;
607 SCHED_STAGE (u) = normalized_time / ii;
611 /* Set SCHED_COLUMN of each node according to its position in PS. */
612 static void
613 set_columns_for_ps (partial_schedule_ptr ps)
615 int row;
617 for (row = 0; row < ps->ii; row++)
619 ps_insn_ptr cur_insn = ps->rows[row];
620 int column = 0;
622 for (; cur_insn; cur_insn = cur_insn->next_in_row)
623 SCHED_COLUMN (cur_insn->node) = column++;
627 /* Permute the insns according to their order in PS, from row 0 to
628 row ii-1, and position them right before LAST. This schedules
629 the insns of the loop kernel. */
630 static void
631 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
633 int ii = ps->ii;
634 int row;
635 ps_insn_ptr ps_ij;
637 for (row = 0; row < ii ; row++)
638 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
639 if (PREV_INSN (last) != ps_ij->node->insn)
640 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
641 PREV_INSN (last));
644 /* As part of undoing SMS we return to the original ordering of the
645 instructions inside the loop kernel. Given the partial schedule PS, this
646 function returns the ordering of the instruction according to their CUID
647 in the DDG (PS->G), which is the original order of the instruction before
648 performing SMS. */
649 static void
650 undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last)
652 int i;
654 for (i = 0 ; i < ps->g->num_nodes; i++)
655 if (last == ps->g->nodes[i].insn
656 || last == ps->g->nodes[i].first_note)
657 break;
658 else if (PREV_INSN (last) != ps->g->nodes[i].insn)
659 reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn,
660 PREV_INSN (last));
663 /* Used to generate the prologue & epilogue. Duplicate the subset of
664 nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
665 of both), together with a prefix/suffix of their reg_moves. */
666 static void
667 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
668 int to_stage, int for_prolog)
670 int row;
671 ps_insn_ptr ps_ij;
673 for (row = 0; row < ps->ii; row++)
674 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
676 ddg_node_ptr u_node = ps_ij->node;
677 int j, i_reg_moves;
678 rtx reg_move = NULL_RTX;
680 if (for_prolog)
682 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
683 number of reg_moves starting with the second occurrence of
684 u_node, which is generated if its SCHED_STAGE <= to_stage. */
685 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
686 i_reg_moves = MAX (i_reg_moves, 0);
687 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
689 /* The reg_moves start from the *first* reg_move backwards. */
690 if (i_reg_moves)
692 reg_move = SCHED_FIRST_REG_MOVE (u_node);
693 for (j = 1; j < i_reg_moves; j++)
694 reg_move = PREV_INSN (reg_move);
697 else /* It's for the epilog. */
699 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
700 starting to decrease one stage after u_node no longer occurs;
701 that is, generate all reg_moves until
702 SCHED_STAGE (u_node) == from_stage - 1. */
703 i_reg_moves = SCHED_NREG_MOVES (u_node)
704 - (from_stage - SCHED_STAGE (u_node) - 1);
705 i_reg_moves = MAX (i_reg_moves, 0);
706 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
708 /* The reg_moves start from the *last* reg_move forwards. */
709 if (i_reg_moves)
711 reg_move = SCHED_FIRST_REG_MOVE (u_node);
712 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
713 reg_move = PREV_INSN (reg_move);
717 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
718 emit_insn (copy_rtx (PATTERN (reg_move)));
719 if (SCHED_STAGE (u_node) >= from_stage
720 && SCHED_STAGE (u_node) <= to_stage)
721 duplicate_insn_chain (u_node->first_note, u_node->insn);
726 /* Generate the instructions (including reg_moves) for prolog & epilog. */
727 static void
728 generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
730 int i;
731 int last_stage = PS_STAGE_COUNT (ps) - 1;
732 edge e;
734 /* Generate the prolog, inserting its insns on the loop-entry edge. */
735 start_sequence ();
737 if (count_reg)
738 /* Generate a subtract instruction at the beginning of the prolog to
739 adjust the loop count by STAGE_COUNT. */
740 emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));
742 for (i = 0; i < last_stage; i++)
743 duplicate_insns_of_cycles (ps, 0, i, 1);
745 /* Put the prolog , on the one and only entry edge. */
746 e = loop_preheader_edge (loop);
747 loop_split_edge_with(e , get_insns());
749 end_sequence ();
751 /* Generate the epilog, inserting its insns on the loop-exit edge. */
752 start_sequence ();
754 for (i = 0; i < last_stage; i++)
755 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
757 /* Put the epilogue on the one and only one exit edge. */
758 gcc_assert (loop->single_exit);
759 e = loop->single_exit;
760 loop_split_edge_with(e , get_insns());
761 end_sequence ();
764 /* Return the line note insn preceding INSN, for debugging. Taken from
765 emit-rtl.c. */
766 static rtx
767 find_line_note (rtx insn)
769 for (; insn; insn = PREV_INSN (insn))
770 if (NOTE_P (insn)
771 && NOTE_LINE_NUMBER (insn) >= 0)
772 break;
774 return insn;
777 /* Return true if all the BBs of the loop are empty except the
778 loop header. */
779 static bool
780 loop_single_full_bb_p (struct loop *loop)
782 unsigned i;
783 basic_block *bbs = get_loop_body (loop);
785 for (i = 0; i < loop->num_nodes ; i++)
787 rtx head, tail;
788 bool empty_bb = true;
790 if (bbs[i] == loop->header)
791 continue;
793 /* Make sure that basic blocks other than the header
794 have only notes labels or jumps. */
795 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
796 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
798 if (NOTE_P (head) || LABEL_P (head)
799 || (INSN_P (head) && JUMP_P (head)))
800 continue;
801 empty_bb = false;
802 break;
805 if (! empty_bb)
807 free (bbs);
808 return false;
811 free (bbs);
812 return true;
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.
   The macro argument is parenthesized so that the macro stays safe for
   any expression argument (the previous version expanded LOOP bare).  */
#define SIMPLE_SMS_LOOP_P(loop) (((loop)->num_nodes < 3)		      \
				 && (EDGE_COUNT ((loop)->latch->preds) == 1)  \
				 && (EDGE_COUNT ((loop)->latch->succs) == 1))
821 /* Return true if the loop is in its canonical form and false if not.
822 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
823 static bool
824 loop_canon_p (struct loop *loop)
827 if (loop->inner || ! loop->outer)
828 return false;
830 if (!loop->single_exit)
832 if (dump_file)
834 rtx line_note = find_line_note (BB_END (loop->header));
836 fprintf (dump_file, "SMS loop many exits ");
837 if (line_note)
839 expanded_location xloc;
840 NOTE_EXPANDED_LOCATION (xloc, line_note);
841 fprintf (dump_file, " %s %d (file, line)\n",
842 xloc.file, xloc.line);
845 return false;
848 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
850 if (dump_file)
852 rtx line_note = find_line_note (BB_END (loop->header));
854 fprintf (dump_file, "SMS loop many BBs. ");
855 if (line_note)
857 expanded_location xloc;
858 NOTE_EXPANDED_LOCATION (xloc, line_note);
859 fprintf (dump_file, " %s %d (file, line)\n",
860 xloc.file, xloc.line);
863 return false;
866 return true;
869 /* If there are more than one entry for the loop,
870 make it one by splitting the first entry edge and
871 redirecting the others to the new BB. */
872 static void
873 canon_loop (struct loop *loop)
875 edge e;
876 edge_iterator i;
878 /* Avoid annoying special cases of edges going to exit
879 block. */
880 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
881 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
882 loop_split_edge_with (e, NULL_RTX);
884 if (loop->latch == loop->header
885 || EDGE_COUNT (loop->latch->succs) > 1)
887 FOR_EACH_EDGE (e, i, loop->header->preds)
888 if (e->src == loop->latch)
889 break;
890 loop_split_edge_with (e, NULL_RTX);
894 /* Main entry point, perform SMS scheduling on the loops of the function
895 that consist of single basic blocks. */
896 static void
897 sms_schedule (void)
899 static int passes = 0;
900 rtx insn;
901 ddg_ptr *g_arr, g;
902 int * node_order;
903 int maxii;
904 unsigned i,num_loops;
905 partial_schedule_ptr ps;
906 struct df *df;
907 struct loops *loops;
908 basic_block bb = NULL;
909 /* vars to the versioning only if needed*/
910 struct loop * nloop;
911 basic_block condition_bb = NULL;
912 edge latch_edge;
913 gcov_type trip_count = 0;
915 loops = loop_optimizer_init (LOOPS_HAVE_PREHEADERS
916 | LOOPS_HAVE_MARKED_SINGLE_EXITS);
917 if (!loops)
918 return; /* There is no loops to schedule. */
920 /* Initialize issue_rate. */
921 if (targetm.sched.issue_rate)
923 int temp = reload_completed;
925 reload_completed = 1;
926 issue_rate = targetm.sched.issue_rate ();
927 reload_completed = temp;
929 else
930 issue_rate = 1;
932 /* Initialize the scheduler. */
933 current_sched_info = &sms_sched_info;
934 sched_init ();
936 /* Init Data Flow analysis, to be used in interloop dep calculation. */
937 df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS);
938 df_rd_add_problem (df, 0);
939 df_ru_add_problem (df, 0);
940 df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN);
941 df_analyze (df);
943 if (dump_file)
944 df_dump (df, dump_file);
946 /* Allocate memory to hold the DDG array one entry for each loop.
947 We use loop->num as index into this array. */
948 g_arr = XCNEWVEC (ddg_ptr, loops->num);
951 /* Build DDGs for all the relevant loops and hold them in G_ARR
952 indexed by the loop index. */
953 for (i = 0; i < loops->num; i++)
955 rtx head, tail;
956 rtx count_reg;
957 struct loop *loop = loops->parray[i];
959 /* For debugging. */
960 if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
962 if (dump_file)
963 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
965 break;
968 if (! loop_canon_p (loop))
969 continue;
971 if (! loop_single_full_bb_p (loop))
972 continue;
974 bb = loop->header;
976 get_ebb_head_tail (bb, bb, &head, &tail);
977 latch_edge = loop_latch_edge (loop);
978 gcc_assert (loop->single_exit);
979 if (loop->single_exit->count)
980 trip_count = latch_edge->count / loop->single_exit->count;
982 /* Perfrom SMS only on loops that their average count is above threshold. */
984 if ( latch_edge->count
985 && (latch_edge->count < loop->single_exit->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
987 if (dump_file)
989 rtx line_note = find_line_note (tail);
991 if (line_note)
993 expanded_location xloc;
994 NOTE_EXPANDED_LOCATION (xloc, line_note);
995 fprintf (dump_file, "SMS bb %s %d (file, line)\n",
996 xloc.file, xloc.line);
998 fprintf (dump_file, "SMS single-bb-loop\n");
999 if (profile_info && flag_branch_probabilities)
1001 fprintf (dump_file, "SMS loop-count ");
1002 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1003 (HOST_WIDEST_INT) bb->count);
1004 fprintf (dump_file, "\n");
1005 fprintf (dump_file, "SMS trip-count ");
1006 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1007 (HOST_WIDEST_INT) trip_count);
1008 fprintf (dump_file, "\n");
1009 fprintf (dump_file, "SMS profile-sum-max ");
1010 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1011 (HOST_WIDEST_INT) profile_info->sum_max);
1012 fprintf (dump_file, "\n");
1015 continue;
1018 /* Make sure this is a doloop. */
1019 if ( !(count_reg = doloop_register_get (tail)))
1020 continue;
1022 /* Don't handle BBs with calls or barriers, or !single_set insns. */
1023 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1024 if (CALL_P (insn)
1025 || BARRIER_P (insn)
1026 || (INSN_P (insn) && !JUMP_P (insn)
1027 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1028 break;
1030 if (insn != NEXT_INSN (tail))
1032 if (dump_file)
1034 if (CALL_P (insn))
1035 fprintf (dump_file, "SMS loop-with-call\n");
1036 else if (BARRIER_P (insn))
1037 fprintf (dump_file, "SMS loop-with-barrier\n");
1038 else
1039 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1040 print_rtl_single (dump_file, insn);
1043 continue;
1046 if (! (g = create_ddg (bb, df, 0)))
1048 if (dump_file)
1049 fprintf (dump_file, "SMS doloop\n");
1050 continue;
1053 g_arr[i] = g;
1056 /* Release Data Flow analysis data structures. */
1057 df_finish (df);
1058 df = NULL;
1060 /* We don't want to perform SMS on new loops - created by versioning. */
1061 num_loops = loops->num;
1062 /* Go over the built DDGs and perfrom SMS for each one of them. */
1063 for (i = 0; i < num_loops; i++)
1065 rtx head, tail;
1066 rtx count_reg, count_init;
1067 int mii, rec_mii;
1068 unsigned stage_count = 0;
1069 HOST_WIDEST_INT loop_count = 0;
1070 struct loop *loop = loops->parray[i];
1072 if (! (g = g_arr[i]))
1073 continue;
1075 if (dump_file)
1076 print_ddg (dump_file, g);
1078 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1080 latch_edge = loop_latch_edge (loop);
1081 gcc_assert (loop->single_exit);
1082 if (loop->single_exit->count)
1083 trip_count = latch_edge->count / loop->single_exit->count;
1085 if (dump_file)
1087 rtx line_note = find_line_note (tail);
1089 if (line_note)
1091 expanded_location xloc;
1092 NOTE_EXPANDED_LOCATION (xloc, line_note);
1093 fprintf (dump_file, "SMS bb %s %d (file, line)\n",
1094 xloc.file, xloc.line);
1096 fprintf (dump_file, "SMS single-bb-loop\n");
1097 if (profile_info && flag_branch_probabilities)
1099 fprintf (dump_file, "SMS loop-count ");
1100 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1101 (HOST_WIDEST_INT) bb->count);
1102 fprintf (dump_file, "\n");
1103 fprintf (dump_file, "SMS profile-sum-max ");
1104 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1105 (HOST_WIDEST_INT) profile_info->sum_max);
1106 fprintf (dump_file, "\n");
1108 fprintf (dump_file, "SMS doloop\n");
1109 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1110 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1111 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1115 /* In case the loop has a doloop register, it gets special
1116 handling. */
1117 count_init = NULL_RTX;
1118 if ((count_reg = doloop_register_get (tail)))
1120 basic_block pre_header;
1122 pre_header = loop_preheader_edge (loop)->src;
1123 count_init = const_iteration_count (count_reg, pre_header,
1124 &loop_count);
1126 gcc_assert (count_reg);
1128 if (dump_file && count_init)
1130 fprintf (dump_file, "SMS const-doloop ");
1131 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1132 loop_count);
1133 fprintf (dump_file, "\n");
1136 node_order = XNEWVEC (int, g->num_nodes);
1138 mii = 1; /* Need to pass some estimate of mii. */
1139 rec_mii = sms_order_nodes (g, mii, node_order);
1140 mii = MAX (res_MII (g), rec_mii);
1141 maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1143 if (dump_file)
1144 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1145 rec_mii, mii, maxii);
1147 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1148 ASAP. */
1149 set_node_sched_params (g);
1151 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1153 if (ps)
1154 stage_count = PS_STAGE_COUNT (ps);
1156 /* Stage count of 1 means that there is no interleaving between
1157 iterations, let the scheduling passes do the job. */
1158 if (stage_count < 1
1159 || (count_init && (loop_count <= stage_count))
1160 || (flag_branch_probabilities && (trip_count <= stage_count)))
1162 if (dump_file)
1164 fprintf (dump_file, "SMS failed... \n");
1165 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1166 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1167 fprintf (dump_file, ", trip-count=");
1168 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1169 fprintf (dump_file, ")\n");
1171 continue;
1173 else
1175 int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1176 int new_cycles;
1177 struct undo_replace_buff_elem *reg_move_replaces;
1179 if (dump_file)
1181 fprintf (dump_file,
1182 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1183 stage_count);
1184 print_partial_schedule (ps, dump_file);
1185 fprintf (dump_file,
1186 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1187 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1190 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1191 the closing_branch was scheduled and should appear in the last (ii-1)
1192 row. Otherwise, we are free to schedule the branch, and we let nodes
1193 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1194 row; this should reduce stage_count to minimum. */
1195 normalize_sched_times (ps);
1196 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1197 set_columns_for_ps (ps);
1199 /* Generate the kernel just to be able to measure its cycles. */
1200 permute_partial_schedule (ps, g->closing_branch->first_note);
1201 reg_move_replaces = generate_reg_moves (ps);
1203 /* Get the number of cycles the new kernel expect to execute in. */
1204 new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1206 /* Get back to the original loop so we can do loop versioning. */
1207 undo_permute_partial_schedule (ps, g->closing_branch->first_note);
1208 if (reg_move_replaces)
1209 undo_generate_reg_moves (ps, reg_move_replaces);
1211 if ( new_cycles >= orig_cycles)
1213 /* SMS is not profitable so undo the permutation and reg move generation
1214 and return the kernel to its original state. */
1215 if (dump_file)
1216 fprintf (dump_file, "Undoing SMS because it is not profitable.\n");
1219 else
1221 canon_loop (loop);
1223 /* In case the BCT count is not known, do loop-versioning. */
1224 if (count_reg && ! count_init)
1226 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1227 GEN_INT(stage_count));
1229 nloop = loop_version (loops, loop, comp_rtx, &condition_bb,
1230 true);
1233 /* Set new iteration count of loop kernel. */
1234 if (count_reg && count_init)
1235 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1236 - stage_count + 1);
1238 /* Now apply the scheduled kernel to the RTL of the loop. */
1239 permute_partial_schedule (ps, g->closing_branch->first_note);
1241 /* Mark this loop as software pipelined so the later
1242 scheduling passes doesn't touch it. */
1243 if (! flag_resched_modulo_sched)
1244 g->bb->flags |= BB_DISABLE_SCHEDULE;
1245 /* The life-info is not valid any more. */
1246 g->bb->flags |= BB_DIRTY;
1248 reg_move_replaces = generate_reg_moves (ps);
1249 if (dump_file)
1250 print_node_sched_params (dump_file, g->num_nodes);
1251 /* Generate prolog and epilog. */
1252 if (count_reg && !count_init)
1253 generate_prolog_epilog (ps, loop, count_reg);
1254 else
1255 generate_prolog_epilog (ps, loop, NULL_RTX);
1257 free_undo_replace_buff (reg_move_replaces);
1260 free_partial_schedule (ps);
1261 free (node_sched_params);
1262 free (node_order);
1263 free_ddg (g);
1266 free (g_arr);
1268 /* Release scheduler data, needed until now because of DFA. */
1269 sched_finish ();
1270 loop_optimizer_finalize (loops);
1273 /* The SMS scheduling algorithm itself
1274 -----------------------------------
1275 Input: 'O' an ordered list of insns of a loop.
1276 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1278 'Q' is the empty Set
1279 'PS' is the partial schedule; it holds the currently scheduled nodes with
1280 their cycle/slot.
1281 'PSP' previously scheduled predecessors.
1282 'PSS' previously scheduled successors.
1283 't(u)' the cycle where u is scheduled.
1284 'l(u)' is the latency of u.
1285 'd(v,u)' is the dependence distance from v to u.
1286 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1287 the node ordering phase.
1288 'check_hardware_resources_conflicts(u, PS, c)'
1289 run a trace around cycle/slot through DFA model
1290 to check resource conflicts involving instruction u
1291 at cycle c given the partial schedule PS.
1292 'add_to_partial_schedule_at_time(u, PS, c)'
1293 Add the node/instruction u to the partial schedule
1294 PS at time c.
1295 'calculate_register_pressure(PS)'
1296 Given a schedule of instructions, calculate the register
1297 pressure it implies. One implementation could be the
1298 maximum number of overlapping live ranges.
1299 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1300 registers available in the hardware.
1302 1. II = MII.
1303 2. PS = empty list
1304 3. for each node u in O in pre-computed order
1305 4. if (PSP(u) != Q && PSS(u) == Q) then
1306 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1307 6. start = Early_start; end = Early_start + II - 1; step = 1
1308 11. else if (PSP(u) == Q && PSS(u) != Q) then
1309 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1310 13. start = Late_start; end = Late_start - II + 1; step = -1
1311 14. else if (PSP(u) != Q && PSS(u) != Q) then
1312 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1313 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1314 17. start = Early_start;
1315 18. end = min(Early_start + II - 1 , Late_start);
1316 19. step = 1
1317 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1318 21. start = ASAP(u); end = start + II - 1; step = 1
1319 22. endif
1321 23. success = false
1322 24. for (c = start ; c != end ; c += step)
1323 25. if check_hardware_resources_conflicts(u, PS, c) then
1324 26. add_to_partial_schedule_at_time(u, PS, c)
1325 27. success = true
1326 28. break
1327 29. endif
1328 30. endfor
1329 31. if (success == false) then
1330 32. II = II + 1
1331 33. if (II > maxII) then
1332 34. finish - failed to schedule
1333 35. endif
1334 36. goto 2.
1335 37. endif
1336 38. endfor
1337 39. if (calculate_register_pressure(PS) > maxRP) then
1338 40. goto 32.
1339 41. endif
1340 42. compute epilogue & prologue
1341 43. finish - succeeded to schedule
1344 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1345 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1346 set to 0 to save compile time. */
1347 #define DFA_HISTORY SMS_DFA_HISTORY
/* Given the partial schedule PS, this function calculates and returns the
   cycles in which we can schedule the node with the given index I.
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
   noticed that there are several cases in which we fail to SMS the loop
   because the sched window of a node is empty due to tight data-deps. In
   such cases we want to unschedule some of the predecessors/successors
   until we get non-empty scheduling window.  It returns -1 if the
   scheduling window is empty and zero otherwise.

   On success the window is returned through *START_P, *STEP_P and *END_P:
   candidate cycles are START, START+STEP, ... up to but excluding END
   (STEP is +1 when scanning forward from predecessors, -1 when scanning
   backward from successors).  */

static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  /* PSP/PSS: already-scheduled predecessors/successors of U.  */
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  if (psp_not_empty && !pss_not_empty)
    {
      /* Only predecessors are scheduled: schedule U as early as the
	 latest scheduled predecessor allows, scanning forward.  */
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;
	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      /* Earliest cycle U can occupy given this incoming edge;
		 distance * ii accounts for cross-iteration deps.  */
	      int node_st = SCHED_TIME (v_node)
			    + e->latency - (e->distance * ii);

	      early_start = MAX (early_start, node_st);

	      /* Memory deps additionally cap the window from above.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      step = 1;
    }

  else if (!psp_not_empty && pss_not_empty)
    {
      /* Only successors are scheduled: schedule U as late as the
	 earliest scheduled successor allows, scanning backward.  */
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;
	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      late_start = MIN (late_start,
				SCHED_TIME (v_node) - e->latency
				+ (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
	    }
	}
      start = late_start;
      end = MAX (end, late_start - ii);
      step = -1;
    }

  else if (psp_not_empty && pss_not_empty)
    {
      /* Both sides constrain U: intersect the early-start window from
	 predecessors with the late-start bound from successors.  */
      int early_start = INT_MIN;
      int late_start = INT_MAX;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      early_start = MAX (early_start,
				 SCHED_TIME (v_node) + e->latency
				 - (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      late_start = MIN (late_start,
				SCHED_TIME (v_node) - e->latency
				+ (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
    }
  else /* psp is empty && pss is empty.  */
    {
      /* No scheduled neighbors: fall back to the ASAP time computed
	 during node ordering, allowing a full II of candidates.  */
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* An empty half-open window [start, end) in the scan direction means
     failure; the caller may unschedule neighbors and retry.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    return -1;
  else
    return 0;
}
/* This function implements the scheduling algorithm for SMS according to the
   above algorithm (see the pseudo-code earlier in this file).  Starting at
   II = MII, it tries to place every node of G (in NODES_ORDER order) into a
   partial schedule; on failure it either unschedules conflicting neighbors
   (limited backtracking) or restarts with a larger II, up to MAXII.
   Returns the partial schedule on success, or NULL if II reached MAXII.  */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success;
  int try_again_with_larger_ii = true;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  /* Iterate until every schedulable node is placed, restarting with a
     larger II whenever a node cannot be placed at the current II.  */
  while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
	 || try_again_with_larger_ii ) && ii < maxii)
    {
      int j;
      bool unscheduled_nodes = false;

      if (dump_file)
	fprintf(dump_file, "Starting with ii=%d\n", ii);
      if (try_again_with_larger_ii)
	{
	  /* Fresh attempt: forget all placements made at the old II.  */
	  try_again_with_larger_ii = false;
	  sbitmap_zero (sched_nodes);
	}

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  /* Notes and other non-insns need no cycle of their own.  */
	  if (!INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  If the window is
	     empty, unschedule already-placed neighbors of U one at a
	     time (walking backward through the order) and retry.  */
	  j = i;
	  while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
		 && j > 0)
	    {
	      unscheduled_nodes = true;
	      if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
		  || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
		{
		  ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
		  RESET_BIT (sched_nodes, nodes_order [j - 1]);
		}
	      j--;
	    }
	  if (j < 0)
	    {
	      /* ??? Try backtracking instead of immediately ii++?  */
	      ii++;
	      try_again_with_larger_ii = true;
	      reset_partial_schedule (ps, ii);
	      break;
	    }
	  /* 2. Try scheduling u in window.  */
	  if (dump_file)
	    fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
		    u, start, end, step);

	  /* use must_follow & must_precede bitmaps to determine order
	     of nodes within the cycle.  A neighbor with a tight edge
	     (latency == ii * distance) scheduled at the boundary cycle
	     constrains U's column within that cycle.  */
	  sbitmap_zero (must_precede);
	  sbitmap_zero (must_follow);
	  for (e = u_node->in; e != 0; e = e->next_in)
	    if (TEST_BIT (sched_nodes, e->src->cuid)
		&& e->latency == (ii * e->distance)
		&& start == SCHED_TIME (e->src))
	      SET_BIT (must_precede, e->src->cuid);

	  for (e = u_node->out; e != 0; e = e->next_out)
	    if (TEST_BIT (sched_nodes, e->dest->cuid)
		&& e->latency == (ii * e->distance)
		&& end == SCHED_TIME (e->dest))
	      SET_BIT (must_follow, e->dest->cuid);

	  /* Probe each candidate cycle in the window until one is free
	     of resource conflicts.  */
	  success = 0;
	  if ((step > 0 && start < end) || (step < 0 && start > end))
	    for (c = start; c != end; c += step)
	      {
		ps_insn_ptr psi;

		psi = ps_add_node_check_conflicts (ps, u_node, c,
						   must_precede,
						   must_follow);

		if (psi)
		  {
		    SCHED_TIME (u_node) = c;
		    SET_BIT (sched_nodes, u);
		    success = 1;
		    if (dump_file)
		      fprintf(dump_file, "Schedule in %d\n", c);
		    break;
		  }
	      }
	  if (!success)
	    {
	      /* ??? Try backtracking instead of immediately ii++?  */
	      ii++;
	      try_again_with_larger_ii = true;
	      reset_partial_schedule (ps, ii);
	      break;
	    }
	  /* Restart the ordered walk after any unscheduling, so the
	     displaced nodes get re-placed before later ones.  */
	  if (unscheduled_nodes)
	    break;

	  /* ??? If (success), check register pressure estimates.  */
	} /* Continue with next node.  */
    } /* While try_again_with_larger_ii.  */

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  return ps;
}
/* This page implements the algorithm for ordering the nodes of a DDG
   for modulo scheduling, activated through the
   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */

/* Accessors for the per-node ordering parameters, stashed in each DDG
   node's aux.info pointer during ordering (see calculate_order_params).  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: slack between the latest and earliest legal start times.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
/* Depth is identical to ASAP in this formulation.  */
#define DEPTH(x) (ASAP ((x)))

typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa  calculate_order_params (ddg_ptr, int mii);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction of the current ordering sweep within an SCC.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* Ordering parameters computed per DDG node.  */
struct node_order_params
{
  int asap;    /* Earliest start time (ignoring cross-iteration edges).  */
  int alap;    /* Latest start time.  */
  int height;  /* Longest latency path to any leaf of the DAG.  */
};
1658 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1659 static void
1660 check_nodes_order (int *node_order, int num_nodes)
1662 int i;
1663 sbitmap tmp = sbitmap_alloc (num_nodes);
1665 sbitmap_zero (tmp);
1667 for (i = 0; i < num_nodes; i++)
1669 int u = node_order[i];
1671 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1673 SET_BIT (tmp, u);
1676 sbitmap_free (tmp);
/* Order the nodes of G for scheduling and pass the result in
   NODE_ORDER.  Also set aux.count of each node to ASAP.
   Return the recMII for the given DDG.  */
static int
sms_order_nodes (ddg_ptr g, int mii, int * node_order)
{
  int i;
  int rec_mii = 0;
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);

  /* Compute ASAP/ALAP/HEIGHT per node; stored via each node's aux.info.  */
  nopa nops = calculate_order_params (g, mii);

  order_nodes_of_sccs (sccs, node_order);

  if (sccs->num_sccs > 0)
    /* First SCC has the largest recurrence_length.  */
    rec_mii = sccs->sccs[0]->recurrence_length;

  /* Save ASAP before destroying node_order_params.  */
  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr v = &g->nodes[i];
      v->aux.count = ASAP (v);
    }

  free (nops);
  free_ddg_all_sccs (sccs);
  /* Sanity check: NODE_ORDER must be a permutation of the node indices.  */
  check_nodes_order (node_order, g->num_nodes);

  return rec_mii;
}
/* Fill NODE_ORDER with an ordering of all nodes of ALL_SCCS->ddg,
   processing SCCs from highest recMII down, each SCC extended with the
   nodes lying on paths to/from previously ordered SCCs.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
/* MII is needed if we consider backarcs (that do not close recursive cycles).
   Allocate and compute the ASAP/ALAP/HEIGHT ordering parameters for every
   node of G, wiring each node's aux.info to its entry in the returned
   array.  Caller frees the array (see sms_order_nodes).  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
					  sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass over the DAG (distance-0 edges only): ASAP is the
     longest-latency path from any root to the node.  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
	if (e->distance == 0)
	  ASAP (u_node) = MAX (ASAP (u_node),
			       ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass: ALAP shrinks from the critical-path length; HEIGHT is
     the longest-latency path from the node to any leaf.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
	if (e->distance == 0)
	  {
	    ALAP (u_node) = MIN (ALAP (u_node),
				 ALAP (e->dest) - e->latency);
	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
				   HEIGHT (e->dest) + e->latency);
	  }
    }

  return node_order_params_arr;
}
1816 static int
1817 find_max_asap (ddg_ptr g, sbitmap nodes)
1819 unsigned int u = 0;
1820 int max_asap = -1;
1821 int result = -1;
1822 sbitmap_iterator sbi;
1824 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1826 ddg_node_ptr u_node = &g->nodes[u];
1828 if (max_asap < ASAP (u_node))
1830 max_asap = ASAP (u_node);
1831 result = u;
1834 return result;
1837 static int
1838 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1840 unsigned int u = 0;
1841 int max_hv = -1;
1842 int min_mob = INT_MAX;
1843 int result = -1;
1844 sbitmap_iterator sbi;
1846 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1848 ddg_node_ptr u_node = &g->nodes[u];
1850 if (max_hv < HEIGHT (u_node))
1852 max_hv = HEIGHT (u_node);
1853 min_mob = MOB (u_node);
1854 result = u;
1856 else if ((max_hv == HEIGHT (u_node))
1857 && (min_mob > MOB (u_node)))
1859 min_mob = MOB (u_node);
1860 result = u;
1863 return result;
1866 static int
1867 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1869 unsigned int u = 0;
1870 int max_dv = -1;
1871 int min_mob = INT_MAX;
1872 int result = -1;
1873 sbitmap_iterator sbi;
1875 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1877 ddg_node_ptr u_node = &g->nodes[u];
1879 if (max_dv < DEPTH (u_node))
1881 max_dv = DEPTH (u_node);
1882 min_mob = MOB (u_node);
1883 result = u;
1885 else if ((max_dv == DEPTH (u_node))
1886 && (min_mob > MOB (u_node)))
1888 min_mob = MOB (u_node);
1889 result = u;
1892 return result;
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.
   Alternates TOPDOWN sweeps (by height) and BOTTOMUP sweeps (by depth)
   over a workset seeded from the SCC's boundary with already-ordered
   nodes.  Returns the next free position in NODE_ORDER.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset: prefer SCC nodes that are immediate predecessors
     (then successors) of already-ordered nodes; if there are none, start
     from the SCC node with the largest ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  sbitmap_zero (zero_bitmap);
  /* Drain the workset, flipping sweep direction each time it empties
     and reseeding from the frontier of the ordered set.  */
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick by max height, min mobility tie-break.  */
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick by max depth, min mobility tie-break.  */
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
1998 /* This page contains functions for manipulating partial-schedules during
1999 modulo scheduling. */
2001 /* Create a partial schedule and allocate a memory to hold II rows. */
2003 static partial_schedule_ptr
2004 create_partial_schedule (int ii, ddg_ptr g, int history)
2006 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2007 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2008 ps->ii = ii;
2009 ps->history = history;
2010 ps->min_cycle = INT_MAX;
2011 ps->max_cycle = INT_MIN;
2012 ps->g = g;
2014 return ps;
2017 /* Free the PS_INSNs in rows array of the given partial schedule.
2018 ??? Consider caching the PS_INSN's. */
2019 static void
2020 free_ps_insns (partial_schedule_ptr ps)
2022 int i;
2024 for (i = 0; i < ps->ii; i++)
2026 while (ps->rows[i])
2028 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2030 free (ps->rows[i]);
2031 ps->rows[i] = ps_insn;
2033 ps->rows[i] = NULL;
2037 /* Free all the memory allocated to the partial schedule. */
2039 static void
2040 free_partial_schedule (partial_schedule_ptr ps)
2042 if (!ps)
2043 return;
2044 free_ps_insns (ps);
2045 free (ps->rows);
2046 free (ps);
2049 /* Clear the rows array with its PS_INSNs, and create a new one with
2050 NEW_II rows. */
2052 static void
2053 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2055 if (!ps)
2056 return;
2057 free_ps_insns (ps);
2058 if (new_ii == ps->ii)
2059 return;
2060 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2061 * sizeof (ps_insn_ptr));
2062 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2063 ps->ii = new_ii;
2064 ps->min_cycle = INT_MAX;
2065 ps->max_cycle = INT_MIN;
2068 /* Prints the partial schedule as an ii rows array, for each rows
2069 print the ids of the insns in it. */
2070 void
2071 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2073 int i;
2075 for (i = 0; i < ps->ii; i++)
2077 ps_insn_ptr ps_i = ps->rows[i];
2079 fprintf (dump, "\n[CYCLE %d ]: ", i);
2080 while (ps_i)
2082 fprintf (dump, "%d, ",
2083 INSN_UID (ps_i->node->insn));
2084 ps_i = ps_i->next_in_row;
2089 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2090 static ps_insn_ptr
2091 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2093 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2095 ps_i->node = node;
2096 ps_i->next_in_row = NULL;
2097 ps_i->prev_in_row = NULL;
2098 ps_i->row_rest_count = rest_count;
2099 ps_i->cycle = cycle;
2101 return ps_i;
2105 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2106 node is not found in the partial schedule, else returns true. */
2107 static bool
2108 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2110 int row;
2112 if (!ps || !ps_i)
2113 return false;
2115 row = SMODULO (ps_i->cycle, ps->ii);
2116 if (! ps_i->prev_in_row)
2118 if (ps_i != ps->rows[row])
2119 return false;
2121 ps->rows[row] = ps_i->next_in_row;
2122 if (ps->rows[row])
2123 ps->rows[row]->prev_in_row = NULL;
2125 else
2127 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2128 if (ps_i->next_in_row)
2129 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2131 free (ps_i);
2132 return true;
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.
   Returns false when no valid column exists (a must-follow insn appears
   before a must-precede insn in the row); on success links PS_I into its
   row's doubly-linked list and returns true.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that it there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No must-precede constraint: insert at the head of the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Insert immediately after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must be come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already last in the row: nothing to advance over.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.
     The swap relinks PREV -> NEXT -> PS_I -> (NEXT's old successor).  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2247 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2248 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2249 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2250 before/after (respectively) the node pointed to by PS_I when scheduled
2251 in the same cycle. */
2252 static ps_insn_ptr
2253 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2254 sbitmap must_precede, sbitmap must_follow)
2256 ps_insn_ptr ps_i;
2257 int rest_count = 1;
2258 int row = SMODULO (cycle, ps->ii);
2260 if (ps->rows[row]
2261 && ps->rows[row]->row_rest_count >= issue_rate)
2262 return NULL;
2264 if (ps->rows[row])
2265 rest_count += ps->rows[row]->row_rest_count;
2267 ps_i = create_ps_insn (node, rest_count, cycle);
2269 /* Finds and inserts PS_I according to MUST_FOLLOW and
2270 MUST_PRECEDE. */
2271 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2273 free (ps_i);
2274 return NULL;
2277 return ps_i;
2280 /* Advance time one cycle. Assumes DFA is being used. */
2281 static void
2282 advance_one_cycle (void)
2284 if (targetm.sched.dfa_pre_cycle_insn)
2285 state_transition (curr_state,
2286 targetm.sched.dfa_pre_cycle_insn ());
2288 state_transition (curr_state, NULL);
2290 if (targetm.sched.dfa_post_cycle_insn)
2291 state_transition (curr_state,
2292 targetm.sched.dfa_post_cycle_insn ());
2295 /* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds
2296 the number of cycles according to DFA that the kernel fits in,
2297 we use this to check if we done well with SMS after we add
2298 register moves. In some cases register moves overhead makes
2299 it even worse than the original loop. We want SMS to be performed
2300 when it gives less cycles after register moves are added. */
2301 static int
2302 kernel_number_of_cycles (rtx first_insn, rtx last_insn)
2304 int cycles = 0;
2305 rtx insn;
2306 int can_issue_more = issue_rate;
2308 state_reset (curr_state);
2310 for (insn = first_insn;
2311 insn != NULL_RTX && insn != last_insn;
2312 insn = NEXT_INSN (insn))
2314 if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
2315 continue;
2317 /* Check if there is room for the current insn. */
2318 if (!can_issue_more || state_dead_lock_p (curr_state))
2320 cycles ++;
2321 advance_one_cycle ();
2322 can_issue_more = issue_rate;
2325 /* Update the DFA state and return with failure if the DFA found
2326 recource conflicts. */
2327 if (state_transition (curr_state, insn) >= 0)
2329 cycles ++;
2330 advance_one_cycle ();
2331 can_issue_more = issue_rate;
2334 if (targetm.sched.variable_issue)
2335 can_issue_more =
2336 targetm.sched.variable_issue (sched_dump, sched_verbose,
2337 insn, can_issue_more);
2338 /* A naked CLOBBER or USE generates no instruction, so don't
2339 let them consume issue slots. */
2340 else if (GET_CODE (PATTERN (insn)) != USE
2341 && GET_CODE (PATTERN (insn)) != CLOBBER)
2342 can_issue_more--;
2344 return cycles;
2347 /* Checks if PS has resource conflicts according to DFA, starting from
2348 FROM cycle to TO cycle; returns true if there are conflicts and false
2349 if there are no conflicts. Assumes DFA is being used. */
2350 static int
2351 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2353 int cycle;
2355 state_reset (curr_state);
2357 for (cycle = from; cycle <= to; cycle++)
2359 ps_insn_ptr crr_insn;
2360 /* Holds the remaining issue slots in the current row. */
2361 int can_issue_more = issue_rate;
2363 /* Walk through the DFA for the current row. */
2364 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2365 crr_insn;
2366 crr_insn = crr_insn->next_in_row)
2368 rtx insn = crr_insn->node->insn;
2370 if (!INSN_P (insn))
2371 continue;
2373 /* Check if there is room for the current insn. */
2374 if (!can_issue_more || state_dead_lock_p (curr_state))
2375 return true;
2377 /* Update the DFA state and return with failure if the DFA found
2378 recource conflicts. */
2379 if (state_transition (curr_state, insn) >= 0)
2380 return true;
2382 if (targetm.sched.variable_issue)
2383 can_issue_more =
2384 targetm.sched.variable_issue (sched_dump, sched_verbose,
2385 insn, can_issue_more);
2386 /* A naked CLOBBER or USE generates no instruction, so don't
2387 let them consume issue slots. */
2388 else if (GET_CODE (PATTERN (insn)) != USE
2389 && GET_CODE (PATTERN (insn)) != CLOBBER)
2390 can_issue_more--;
2393 /* Advance the DFA to the next cycle. */
2394 advance_one_cycle ();
2396 return false;
2399 /* Checks if the given node causes resource conflicts when added to PS at
2400 cycle C. If not the node is added to PS and returned; otherwise zero
2401 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2402 cuid N must be come before/after (respectively) the node pointed to by
2403 PS_I when scheduled in the same cycle. */
2404 ps_insn_ptr
2405 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2406 int c, sbitmap must_precede,
2407 sbitmap must_follow)
2409 int has_conflicts = 0;
2410 ps_insn_ptr ps_i;
2412 /* First add the node to the PS, if this succeeds check for
2413 conflicts, trying different issue slots in the same row. */
2414 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2415 return NULL; /* Failed to insert the node at the given cycle. */
2417 has_conflicts = ps_has_conflicts (ps, c, c)
2418 || (ps->history > 0
2419 && ps_has_conflicts (ps,
2420 c - ps->history,
2421 c + ps->history));
2423 /* Try different issue slots to find one that the given node can be
2424 scheduled in without conflicts. */
2425 while (has_conflicts)
2427 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2428 break;
2429 has_conflicts = ps_has_conflicts (ps, c, c)
2430 || (ps->history > 0
2431 && ps_has_conflicts (ps,
2432 c - ps->history,
2433 c + ps->history));
2436 if (has_conflicts)
2438 remove_node_from_ps (ps, ps_i);
2439 return NULL;
2442 ps->min_cycle = MIN (ps->min_cycle, c);
2443 ps->max_cycle = MAX (ps->max_cycle, c);
2444 return ps_i;
2447 /* Rotate the rows of PS such that insns scheduled at time
2448 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2449 void
2450 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2452 int i, row, backward_rotates;
2453 int last_row = ps->ii - 1;
2455 if (start_cycle == 0)
2456 return;
2458 backward_rotates = SMODULO (start_cycle, ps->ii);
2460 /* Revisit later and optimize this into a single loop. */
2461 for (i = 0; i < backward_rotates; i++)
2463 ps_insn_ptr first_row = ps->rows[0];
2465 for (row = 0; row < last_row; row++)
2466 ps->rows[row] = ps->rows[row+1];
2468 ps->rows[last_row] = first_row;
2471 ps->max_cycle -= start_cycle;
2472 ps->min_cycle -= start_cycle;
2475 /* Remove the node N from the partial schedule PS; because we restart the DFA
2476 each time we want to check for resource conflicts; this is equivalent to
2477 unscheduling the node N. */
2478 static bool
2479 ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2481 ps_insn_ptr ps_i;
2482 int row = SMODULO (SCHED_TIME (n), ps->ii);
2484 if (row < 0 || row > ps->ii)
2485 return false;
2487 for (ps_i = ps->rows[row];
2488 ps_i && ps_i->node != n;
2489 ps_i = ps_i->next_in_row);
2490 if (!ps_i)
2491 return false;
2493 return remove_node_from_ps (ps, ps_i);
2495 #endif /* INSN_SCHEDULING */
2497 static bool
2498 gate_handle_sms (void)
2500 return (optimize > 0 && flag_modulo_sched);
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  Entry point of the SMS pass: sets up
   cfg_layout mode, runs the scheduler, refreshes life information for
   the pseudos SMS created, then tears cfg_layout mode back down.
   NOTE: the steps below are strictly order-dependent.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* We want to be able to create new pseudos.  */
  no_new_pseudos = 0;
  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (CLEANUP_UPDATE_LIFE);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();
  allocate_reg_info (max_regno, FALSE, FALSE);
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
		    (PROP_DEATH_NOTES
		     | PROP_REG_INFO
		     | PROP_KILL_DEAD_CODE
		     | PROP_SCAN_DEAD_CODE));

  no_new_pseudos = 1;

  /* Finalize layout changes: chain the blocks via bb->aux in their
     current order (cfg_layout_finalize reads that chain), then leave
     cfg_layout mode.  Dominance info is stale afterwards, so free it.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  cfg_layout_finalize ();
  free_dominance_info (CDI_DOMINATORS);
#endif /* INSN_SCHEDULING */
  return 0;
}
2539 struct tree_opt_pass pass_sms =
2541 "sms", /* name */
2542 gate_handle_sms, /* gate */
2543 rest_of_handle_sms, /* execute */
2544 NULL, /* sub */
2545 NULL, /* next */
2546 0, /* static_pass_number */
2547 TV_SMS, /* tv_id */
2548 0, /* properties_required */
2549 0, /* properties_provided */
2550 0, /* properties_destroyed */
2551 TODO_dump_func, /* todo_flags_start */
2552 TODO_dump_func |
2553 TODO_ggc_collect, /* todo_flags_finish */
2554 'm' /* letter */