[official-gcc.git] / gcc / modulo-sched.c
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
 56    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero, and J. Eckhardt.
 57    Lifetime-sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
 61    PACT '96, pages 80-87, October 1996 (Boston, Massachusetts, USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
 65    2. Use the DDG to order the insns of a loop (not necessarily in
 66    topological order, but rather trying to place each insn after all
 67    its predecessors _or_ after all its successors).
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
 79    3. If we fail to schedule all the insns, bump II and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
 91    emitting the control part, and a kernel which iterates that constant
 92    number of iterations fewer than the original loop. So the
93 control part should be a set of insns clearly identified and having
 94    its own iv, not otherwise used in the loop (at least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
 97    where (1) the control part comprises all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
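/* For illustration (shape only, not target-exact): on a target with a
   doloop_end pattern the control part is typically a single
   branch-on-count insn, i.e. a PARALLEL that both branches on the count
   register and sets count_reg = count_reg - 1, or such a branch
   immediately preceded by the single decrement insn; no other insn in
   the loop may mention that register.  */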
102 /* This page defines partial-schedule structures and functions for
103 modulo scheduling. */
105 typedef struct partial_schedule *partial_schedule_ptr;
106 typedef struct ps_insn *ps_insn_ptr;
108 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
111 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
114 /* Perform signed modulo, always returning a non-negative value. */
115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
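/* For example, with C's truncating division -7 % 5 is -2, whereas
   SMODULO (-7, 5) yields -2 + 5 = 3; the always-non-negative result is
   what makes SMODULO suitable for mapping (possibly negative) absolute
   cycles onto rows 0 .. ii-1.  (Illustrative numbers.)  */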
117 /* The number of different iterations the nodes in ps span, assuming
118 the stage boundaries are placed efficiently. */
119 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
120 + 1 + (ps)->ii - 1) / (ps)->ii)
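/* PS_STAGE_COUNT is simply ceil ((max_cycle - min_cycle + 1) / ii); e.g.
   with min_cycle = 0, max_cycle = 6 and ii = 3 the 7 scheduled cycles
   span (6 - 0 + 1 + 3 - 1) / 3 = 3 stages.  (Illustrative numbers.)  */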
122 /* A single instruction in the partial schedule. */
123 struct ps_insn
125 /* The corresponding DDG_NODE. */
126 ddg_node_ptr node;
128 /* The (absolute) cycle in which the PS instruction is scheduled.
129 Same as SCHED_TIME (node). */
130 int cycle;
132 /* The next/prev PS_INSN in the same row. */
133 ps_insn_ptr next_in_row,
134 prev_in_row;
136 /* The number of nodes in the same row that come after this node. */
137 int row_rest_count;
140 /* Holds the partial schedule as an array of II rows. Each entry of the
141 array points to a linked list of PS_INSNs, which represents the
142 instructions that are scheduled for that row. */
143 struct partial_schedule
145 int ii; /* Number of rows in the partial schedule. */
146 int history; /* Threshold for conflict checking using DFA. */
148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
149 ps_insn_ptr *rows;
151 /* The earliest absolute cycle of an insn in the partial schedule. */
152 int min_cycle;
154 /* The latest absolute cycle of an insn in the partial schedule. */
155 int max_cycle;
157 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
160 /* We use this to record all the register replacements we do in
161 the kernel so we can undo SMS if it is not profitable. */
162 struct undo_replace_buff_elem
164 rtx insn;
165 rtx orig_reg;
166 rtx new_reg;
167 struct undo_replace_buff_elem *next;
172 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
173 static void free_partial_schedule (partial_schedule_ptr);
174 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
175 void print_partial_schedule (partial_schedule_ptr, FILE *);
176 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
177 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
178 ddg_node_ptr node, int cycle,
179 sbitmap must_precede,
180 sbitmap must_follow);
181 static void rotate_partial_schedule (partial_schedule_ptr, int);
182 void set_row_column_for_ps (partial_schedule_ptr);
183 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
184 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
187 /* This page defines constants and structures for the modulo scheduling
188 driver. */
190 /* As in haifa-sched.c: */
191 /* issue_rate is the number of insns that can be scheduled in the same
192 machine cycle. It can be defined in the config/mach/mach.h file,
193 otherwise we set it to 1. */
195 static int issue_rate;
197 static int sms_order_nodes (ddg_ptr, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr);
199 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr, rtx);
201 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
202 rtx, rtx);
203 static void duplicate_insns_of_cycles (partial_schedule_ptr,
204 int, int, int, rtx);
206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
208 #define SCHED_FIRST_REG_MOVE(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
210 #define SCHED_NREG_MOVES(x) \
211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
217 typedef struct node_sched_params
219 int asap; /* A lower-bound on the absolute scheduling cycle. */
220 int time; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
226 rtx first_reg_move;
228 /* The number of register-move instructions added, immediately preceding
229 first_reg_move. */
230 int nreg_moves;
232 int row; /* Holds time % ii. */
233 int stage; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
237 int column;
238 } *node_sched_params_ptr;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
244 static const char *
245 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
247 static char tmp[80];
249 sprintf (tmp, "i%4d", INSN_UID (insn));
250 return tmp;
253 static void
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
255 regset cond_exec ATTRIBUTE_UNUSED,
256 regset used ATTRIBUTE_UNUSED,
257 regset set ATTRIBUTE_UNUSED)
261 static struct sched_info sms_sched_info =
263 NULL,
264 NULL,
265 NULL,
266 NULL,
267 NULL,
268 sms_print_insn,
269 NULL,
270 compute_jump_reg_dependencies,
271 NULL, NULL,
272 NULL, NULL,
273 0, 0, 0,
275 NULL, NULL, NULL, NULL, NULL,
280 /* Given HEAD and TAIL which are the first and last insns in a loop;
281 return the register which controls the loop. Return zero if it has
 282    more than one occurrence in the loop besides the control part, or if
 283    the do-loop pattern is not of the form we expect. */
284 static rtx
285 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
287 #ifdef HAVE_doloop_end
288 rtx reg, condition, insn, first_insn_not_to_check;
290 if (!JUMP_P (tail))
291 return NULL_RTX;
293 /* TODO: Free SMS's dependence on doloop_condition_get. */
294 condition = doloop_condition_get (tail);
295 if (! condition)
296 return NULL_RTX;
298 if (REG_P (XEXP (condition, 0)))
299 reg = XEXP (condition, 0);
300 else if (GET_CODE (XEXP (condition, 0)) == PLUS
301 && REG_P (XEXP (XEXP (condition, 0), 0)))
302 reg = XEXP (XEXP (condition, 0), 0);
303 else
304 gcc_unreachable ();
306 /* Check that the COUNT_REG has no other occurrences in the loop
307 until the decrement. We assume the control part consists of
308 either a single (parallel) branch-on-count or a (non-parallel)
309 branch immediately preceded by a single (decrement) insn. */
310 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
311 : PREV_INSN (tail));
313 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
314 if (reg_mentioned_p (reg, insn))
316 if (dump_file)
318 fprintf (dump_file, "SMS count_reg found ");
319 print_rtl_single (dump_file, reg);
320 fprintf (dump_file, " outside control in insn:\n");
321 print_rtl_single (dump_file, insn);
324 return NULL_RTX;
327 return reg;
328 #else
329 return NULL_RTX;
330 #endif
333 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
334 that the number of iterations is a compile-time constant. If so,
335 return the rtx that sets COUNT_REG to a constant, and set COUNT to
336 this constant. Otherwise return 0. */
337 static rtx
338 const_iteration_count (rtx count_reg, basic_block pre_header,
339 HOST_WIDEST_INT * count)
341 rtx insn;
342 rtx head, tail;
344 if (! pre_header)
345 return NULL_RTX;
347 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
349 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
350 if (INSN_P (insn) && single_set (insn) &&
351 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
353 rtx pat = single_set (insn);
355 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
357 *count = INTVAL (SET_SRC (pat));
358 return insn;
361 return NULL_RTX;
364 return NULL_RTX;
367 /* A very simple resource-based lower bound on the initiation interval.
368 ??? Improve the accuracy of this bound by considering the
369 utilization of various units. */
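/* For example, a loop body of 8 insns on a machine that can issue 4
   insns per cycle gives a resource-based lower bound of 8 / 4 = 2
   cycles per iteration, unless the target supplies its own
   sms_res_mii hook.  (Illustrative numbers.)  */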
370 static int
371 res_MII (ddg_ptr g)
373 if (targetm.sched.sms_res_mii)
374 return targetm.sched.sms_res_mii (g);
376 return (g->num_nodes / issue_rate);
380 /* Points to the array that contains the sched data for each node. */
381 static node_sched_params_ptr node_sched_params;
383 /* Allocate sched_params for each node and initialize it. Assumes that
 384    the aux field of each node contains the asap bound (computed earlier),
385 and copies it into the sched_params field. */
386 static void
387 set_node_sched_params (ddg_ptr g)
389 int i;
391 /* Allocate for each node in the DDG a place to hold the "sched_data". */
 392   /* Initialize ASAP/ALAP/HEIGHT to zero.  */
393 node_sched_params = (node_sched_params_ptr)
394 xcalloc (g->num_nodes,
395 sizeof (struct node_sched_params));
397 /* Set the pointer of the general data of the node to point to the
398 appropriate sched_params structure. */
399 for (i = 0; i < g->num_nodes; i++)
401 /* Watch out for aliasing problems? */
402 node_sched_params[i].asap = g->nodes[i].aux.count;
403 g->nodes[i].aux.info = &node_sched_params[i];
407 static void
408 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
410 int i;
412 if (! file)
413 return;
414 for (i = 0; i < num_nodes; i++)
416 node_sched_params_ptr nsp = &node_sched_params[i];
417 rtx reg_move = nsp->first_reg_move;
418 int j;
420 fprintf (file, "Node = %d; INSN = %d\n", i,
421 (INSN_UID (g->nodes[i].insn)));
422 fprintf (file, " asap = %d:\n", nsp->asap);
423 fprintf (file, " time = %d:\n", nsp->time);
424 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
425 for (j = 0; j < nsp->nreg_moves; j++)
427 fprintf (file, " reg_move = ");
428 print_rtl_single (file, reg_move);
429 reg_move = PREV_INSN (reg_move);
435 Breaking intra-loop register anti-dependences:
436 Each intra-loop register anti-dependence implies a cross-iteration true
437 dependence of distance 1. Therefore, we can remove such false dependencies
438 and figure out if the partial schedule broke them by checking if (for a
439 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
440 if so generate a register move. The number of such moves is equal to:
 441    nreg_moves = (SCHED_TIME (use) - SCHED_TIME (def)) / ii + 1
 442    - (0 if the anti-dependence was broken by the schedule,
 443    1 if not).
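/* For instance (illustrative numbers): with ii = 4, SCHED_TIME (def) = 2
   and SCHED_TIME (use) = 11 for a broken anti-dependence, the formula
   gives (11 - 2) / 4 + 1 - 0 = 3 reg_moves, matching the distance-1
   computation (11 - 2 + 4) / 4 = 3 used in generate_reg_moves below.  */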
445 static struct undo_replace_buff_elem *
446 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
448 ddg_ptr g = ps->g;
449 int ii = ps->ii;
450 int i;
451 struct undo_replace_buff_elem *reg_move_replaces = NULL;
453 for (i = 0; i < g->num_nodes; i++)
455 ddg_node_ptr u = &g->nodes[i];
456 ddg_edge_ptr e;
457 int nreg_moves = 0, i_reg_move;
458 sbitmap *uses_of_defs;
459 rtx last_reg_move;
460 rtx prev_reg, old_reg;
462 /* Compute the number of reg_moves needed for u, by looking at life
463 ranges started at u (excluding self-loops). */
464 for (e = u->out; e; e = e->next_out)
465 if (e->type == TRUE_DEP && e->dest != e->src)
467 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
469 if (e->distance == 1)
470 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
472 /* If dest precedes src in the schedule of the kernel, then dest
473 will read before src writes and we can save one reg_copy. */
474 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
475 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
476 nreg_moves4e--;
478 nreg_moves = MAX (nreg_moves, nreg_moves4e);
481 if (nreg_moves == 0)
482 continue;
484 /* Every use of the register defined by node may require a different
485 copy of this register, depending on the time the use is scheduled.
486 Set a bitmap vector, telling which nodes use each copy of this
487 register. */
488 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
489 sbitmap_vector_zero (uses_of_defs, nreg_moves);
490 for (e = u->out; e; e = e->next_out)
491 if (e->type == TRUE_DEP && e->dest != e->src)
493 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
495 if (e->distance == 1)
496 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
498 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
499 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
500 dest_copy--;
502 if (dest_copy)
503 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
506 /* Now generate the reg_moves, attaching relevant uses to them. */
507 SCHED_NREG_MOVES (u) = nreg_moves;
508 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
509 last_reg_move = u->insn;
511 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
513 unsigned int i_use = 0;
514 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
515 rtx reg_move = gen_move_insn (new_reg, prev_reg);
516 sbitmap_iterator sbi;
518 add_insn_before (reg_move, last_reg_move, NULL);
519 last_reg_move = reg_move;
521 if (!SCHED_FIRST_REG_MOVE (u))
522 SCHED_FIRST_REG_MOVE (u) = reg_move;
524 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
526 struct undo_replace_buff_elem *rep;
528 rep = (struct undo_replace_buff_elem *)
529 xcalloc (1, sizeof (struct undo_replace_buff_elem));
530 rep->insn = g->nodes[i_use].insn;
531 rep->orig_reg = old_reg;
532 rep->new_reg = new_reg;
534 if (! reg_move_replaces)
535 reg_move_replaces = rep;
536 else
538 rep->next = reg_move_replaces;
539 reg_move_replaces = rep;
542 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
543 if (rescan)
544 df_insn_rescan (g->nodes[i_use].insn);
547 prev_reg = new_reg;
549 sbitmap_vector_free (uses_of_defs);
551 return reg_move_replaces;
554 /* Free memory allocated for the undo buffer. */
555 static void
556 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
559 while (reg_move_replaces)
561 struct undo_replace_buff_elem *rep = reg_move_replaces;
563 reg_move_replaces = reg_move_replaces->next;
564 free (rep);
568 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
569 of SCHED_ROW and SCHED_STAGE. */
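/* For example (illustrative numbers): if PS_MIN_CYCLE (ps) is 3 and
   ii is 2, a node scheduled at absolute cycle 7 ends up with
   SCHED_TIME 4, SCHED_ROW 0 and SCHED_STAGE 2.  */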
570 static void
571 normalize_sched_times (partial_schedule_ptr ps)
573 int row;
574 int amount = PS_MIN_CYCLE (ps);
575 int ii = ps->ii;
576 ps_insn_ptr crr_insn;
578 for (row = 0; row < ii; row++)
579 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
581 ddg_node_ptr u = crr_insn->node;
582 int normalized_time = SCHED_TIME (u) - amount;
584 if (dump_file)
585 fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
586 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
587 (u), ps->min_cycle);
588 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
589 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
590 SCHED_TIME (u) = normalized_time;
591 SCHED_ROW (u) = normalized_time % ii;
592 SCHED_STAGE (u) = normalized_time / ii;
596 /* Set SCHED_COLUMN of each node according to its position in PS. */
597 static void
598 set_columns_for_ps (partial_schedule_ptr ps)
600 int row;
602 for (row = 0; row < ps->ii; row++)
604 ps_insn_ptr cur_insn = ps->rows[row];
605 int column = 0;
607 for (; cur_insn; cur_insn = cur_insn->next_in_row)
608 SCHED_COLUMN (cur_insn->node) = column++;
612 /* Permute the insns according to their order in PS, from row 0 to
613 row ii-1, and position them right before LAST. This schedules
614 the insns of the loop kernel. */
615 static void
616 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
618 int ii = ps->ii;
619 int row;
620 ps_insn_ptr ps_ij;
622 for (row = 0; row < ii ; row++)
623 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
624 if (PREV_INSN (last) != ps_ij->node->insn)
625 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
626 PREV_INSN (last));
629 static void
630 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
631 int to_stage, int for_prolog, rtx count_reg)
633 int row;
634 ps_insn_ptr ps_ij;
636 for (row = 0; row < ps->ii; row++)
637 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
639 ddg_node_ptr u_node = ps_ij->node;
640 int j, i_reg_moves;
641 rtx reg_move = NULL_RTX;
643 /* Do not duplicate any insn which refers to count_reg as it
644 belongs to the control part.
645 TODO: This should be done by analyzing the control part of
646 the loop. */
647 if (reg_mentioned_p (count_reg, u_node->insn))
648 continue;
650 if (for_prolog)
652 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
653 number of reg_moves starting with the second occurrence of
654 u_node, which is generated if its SCHED_STAGE <= to_stage. */
655 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
656 i_reg_moves = MAX (i_reg_moves, 0);
657 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
659 /* The reg_moves start from the *first* reg_move backwards. */
660 if (i_reg_moves)
662 reg_move = SCHED_FIRST_REG_MOVE (u_node);
663 for (j = 1; j < i_reg_moves; j++)
664 reg_move = PREV_INSN (reg_move);
667 else /* It's for the epilog. */
669 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
670 starting to decrease one stage after u_node no longer occurs;
671 that is, generate all reg_moves until
672 SCHED_STAGE (u_node) == from_stage - 1. */
673 i_reg_moves = SCHED_NREG_MOVES (u_node)
674 - (from_stage - SCHED_STAGE (u_node) - 1);
675 i_reg_moves = MAX (i_reg_moves, 0);
676 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
678 /* The reg_moves start from the *last* reg_move forwards. */
679 if (i_reg_moves)
681 reg_move = SCHED_FIRST_REG_MOVE (u_node);
682 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
683 reg_move = PREV_INSN (reg_move);
687 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
688 emit_insn (copy_rtx (PATTERN (reg_move)));
689 if (SCHED_STAGE (u_node) >= from_stage
690 && SCHED_STAGE (u_node) <= to_stage)
691 duplicate_insn_chain (u_node->first_note, u_node->insn);
696 /* Generate the instructions (including reg_moves) for prolog & epilog. */
697 static void
698 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
699 rtx count_reg, rtx count_init)
701 int i;
702 int last_stage = PS_STAGE_COUNT (ps) - 1;
703 edge e;
705 /* Generate the prolog, inserting its insns on the loop-entry edge. */
706 start_sequence ();
708 if (!count_init)
710 /* Generate instructions at the beginning of the prolog to
711 adjust the loop count by STAGE_COUNT. If loop count is constant
712 (count_init), this constant is adjusted by STAGE_COUNT in
713 generate_prolog_epilog function. */
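/* For example (illustrative numbers): with a stage count of 3,
   last_stage is 2, so a loop that originally iterated 10 times has its
   count register decremented by 2 here and the kernel then iterates
   10 - 2 = 8 times; the constant-count case reaches the same 8 via
   loop_count - stage_count + 1 in sms_schedule.  */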
714 rtx sub_reg = NULL_RTX;
716 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
717 count_reg, GEN_INT (last_stage),
718 count_reg, 1, OPTAB_DIRECT);
719 gcc_assert (REG_P (sub_reg));
720 if (REGNO (sub_reg) != REGNO (count_reg))
721 emit_move_insn (count_reg, sub_reg);
724 for (i = 0; i < last_stage; i++)
725 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
727 /* Put the prolog on the entry edge. */
728 e = loop_preheader_edge (loop);
729 split_edge_and_insert (e, get_insns ());
731 end_sequence ();
733 /* Generate the epilog, inserting its insns on the loop-exit edge. */
734 start_sequence ();
736 for (i = 0; i < last_stage; i++)
737 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
739 /* Put the epilogue on the exit edge. */
740 gcc_assert (single_exit (loop));
741 e = single_exit (loop);
742 split_edge_and_insert (e, get_insns ());
743 end_sequence ();
746 /* Return true if all the BBs of the loop are empty except the
747 loop header. */
748 static bool
749 loop_single_full_bb_p (struct loop *loop)
751 unsigned i;
752 basic_block *bbs = get_loop_body (loop);
754 for (i = 0; i < loop->num_nodes ; i++)
756 rtx head, tail;
757 bool empty_bb = true;
759 if (bbs[i] == loop->header)
760 continue;
762 /* Make sure that basic blocks other than the header
 763    have only notes, labels or jumps.  */
764 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
765 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
767 if (NOTE_P (head) || LABEL_P (head)
768 || (INSN_P (head) && JUMP_P (head)))
769 continue;
770 empty_bb = false;
771 break;
774 if (! empty_bb)
776 free (bbs);
777 return false;
780 free (bbs);
781 return true;
 784 /* A simple loop from the SMS point of view; it is a loop that is composed of
785 either a single basic block or two BBs - a header and a latch. */
786 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
787 && (EDGE_COUNT (loop->latch->preds) == 1) \
788 && (EDGE_COUNT (loop->latch->succs) == 1))
790 /* Return true if the loop is in its canonical form and false if not.
 791    i.e., it satisfies SIMPLE_SMS_LOOP_P and has one preheader block and a single exit.  */
792 static bool
793 loop_canon_p (struct loop *loop)
796 if (loop->inner || !loop_outer (loop))
797 return false;
799 if (!single_exit (loop))
801 if (dump_file)
803 rtx insn = BB_END (loop->header);
805 fprintf (dump_file, "SMS loop many exits ");
806 fprintf (dump_file, " %s %d (file, line)\n",
807 insn_file (insn), insn_line (insn));
809 return false;
812 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
814 if (dump_file)
816 rtx insn = BB_END (loop->header);
818 fprintf (dump_file, "SMS loop many BBs. ");
819 fprintf (dump_file, " %s %d (file, line)\n",
820 insn_file (insn), insn_line (insn));
822 return false;
825 return true;
 828 /* If there is more than one entry to the loop,
829 make it one by splitting the first entry edge and
830 redirecting the others to the new BB. */
831 static void
832 canon_loop (struct loop *loop)
834 edge e;
835 edge_iterator i;
837 /* Avoid annoying special cases of edges going to exit
838 block. */
839 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
840 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
841 split_edge (e);
843 if (loop->latch == loop->header
844 || EDGE_COUNT (loop->latch->succs) > 1)
846 FOR_EACH_EDGE (e, i, loop->header->preds)
847 if (e->src == loop->latch)
848 break;
849 split_edge (e);
 853 /* Probability in % that the sms-ed loop rolls enough so that the optimized
854 version may be entered. Just a guess. */
855 #define PROB_SMS_ENOUGH_ITERATIONS 80
857 /* Used to calculate the upper bound of ii. */
858 #define MAXII_FACTOR 2
860 /* Main entry point, perform SMS scheduling on the loops of the function
861 that consist of single basic blocks. */
862 static void
863 sms_schedule (void)
865 rtx insn;
866 ddg_ptr *g_arr, g;
867 int * node_order;
868 int maxii, max_asap;
869 loop_iterator li;
870 partial_schedule_ptr ps;
871 basic_block bb = NULL;
872 struct loop *loop;
873 basic_block condition_bb = NULL;
874 edge latch_edge;
875 gcov_type trip_count = 0;
877 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
878 | LOOPS_HAVE_RECORDED_EXITS);
879 if (number_of_loops () <= 1)
881 loop_optimizer_finalize ();
882 return; /* There are no loops to schedule. */
885 /* Initialize issue_rate. */
886 if (targetm.sched.issue_rate)
888 int temp = reload_completed;
890 reload_completed = 1;
891 issue_rate = targetm.sched.issue_rate ();
892 reload_completed = temp;
894 else
895 issue_rate = 1;
897 /* Initialize the scheduler. */
898 current_sched_info = &sms_sched_info;
900 /* Init Data Flow analysis, to be used in interloop dep calculation. */
901 df_set_flags (DF_LR_RUN_DCE);
902 df_rd_add_problem ();
903 df_note_add_problem ();
904 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
905 df_analyze ();
906 regstat_compute_calls_crossed ();
907 sched_init ();
909 /* Allocate memory to hold the DDG array one entry for each loop.
910 We use loop->num as index into this array. */
911 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
913 /* Build DDGs for all the relevant loops and hold them in G_ARR
914 indexed by the loop index. */
915 FOR_EACH_LOOP (li, loop, 0)
917 rtx head, tail;
918 rtx count_reg;
920 /* For debugging. */
921 if (dbg_cnt (sms_sched_loop) == false)
923 if (dump_file)
924 fprintf (dump_file, "SMS reached max limit... \n");
926 break;
929 if (! loop_canon_p (loop))
930 continue;
932 if (! loop_single_full_bb_p (loop))
933 continue;
935 bb = loop->header;
937 get_ebb_head_tail (bb, bb, &head, &tail);
938 latch_edge = loop_latch_edge (loop);
939 gcc_assert (single_exit (loop));
940 if (single_exit (loop)->count)
941 trip_count = latch_edge->count / single_exit (loop)->count;
 943       /* Perform SMS only on loops whose average count is above the threshold.  */
945 if ( latch_edge->count
946 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
948 if (dump_file)
950 fprintf (dump_file, " %s %d (file, line)\n",
951 insn_file (tail), insn_line (tail));
952 fprintf (dump_file, "SMS single-bb-loop\n");
953 if (profile_info && flag_branch_probabilities)
955 fprintf (dump_file, "SMS loop-count ");
956 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
957 (HOST_WIDEST_INT) bb->count);
958 fprintf (dump_file, "\n");
959 fprintf (dump_file, "SMS trip-count ");
960 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
961 (HOST_WIDEST_INT) trip_count);
962 fprintf (dump_file, "\n");
963 fprintf (dump_file, "SMS profile-sum-max ");
964 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
965 (HOST_WIDEST_INT) profile_info->sum_max);
966 fprintf (dump_file, "\n");
969 continue;
972 /* Make sure this is a doloop. */
973 if ( !(count_reg = doloop_register_get (head, tail)))
974 continue;
976 /* Don't handle BBs with calls or barriers, or !single_set insns,
977 or auto-increment insns (to avoid creating invalid reg-moves
978 for the auto-increment insns).
979 ??? Should handle auto-increment insns.
980 ??? Should handle insns defining subregs. */
981 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
983 rtx set;
985 if (CALL_P (insn)
986 || BARRIER_P (insn)
987 || (INSN_P (insn) && !JUMP_P (insn)
988 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
989 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
990 || (INSN_P (insn) && (set = single_set (insn))
991 && GET_CODE (SET_DEST (set)) == SUBREG))
992 break;
995 if (insn != NEXT_INSN (tail))
997 if (dump_file)
999 if (CALL_P (insn))
1000 fprintf (dump_file, "SMS loop-with-call\n");
1001 else if (BARRIER_P (insn))
1002 fprintf (dump_file, "SMS loop-with-barrier\n");
1003 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1004 fprintf (dump_file, "SMS reg inc\n");
1005 else if ((INSN_P (insn) && !JUMP_P (insn)
1006 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1007 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1008 else
1009 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1010 print_rtl_single (dump_file, insn);
1013 continue;
1016 if (! (g = create_ddg (bb, 0)))
1018 if (dump_file)
1019 fprintf (dump_file, "SMS doloop\n");
1020 continue;
1023 g_arr[loop->num] = g;
1026 /* We don't want to perform SMS on new loops - created by versioning. */
1027 FOR_EACH_LOOP (li, loop, 0)
1029 rtx head, tail;
1030 rtx count_reg, count_init;
1031 int mii, rec_mii;
1032 unsigned stage_count = 0;
1033 HOST_WIDEST_INT loop_count = 0;
1035 if (! (g = g_arr[loop->num]))
1036 continue;
1038 if (dump_file)
1039 print_ddg (dump_file, g);
1041 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1043 latch_edge = loop_latch_edge (loop);
1044 gcc_assert (single_exit (loop));
1045 if (single_exit (loop)->count)
1046 trip_count = latch_edge->count / single_exit (loop)->count;
1048 if (dump_file)
1050 fprintf (dump_file, " %s %d (file, line)\n",
1051 insn_file (tail), insn_line (tail));
1052 fprintf (dump_file, "SMS single-bb-loop\n");
1053 if (profile_info && flag_branch_probabilities)
1055 fprintf (dump_file, "SMS loop-count ");
1056 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1057 (HOST_WIDEST_INT) bb->count);
1058 fprintf (dump_file, "\n");
1059 fprintf (dump_file, "SMS profile-sum-max ");
1060 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1061 (HOST_WIDEST_INT) profile_info->sum_max);
1062 fprintf (dump_file, "\n");
1064 fprintf (dump_file, "SMS doloop\n");
1065 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1066 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1067 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1071       /* In case the loop has a doloop register, it gets special
1072 	 handling.  */
1073 count_init = NULL_RTX;
1074 if ((count_reg = doloop_register_get (head, tail)))
1076 basic_block pre_header;
1078 pre_header = loop_preheader_edge (loop)->src;
1079 count_init = const_iteration_count (count_reg, pre_header,
1080 &loop_count);
1082 gcc_assert (count_reg);
1084 if (dump_file && count_init)
1086 fprintf (dump_file, "SMS const-doloop ");
1087 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1088 loop_count);
1089 fprintf (dump_file, "\n");
1092 node_order = XNEWVEC (int, g->num_nodes);
1094 mii = 1; /* Need to pass some estimate of mii. */
1095 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1096 mii = MAX (res_MII (g), rec_mii);
1097 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1099 if (dump_file)
1100 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1101 rec_mii, mii, maxii);
1103 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1104 ASAP. */
1105 set_node_sched_params (g);
1107 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1109 if (ps)
1110 stage_count = PS_STAGE_COUNT (ps);
1112 /* Stage count of 1 means that there is no interleaving between
1113 	 iterations; let the scheduling passes do the job.  */
1114 if (stage_count < 1
1115 || (count_init && (loop_count <= stage_count))
1116 || (flag_branch_probabilities && (trip_count <= stage_count)))
1118 if (dump_file)
1120 fprintf (dump_file, "SMS failed... \n");
1121 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1122 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1123 fprintf (dump_file, ", trip-count=");
1124 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1125 fprintf (dump_file, ")\n");
1127 continue;
1129 else
1131 struct undo_replace_buff_elem *reg_move_replaces;
1133 if (dump_file)
1135 fprintf (dump_file,
1136 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1137 stage_count);
1138 print_partial_schedule (ps, dump_file);
1139 fprintf (dump_file,
1140 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1141 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1144 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1145 the closing_branch was scheduled and should appear in the last (ii-1)
1146 row. Otherwise, we are free to schedule the branch, and we let nodes
1147 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1148 row; this should reduce stage_count to minimum.
1149 TODO: Revisit the issue of scheduling the insns of the
1150 control part relative to the branch when the control part
1151 has more than one insn. */
1152 normalize_sched_times (ps);
1153 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1154 set_columns_for_ps (ps);
1156 canon_loop (loop);
1158 	  /* In case the BCT count is not known, do loop-versioning.  */
1159 if (count_reg && ! count_init)
1161 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1162 GEN_INT(stage_count));
1163 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1164 * REG_BR_PROB_BASE) / 100;
1166 loop_version (loop, comp_rtx, &condition_bb,
1167 prob, prob, REG_BR_PROB_BASE - prob,
1168 true);
1171 /* Set new iteration count of loop kernel. */
1172 if (count_reg && count_init)
1173 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1174 - stage_count + 1);
1176 /* Now apply the scheduled kernel to the RTL of the loop. */
1177 permute_partial_schedule (ps, g->closing_branch->first_note);
1179 /* Mark this loop as software pipelined so the later
1180 	     scheduling passes don't touch it.  */
1181 if (! flag_resched_modulo_sched)
1182 g->bb->flags |= BB_DISABLE_SCHEDULE;
1183 /* The life-info is not valid any more. */
1184 df_set_bb_dirty (g->bb);
1186 reg_move_replaces = generate_reg_moves (ps, true);
1187 if (dump_file)
1188 print_node_sched_params (dump_file, g->num_nodes, g);
1189 /* Generate prolog and epilog. */
1190 generate_prolog_epilog (ps, loop, count_reg, count_init);
1192 free_undo_replace_buff (reg_move_replaces);
1195 free_partial_schedule (ps);
1196 free (node_sched_params);
1197 free (node_order);
1198 free_ddg (g);
1201 regstat_free_calls_crossed ();
1202 free (g_arr);
1204 /* Release scheduler data, needed until now because of DFA. */
1205 sched_finish ();
1206 loop_optimizer_finalize ();
1209 /* The SMS scheduling algorithm itself
1210 -----------------------------------
1211 Input: 'O' an ordered list of insns of a loop.
1212 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1214 'Q' is the empty Set
1215 'PS' is the partial schedule; it holds the currently scheduled nodes with
1216 their cycle/slot.
1217 'PSP' previously scheduled predecessors.
1218 'PSS' previously scheduled successors.
1219 't(u)' the cycle where u is scheduled.
1220 'l(u)' is the latency of u.
1221 'd(v,u)' is the dependence distance from v to u.
1222 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1223 the node ordering phase.
1224 'check_hardware_resources_conflicts(u, PS, c)'
1225 run a trace around cycle/slot through DFA model
1226 to check resource conflicts involving instruction u
1227 at cycle c given the partial schedule PS.
1228 'add_to_partial_schedule_at_time(u, PS, c)'
1229 Add the node/instruction u to the partial schedule
1230 PS at time c.
1231 'calculate_register_pressure(PS)'
1232 Given a schedule of instructions, calculate the register
1233 pressure it implies. One implementation could be the
1234 maximum number of overlapping live ranges.
1235    'maxRP' The maximum allowed register pressure; it is usually derived from
1236    the number of registers available in the hardware.
1238 1. II = MII.
1239 2. PS = empty list
1240 3. for each node u in O in pre-computed order
1241 4. if (PSP(u) != Q && PSS(u) == Q) then
1242    5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1243 6. start = Early_start; end = Early_start + II - 1; step = 1
1244 11. else if (PSP(u) == Q && PSS(u) != Q) then
1245    12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1246 13. start = Late_start; end = Late_start - II + 1; step = -1
1247 14. else if (PSP(u) != Q && PSS(u) != Q) then
1248    15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1249    16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1250 17. start = Early_start;
1251 18. end = min(Early_start + II - 1 , Late_start);
1252 19. step = 1
1253 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1254 21. start = ASAP(u); end = start + II - 1; step = 1
1255 22. endif
1257 23. success = false
1258 24. for (c = start ; c != end ; c += step)
1259 25. if check_hardware_resources_conflicts(u, PS, c) then
1260 26. add_to_partial_schedule_at_time(u, PS, c)
1261 27. success = true
1262 28. break
1263 29. endif
1264 30. endfor
1265 31. if (success == false) then
1266 32. II = II + 1
1267 33. if (II > maxII) then
1268 34. finish - failed to schedule
1269 35. endif
1270 36. goto 2.
1271 37. endif
1272 38. endfor
1273 39. if (calculate_register_pressure(PS) > maxRP) then
1274 40. goto 32.
1275 41. endif
1276 42. compute epilogue & prologue
1277 43. finish - succeeded to schedule
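/* As a small illustration of steps 5-6 above (assumed numbers): with
   II = 3, if the only already-scheduled predecessor v of u has t(v) = 5,
   l(v) = 2 and d(v,u) = 1, then Early_start(u) = 5 + 2 - 1*3 = 4 and the
   cycles tried for u are 4 .. 4 + II - 1 = 6, in increasing order.  */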
1280 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1281 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1282 set to 0 to save compile time. */
1283 #define DFA_HISTORY SMS_DFA_HISTORY
1285 /* A threshold for the number of repeated unsuccessful attempts to insert
1286 an empty row, before we flush the partial schedule and start over. */
1287 #define MAX_SPLIT_NUM 10
1288 /* Given the partial schedule PS, this function calculates and returns the
1289 cycles in which we can schedule the node with the given index I.
1290 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1291 noticed that there are several cases in which we fail to SMS the loop
1292 because the sched window of a node is empty due to tight data-deps. In
1293 such cases we want to unschedule some of the predecessors/successors
1294    until we get a non-empty scheduling window.  It returns -1 if the
1295 scheduling window is empty and zero otherwise. */
1297 static int
1298 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1299 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1301 int start, step, end;
1302 ddg_edge_ptr e;
1303 int u = nodes_order [i];
1304 ddg_node_ptr u_node = &ps->g->nodes[u];
1305 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1306 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1307 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1308 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1309 int psp_not_empty;
1310 int pss_not_empty;
1312 /* 1. compute sched window for u (start, end, step). */
1313 sbitmap_zero (psp);
1314 sbitmap_zero (pss);
1315 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1316 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1318 if (psp_not_empty && !pss_not_empty)
1320 int early_start = INT_MIN;
1322 end = INT_MAX;
1323 for (e = u_node->in; e != 0; e = e->next_in)
1325 ddg_node_ptr v_node = e->src;
1327 if (dump_file)
1329 fprintf (dump_file, "\nProcessing edge: ");
1330 print_ddg_edge (dump_file, e);
1331 fprintf (dump_file,
1332 "\nScheduling %d (%d) in psp_not_empty,"
1333 " checking p %d (%d): ", u_node->cuid,
1334 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1335 (v_node->insn));
1338 if (TEST_BIT (sched_nodes, v_node->cuid))
1340 int p_st = SCHED_TIME (v_node);
1342 early_start =
1343 MAX (early_start, p_st + e->latency - (e->distance * ii));
1345 if (dump_file)
1346 fprintf (dump_file,
1347 "pred st = %d; early_start = %d; latency: %d",
1348 p_st, early_start, e->latency);
1350 if (e->data_type == MEM_DEP)
1351 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1353 else if (dump_file)
1354 fprintf (dump_file, "the node is not scheduled\n");
1356 start = early_start;
1357 end = MIN (end, early_start + ii);
1358       /* Schedule the node close to its predecessors.  */
1359 step = 1;
1361 if (dump_file)
1362 fprintf (dump_file,
1363 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1364 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1367 else if (!psp_not_empty && pss_not_empty)
1369 int late_start = INT_MAX;
1371 end = INT_MIN;
1372 for (e = u_node->out; e != 0; e = e->next_out)
1374 ddg_node_ptr v_node = e->dest;
1376 if (dump_file)
1378 fprintf (dump_file, "\nProcessing edge:");
1379 print_ddg_edge (dump_file, e);
1380 fprintf (dump_file,
1381 "\nScheduling %d (%d) in pss_not_empty,"
1382 " checking s %d (%d): ", u_node->cuid,
1383 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1384 (v_node->insn));
1387 if (TEST_BIT (sched_nodes, v_node->cuid))
1389 int s_st = SCHED_TIME (v_node);
1391 late_start = MIN (late_start,
1392 s_st - e->latency + (e->distance * ii));
1394 if (dump_file)
1395 fprintf (dump_file,
1396 "succ st = %d; late_start = %d; latency = %d",
1397 s_st, late_start, e->latency);
1399 if (e->data_type == MEM_DEP)
1400 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1401 if (dump_file)
1402 fprintf (dump_file, "end = %d\n", end);
1405 else if (dump_file)
1406 fprintf (dump_file, "the node is not scheduled\n");
1409 start = late_start;
1410 end = MAX (end, late_start - ii);
1411       /* Schedule the node close to its successors.  */
1412 step = -1;
1414 if (dump_file)
1415 fprintf (dump_file,
1416 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1417 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1421 else if (psp_not_empty && pss_not_empty)
1423 int early_start = INT_MIN;
1424 int late_start = INT_MAX;
1425 int count_preds = 0;
1426 int count_succs = 0;
1428 start = INT_MIN;
1429 end = INT_MAX;
1430 for (e = u_node->in; e != 0; e = e->next_in)
1432 ddg_node_ptr v_node = e->src;
1434 if (dump_file)
1436 fprintf (dump_file, "\nProcessing edge:");
1437 print_ddg_edge (dump_file, e);
1438 fprintf (dump_file,
1439 "\nScheduling %d (%d) in psp_pss_not_empty,"
1440 " checking p %d (%d): ", u_node->cuid, INSN_UID
1441 (u_node->insn), v_node->cuid, INSN_UID
1442 (v_node->insn));
1445 if (TEST_BIT (sched_nodes, v_node->cuid))
1447 int p_st = SCHED_TIME (v_node);
1449 early_start = MAX (early_start,
1450 p_st + e->latency
1451 - (e->distance * ii));
1453 if (dump_file)
1454 fprintf (dump_file,
1455 "pred st = %d; early_start = %d; latency = %d",
1456 p_st, early_start, e->latency);
1458 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1459 count_preds++;
1461 if (e->data_type == MEM_DEP)
1462 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1464 else if (dump_file)
1465 fprintf (dump_file, "the node is not scheduled\n");
1468 for (e = u_node->out; e != 0; e = e->next_out)
1470 ddg_node_ptr v_node = e->dest;
1472 if (dump_file)
1474 fprintf (dump_file, "\nProcessing edge:");
1475 print_ddg_edge (dump_file, e);
1476 fprintf (dump_file,
1477 "\nScheduling %d (%d) in psp_pss_not_empty,"
1478 " checking s %d (%d): ", u_node->cuid, INSN_UID
1479 (u_node->insn), v_node->cuid, INSN_UID
1480 (v_node->insn));
1483 if (TEST_BIT (sched_nodes, v_node->cuid))
1485 int s_st = SCHED_TIME (v_node);
1487 late_start = MIN (late_start,
1488 s_st - e->latency
1489 + (e->distance * ii));
1491 if (dump_file)
1492 fprintf (dump_file,
1493 "succ st = %d; late_start = %d; latency = %d",
1494 s_st, late_start, e->latency);
1496 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1497 count_succs++;
1499 if (e->data_type == MEM_DEP)
1500 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1502 else if (dump_file)
1503 fprintf (dump_file, "the node is not scheduled\n");
1506 start = MAX (start, early_start);
1507 end = MIN (end, MIN (early_start + ii, late_start + 1));
1508 step = 1;
1509       /* If there are more successors than predecessors, schedule the
1510 	 node close to its successors.  */
1511 if (count_succs >= count_preds)
1513 int old_start = start;
1515 start = end - 1;
1516 end = old_start - 1;
1517 step = -1;
1520 else /* psp is empty && pss is empty. */
1522 start = SCHED_ASAP (u_node);
1523 end = start + ii;
1524 step = 1;
1527 *start_p = start;
1528 *step_p = step;
1529 *end_p = end;
1530 sbitmap_free (psp);
1531 sbitmap_free (pss);
1533 if ((start >= end && step == 1) || (start <= end && step == -1))
1535 if (dump_file)
1536 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1537 start, end, step);
1538 return -1;
1541 return 0;
1544 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1545    node currently being scheduled.  At the end of the calculation
1546 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of U_NODE
1547 which are in SCHED_NODES (already scheduled nodes) and scheduled at
1548 the same row as the first/last row of U_NODE's scheduling window.
1549    The first and last rows are calculated using the following parameters:
1550    START/END rows - The cycles that begin/end the traversal of the window;
1551 searching for an empty cycle to schedule U_NODE.
1552 STEP - The direction in which we traverse the window.
1553 II - The initiation interval.
1554 TODO: We can add an insn to the must_precede/must_follow bitmap only
1555 if it has tight dependence to U and they are both scheduled in the
1556 same row. The current check is more conservative and content with
1557 the fact that both U and the insn are scheduled in the same row. */
1559 static void
1560 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1561 int step, int ii, sbitmap sched_nodes,
1562 sbitmap must_precede, sbitmap must_follow)
1564 ddg_edge_ptr e;
1565 int first_cycle_in_window, last_cycle_in_window;
1566 int first_row_in_window, last_row_in_window;
1568 gcc_assert (must_precede && must_follow);
1570 /* Consider the following scheduling window:
1571 {first_cycle_in_window, first_cycle_in_window+1, ...,
1572 last_cycle_in_window}. If step is 1 then the following will be
1573 the order we traverse the window: {start=first_cycle_in_window,
1574 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1575 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1576 end=first_cycle_in_window-1} if step is -1. */
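/* For example (assumed numbers): with start = 4, end = 7, step = 1 the
   window cycles are {4, 5, 6}, so first_cycle_in_window = 4 and
   last_cycle_in_window = 6; with start = 6, end = 3, step = -1 the same
   window is traversed backwards and the boundaries are again 4 and 6.  */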
1577 first_cycle_in_window = (step == 1) ? start : end - step;
1578 last_cycle_in_window = (step == 1) ? end - step : start;
1580 first_row_in_window = SMODULO (first_cycle_in_window, ii);
1581 last_row_in_window = SMODULO (last_cycle_in_window, ii);
1583 sbitmap_zero (must_precede);
1584 sbitmap_zero (must_follow);
1586 if (dump_file)
1587 fprintf (dump_file, "\nmust_precede: ");
1589 for (e = u_node->in; e != 0; e = e->next_in)
1590 if (TEST_BIT (sched_nodes, e->src->cuid)
1591 && (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window))
1593 if (dump_file)
1594 fprintf (dump_file, "%d ", e->src->cuid);
1596 SET_BIT (must_precede, e->src->cuid);
1599 if (dump_file)
1600 fprintf (dump_file, "\nmust_follow: ");
1602 for (e = u_node->out; e != 0; e = e->next_out)
1603 if (TEST_BIT (sched_nodes, e->dest->cuid)
1604 && (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window))
1606 if (dump_file)
1607 fprintf (dump_file, "%d ", e->dest->cuid);
1609 SET_BIT (must_follow, e->dest->cuid);
1612 if (dump_file)
1613 fprintf (dump_file, "\n");
1616 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1617 parameters to decide if that's possible:
1618 PS - The partial schedule.
1619 U - The serial number of U_NODE.
1620    NUM_SPLITS - The number of row splits made so far.
1621 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1622 the first row of the scheduling window)
1623 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1624 last row of the scheduling window) */
1626 static bool
1627 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1628 int u, int row, sbitmap sched_nodes,
1629 int *num_splits, sbitmap must_precede,
1630 sbitmap must_follow)
1632 ps_insn_ptr psi;
1633 bool success = 0;
1635 verify_partial_schedule (ps, sched_nodes);
1636 psi = ps_add_node_check_conflicts (ps, u_node, row,
1637 must_precede, must_follow);
1638 if (psi)
1640 SCHED_TIME (u_node) = row;
1641 SET_BIT (sched_nodes, u);
1642 success = 1;
1643 *num_splits = 0;
1644 if (dump_file)
1645 fprintf (dump_file, "Scheduled w/o split in %d\n", row);
1649 return success;
1652 /* This function implements the SMS scheduling algorithm described
1653    above.  */
1654 static partial_schedule_ptr
1655 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1657 int ii = mii;
1658 int i, c, success, num_splits = 0;
1659 int flush_and_start_over = true;
1660 int num_nodes = g->num_nodes;
1661 int start, end, step; /* Place together into one struct? */
1662 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1663 sbitmap must_precede = sbitmap_alloc (num_nodes);
1664 sbitmap must_follow = sbitmap_alloc (num_nodes);
1665 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1667 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1669 sbitmap_ones (tobe_scheduled);
1670 sbitmap_zero (sched_nodes);
1672 while (flush_and_start_over && (ii < maxii))
1675 if (dump_file)
1676 fprintf (dump_file, "Starting with ii=%d\n", ii);
1677 flush_and_start_over = false;
1678 sbitmap_zero (sched_nodes);
1680 for (i = 0; i < num_nodes; i++)
1682 int u = nodes_order[i];
1683 ddg_node_ptr u_node = &ps->g->nodes[u];
1684 rtx insn = u_node->insn;
1686 if (!INSN_P (insn))
1688 RESET_BIT (tobe_scheduled, u);
1689 continue;
1692 if (JUMP_P (insn)) /* Closing branch handled later. */
1694 RESET_BIT (tobe_scheduled, u);
1695 continue;
1698 if (TEST_BIT (sched_nodes, u))
1699 continue;
1701 	  /* Try to get a non-empty scheduling window.  */
1702 success = 0;
1703 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1704 &step, &end) == 0)
1706 if (dump_file)
1707 fprintf (dump_file, "\nTrying to schedule node %d \
1708 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1709 (g->nodes[u].insn)), start, end, step);
1711 gcc_assert ((step > 0 && start < end)
1712 || (step < 0 && start > end));
1714 calculate_must_precede_follow (u_node, start, end, step, ii,
1715 sched_nodes, must_precede,
1716 must_follow);
1718 for (c = start; c != end; c += step)
1720 sbitmap tmp_precede = NULL;
1721 sbitmap tmp_follow = NULL;
1723 if (c == start)
1725 if (step == 1)
1726 tmp_precede = must_precede;
1727 else /* step == -1. */
1728 tmp_follow = must_follow;
1730 if (c == end - step)
1732 if (step == 1)
1733 tmp_follow = must_follow;
1734 else /* step == -1. */
1735 tmp_precede = must_precede;
1738 success =
1739 try_scheduling_node_in_cycle (ps, u_node, u, c,
1740 sched_nodes,
1741 &num_splits, tmp_precede,
1742 tmp_follow);
1743 if (success)
1744 break;
1747 verify_partial_schedule (ps, sched_nodes);
1749 if (!success)
1751 int split_row;
1753 if (ii++ == maxii)
1754 break;
1756 if (num_splits >= MAX_SPLIT_NUM)
1758 num_splits = 0;
1759 flush_and_start_over = true;
1760 verify_partial_schedule (ps, sched_nodes);
1761 reset_partial_schedule (ps, ii);
1762 verify_partial_schedule (ps, sched_nodes);
1763 break;
1766 num_splits++;
1767 if (step == 1)
1768 split_row = compute_split_row (sched_nodes, start, end,
1769 ps->ii, u_node);
1770 else
1771 split_row = compute_split_row (sched_nodes, end, start,
1772 ps->ii, u_node);
1774 ps_insert_empty_row (ps, split_row, sched_nodes);
1775 i--; /* Go back and retry node i. */
1777 if (dump_file)
1778 fprintf (dump_file, "num_splits=%d\n", num_splits);
1781 /* ??? If (success), check register pressure estimates. */
1782 } /* Continue with next node. */
1783 } /* While flush_and_start_over. */
1784 if (ii >= maxii)
1786 free_partial_schedule (ps);
1787 ps = NULL;
1789 else
1790 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1792 sbitmap_free (sched_nodes);
1793 sbitmap_free (must_precede);
1794 sbitmap_free (must_follow);
1795 sbitmap_free (tobe_scheduled);
1797 return ps;
1800 /* This function inserts a new empty row into PS at the position
1801    indicated by SPLITROW, keeping all already scheduled instructions
1802 intact and updating their SCHED_TIME and cycle accordingly. */
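/* For example (assumed numbers): with ii = 3 and split_row = 2, a node
   at normalized cycle 7 (row 1) moves to 7 + 7/3 = 9, i.e. row 1 of the
   new ii = 4 schedule, while a node at cycle 8 (row 2) moves to
   8 + 8/3 + 1 = 11, i.e. row 3, reflecting the empty row inserted at
   index 2.  */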
1803 static void
1804 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1805 sbitmap sched_nodes)
1807 ps_insn_ptr crr_insn;
1808 ps_insn_ptr *rows_new;
1809 int ii = ps->ii;
1810 int new_ii = ii + 1;
1811 int row;
1813 verify_partial_schedule (ps, sched_nodes);
1815 /* We normalize sched_time and rotate ps to have only non-negative sched
1816      times, for simplicity of updating cycles after inserting the new row.  */
1817 split_row -= ps->min_cycle;
1818 split_row = SMODULO (split_row, ii);
1819 if (dump_file)
1820 fprintf (dump_file, "split_row=%d\n", split_row);
1822 normalize_sched_times (ps);
1823 rotate_partial_schedule (ps, ps->min_cycle);
1825 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1826 for (row = 0; row < split_row; row++)
1828 rows_new[row] = ps->rows[row];
1829 ps->rows[row] = NULL;
1830 for (crr_insn = rows_new[row];
1831 crr_insn; crr_insn = crr_insn->next_in_row)
1833 ddg_node_ptr u = crr_insn->node;
1834 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1836 SCHED_TIME (u) = new_time;
1837 crr_insn->cycle = new_time;
1838 SCHED_ROW (u) = new_time % new_ii;
1839 SCHED_STAGE (u) = new_time / new_ii;
1844 rows_new[split_row] = NULL;
1846 for (row = split_row; row < ii; row++)
1848 rows_new[row + 1] = ps->rows[row];
1849 ps->rows[row] = NULL;
1850 for (crr_insn = rows_new[row + 1];
1851 crr_insn; crr_insn = crr_insn->next_in_row)
1853 ddg_node_ptr u = crr_insn->node;
1854 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1856 SCHED_TIME (u) = new_time;
1857 crr_insn->cycle = new_time;
1858 SCHED_ROW (u) = new_time % new_ii;
1859 SCHED_STAGE (u) = new_time / new_ii;
1863 /* Update the bookkeeping fields of PS (min/max cycle, rows, ii). */
1864 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1865 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1866 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1867 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1868 free (ps->rows);
1869 ps->rows = rows_new;
1870 ps->ii = new_ii;
1871 gcc_assert (ps->min_cycle >= 0);
1873 verify_partial_schedule (ps, sched_nodes);
1875 if (dump_file)
1876 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1877 ps->max_cycle);
1880 /* Given U_NODE, the node that failed to be scheduled, and LOW and
1881 UP, the boundaries of its scheduling window, use SCHED_NODES and II
1882 to compute and return a row in the partial schedule that can be
1883 split so as to separate a critical predecessor from a critical
1884 successor, thereby expanding the window. */
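/* For example, if the window was bounded from below by a critical
   predecessor scheduled at cycle 7 and II is 5, the row chosen for
   splitting is SMODULO (7 + 1, 5) = 3, i.e. the row just after that
   predecessor.  */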
1885 static int
1886 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1887 ddg_node_ptr u_node)
1889 ddg_edge_ptr e;
1890 int lower = INT_MIN, upper = INT_MAX;
1891 ddg_node_ptr crit_pred = NULL;
1892 ddg_node_ptr crit_succ = NULL;
1893 int crit_cycle;
1895 for (e = u_node->in; e != 0; e = e->next_in)
1897 ddg_node_ptr v_node = e->src;
1899 if (TEST_BIT (sched_nodes, v_node->cuid)
1900 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1901 if (SCHED_TIME (v_node) > lower)
1903 crit_pred = v_node;
1904 lower = SCHED_TIME (v_node);
1908 if (crit_pred != NULL)
1910 crit_cycle = SCHED_TIME (crit_pred) + 1;
1911 return SMODULO (crit_cycle, ii);
1914 for (e = u_node->out; e != 0; e = e->next_out)
1916 ddg_node_ptr v_node = e->dest;
1917 if (TEST_BIT (sched_nodes, v_node->cuid)
1918 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1919 if (SCHED_TIME (v_node) < upper)
1921 crit_succ = v_node;
1922 upper = SCHED_TIME (v_node);
1926 if (crit_succ != NULL)
1928 crit_cycle = SCHED_TIME (crit_succ);
1929 return SMODULO (crit_cycle, ii);
1932 if (dump_file)
1933 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
1935 return SMODULO ((low + up + 1) / 2, ii);
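/* Sanity-check the partial schedule: every insn in PS must belong to
   SCHED_NODES and its scheduled time must lie within
   [ps->min_cycle, ps->max_cycle].  */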
1938 static void
1939 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
1941 int row;
1942 ps_insn_ptr crr_insn;
1944 for (row = 0; row < ps->ii; row++)
1945 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
1947 ddg_node_ptr u = crr_insn->node;
1949 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
1950 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1951 popcount (sched_nodes) == number of insns in ps. */
1952 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
1953 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
1958 /* This page implements the algorithm for ordering the nodes of a DDG
1959 for modulo scheduling, activated through the
1960 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1962 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1963 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1964 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1965 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1966 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1967 #define DEPTH(x) (ASAP ((x)))
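/* The mobility MOB of a node is its slack: e.g. a node with ASAP 2 and
   ALAP 5 has mobility 3 and may be placed in any of cycles 2..5 of the
   acyclic schedule.  DEPTH is simply the node's ASAP.  */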
1969 typedef struct node_order_params * nopa;
1971 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1972 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1973 static nopa calculate_order_params (ddg_ptr, int, int *);
1974 static int find_max_asap (ddg_ptr, sbitmap);
1975 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1976 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1978 enum sms_direction {BOTTOMUP, TOPDOWN};
1980 struct node_order_params
1982 int asap;
1983 int alap;
1984 int height;
1987 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1988 static void
1989 check_nodes_order (int *node_order, int num_nodes)
1991 int i;
1992 sbitmap tmp = sbitmap_alloc (num_nodes);
1994 sbitmap_zero (tmp);
1996 if (dump_file)
1997 fprintf (dump_file, "SMS final nodes order: \n");
1999 for (i = 0; i < num_nodes; i++)
2001 int u = node_order[i];
2003 if (dump_file)
2004 fprintf (dump_file, "%d ", u);
2005 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2007 SET_BIT (tmp, u);
2010 if (dump_file)
2011 fprintf (dump_file, "\n");
2013 sbitmap_free (tmp);
2016 /* Order the nodes of G for scheduling and pass the result in
2017 NODE_ORDER. Also set aux.count of each node to ASAP.
2018 Store the maximal ASAP in PMAX_ASAP. Return the recMII for the given DDG. */
2019 static int
2020 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2022 int i;
2023 int rec_mii = 0;
2024 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2026 nopa nops = calculate_order_params (g, mii, pmax_asap);
2028 if (dump_file)
2029 print_sccs (dump_file, sccs, g);
2031 order_nodes_of_sccs (sccs, node_order);
2033 if (sccs->num_sccs > 0)
2034 /* First SCC has the largest recurrence_length. */
2035 rec_mii = sccs->sccs[0]->recurrence_length;
2037 /* Save ASAP before destroying node_order_params. */
2038 for (i = 0; i < g->num_nodes; i++)
2040 ddg_node_ptr v = &g->nodes[i];
2041 v->aux.count = ASAP (v);
2044 free (nops);
2045 free_ddg_all_sccs (sccs);
2046 check_nodes_order (node_order, g->num_nodes);
2048 return rec_mii;
2051 static void
2052 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2054 int i, pos = 0;
2055 ddg_ptr g = all_sccs->ddg;
2056 int num_nodes = g->num_nodes;
2057 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2058 sbitmap on_path = sbitmap_alloc (num_nodes);
2059 sbitmap tmp = sbitmap_alloc (num_nodes);
2060 sbitmap ones = sbitmap_alloc (num_nodes);
2062 sbitmap_zero (prev_sccs);
2063 sbitmap_ones (ones);
2065 /* Perform the node ordering starting from the SCC with the highest recMII.
2066 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2067 for (i = 0; i < all_sccs->num_sccs; i++)
2069 ddg_scc_ptr scc = all_sccs->sccs[i];
2071 /* Add nodes on paths from previous SCCs to the current SCC. */
2072 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2073 sbitmap_a_or_b (tmp, scc->nodes, on_path);
2075 /* Add nodes on paths from the current SCC to previous SCCs. */
2076 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2077 sbitmap_a_or_b (tmp, tmp, on_path);
2079 /* Remove nodes of previous SCCs from current extended SCC. */
2080 sbitmap_difference (tmp, tmp, prev_sccs);
2082 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2083 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2086 /* Handle the remaining nodes that do not belong to any scc. Each call
2087 to order_nodes_in_scc handles a single connected component. */
2088 while (pos < g->num_nodes)
2090 sbitmap_difference (tmp, ones, prev_sccs);
2091 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2093 sbitmap_free (prev_sccs);
2094 sbitmap_free (on_path);
2095 sbitmap_free (tmp);
2096 sbitmap_free (ones);
2099 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2100 static struct node_order_params *
2101 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2103 int u;
2104 int max_asap;
2105 int num_nodes = g->num_nodes;
2106 ddg_edge_ptr e;
2107 /* Allocate a place to hold ordering params for each node in the DDG. */
2108 nopa node_order_params_arr;
2110 /* Initialize ASAP/ALAP/HEIGHT to zero. */
2111 node_order_params_arr = (nopa) xcalloc (num_nodes,
2112 sizeof (struct node_order_params));
2114 /* Set the aux pointer of each node to point to its order_params structure. */
2115 for (u = 0; u < num_nodes; u++)
2116 g->nodes[u].aux.info = &node_order_params_arr[u];
2118 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2119 calculate ASAP, ALAP, mobility, distance, and height for each node
2120 in the dependence (directed acyclic) graph. */
2122 /* We assume that the nodes in the array are in topological order. */
2124 max_asap = 0;
2125 for (u = 0; u < num_nodes; u++)
2127 ddg_node_ptr u_node = &g->nodes[u];
2129 ASAP (u_node) = 0;
2130 for (e = u_node->in; e; e = e->next_in)
2131 if (e->distance == 0)
2132 ASAP (u_node) = MAX (ASAP (u_node),
2133 ASAP (e->src) + e->latency);
2134 max_asap = MAX (max_asap, ASAP (u_node));
2137 for (u = num_nodes - 1; u > -1; u--)
2139 ddg_node_ptr u_node = &g->nodes[u];
2141 ALAP (u_node) = max_asap;
2142 HEIGHT (u_node) = 0;
2143 for (e = u_node->out; e; e = e->next_out)
2144 if (e->distance == 0)
2146 ALAP (u_node) = MIN (ALAP (u_node),
2147 ALAP (e->dest) - e->latency);
2148 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2149 HEIGHT (e->dest) + e->latency);
2152 if (dump_file)
2154 fprintf (dump_file, "\nOrder params\n");
2155 for (u = 0; u < num_nodes; u++)
2157 ddg_node_ptr u_node = &g->nodes[u];
2159 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2160 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2164 *pmax_asap = max_asap;
2165 return node_order_params_arr;
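/* Return the cuid of the node in NODES with maximal ASAP, or -1 if
   NODES is empty.  */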
2168 static int
2169 find_max_asap (ddg_ptr g, sbitmap nodes)
2171 unsigned int u = 0;
2172 int max_asap = -1;
2173 int result = -1;
2174 sbitmap_iterator sbi;
2176 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2178 ddg_node_ptr u_node = &g->nodes[u];
2180 if (max_asap < ASAP (u_node))
2182 max_asap = ASAP (u_node);
2183 result = u;
2186 return result;
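/* Return the cuid of the node in NODES with maximal HEIGHT, breaking
   ties by minimal mobility; return -1 if NODES is empty.  */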
2189 static int
2190 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2192 unsigned int u = 0;
2193 int max_hv = -1;
2194 int min_mob = INT_MAX;
2195 int result = -1;
2196 sbitmap_iterator sbi;
2198 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2200 ddg_node_ptr u_node = &g->nodes[u];
2202 if (max_hv < HEIGHT (u_node))
2204 max_hv = HEIGHT (u_node);
2205 min_mob = MOB (u_node);
2206 result = u;
2208 else if ((max_hv == HEIGHT (u_node))
2209 && (min_mob > MOB (u_node)))
2211 min_mob = MOB (u_node);
2212 result = u;
2215 return result;
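/* Return the cuid of the node in NODES with maximal DEPTH, breaking
   ties by minimal mobility; return -1 if NODES is empty.  */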
2218 static int
2219 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2221 unsigned int u = 0;
2222 int max_dv = -1;
2223 int min_mob = INT_MAX;
2224 int result = -1;
2225 sbitmap_iterator sbi;
2227 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2229 ddg_node_ptr u_node = &g->nodes[u];
2231 if (max_dv < DEPTH (u_node))
2233 max_dv = DEPTH (u_node);
2234 min_mob = MOB (u_node);
2235 result = u;
2237 else if ((max_dv == DEPTH (u_node))
2238 && (min_mob > MOB (u_node)))
2240 min_mob = MOB (u_node);
2241 result = u;
2244 return result;
2247 /* Place the nodes of SCC into the NODE_ORDER array starting at
2248 position POS, according to the SMS ordering algorithm; return the
2249 updated position. NODES_ORDERED (an in/out parameter) holds the
2250 bitset of all nodes already placed in NODE_ORDER, starting from position zero. */
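/* The ordering alternates directions: in a TOPDOWN sweep nodes are taken
   from the workset by maximal height (ties broken by minimal mobility)
   and their yet-unordered successors within the SCC join the workset;
   in a BOTTOMUP sweep the choice is by maximal depth and the
   predecessors join instead.  When the workset empties, the direction
   flips and the workset is recomputed from the nodes ordered so far.  */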
2251 static int
2252 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2253 int * node_order, int pos)
2255 enum sms_direction dir;
2256 int num_nodes = g->num_nodes;
2257 sbitmap workset = sbitmap_alloc (num_nodes);
2258 sbitmap tmp = sbitmap_alloc (num_nodes);
2259 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2260 sbitmap predecessors = sbitmap_alloc (num_nodes);
2261 sbitmap successors = sbitmap_alloc (num_nodes);
2263 sbitmap_zero (predecessors);
2264 find_predecessors (predecessors, g, nodes_ordered);
2266 sbitmap_zero (successors);
2267 find_successors (successors, g, nodes_ordered);
2269 sbitmap_zero (tmp);
2270 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2272 sbitmap_copy (workset, tmp);
2273 dir = BOTTOMUP;
2275 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2277 sbitmap_copy (workset, tmp);
2278 dir = TOPDOWN;
2280 else
2282 int u;
2284 sbitmap_zero (workset);
2285 if ((u = find_max_asap (g, scc)) >= 0)
2286 SET_BIT (workset, u);
2287 dir = BOTTOMUP;
2290 sbitmap_zero (zero_bitmap);
2291 while (!sbitmap_equal (workset, zero_bitmap))
2293 int v;
2294 ddg_node_ptr v_node;
2295 sbitmap v_node_preds;
2296 sbitmap v_node_succs;
2298 if (dir == TOPDOWN)
2300 while (!sbitmap_equal (workset, zero_bitmap))
2302 v = find_max_hv_min_mob (g, workset);
2303 v_node = &g->nodes[v];
2304 node_order[pos++] = v;
2305 v_node_succs = NODE_SUCCESSORS (v_node);
2306 sbitmap_a_and_b (tmp, v_node_succs, scc);
2308 /* Don't consider the already ordered successors again. */
2309 sbitmap_difference (tmp, tmp, nodes_ordered);
2310 sbitmap_a_or_b (workset, workset, tmp);
2311 RESET_BIT (workset, v);
2312 SET_BIT (nodes_ordered, v);
2314 dir = BOTTOMUP;
2315 sbitmap_zero (predecessors);
2316 find_predecessors (predecessors, g, nodes_ordered);
2317 sbitmap_a_and_b (workset, predecessors, scc);
2319 else
2321 while (!sbitmap_equal (workset, zero_bitmap))
2323 v = find_max_dv_min_mob (g, workset);
2324 v_node = &g->nodes[v];
2325 node_order[pos++] = v;
2326 v_node_preds = NODE_PREDECESSORS (v_node);
2327 sbitmap_a_and_b (tmp, v_node_preds, scc);
2329 /* Don't consider the already ordered predecessors again. */
2330 sbitmap_difference (tmp, tmp, nodes_ordered);
2331 sbitmap_a_or_b (workset, workset, tmp);
2332 RESET_BIT (workset, v);
2333 SET_BIT (nodes_ordered, v);
2335 dir = TOPDOWN;
2336 sbitmap_zero (successors);
2337 find_successors (successors, g, nodes_ordered);
2338 sbitmap_a_and_b (workset, successors, scc);
2341 sbitmap_free (tmp);
2342 sbitmap_free (workset);
2343 sbitmap_free (zero_bitmap);
2344 sbitmap_free (predecessors);
2345 sbitmap_free (successors);
2346 return pos;
2350 /* This page contains functions for manipulating partial-schedules during
2351 modulo scheduling. */
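/* A partial schedule holds II rows; an insn scheduled at cycle C is kept
   in row SMODULO (C, II), and the insns of a row form a doubly linked
   list ordered by issue slot (column).  */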
2353 /* Create a partial schedule and allocate memory to hold II rows. */
2355 static partial_schedule_ptr
2356 create_partial_schedule (int ii, ddg_ptr g, int history)
2358 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2359 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2360 ps->ii = ii;
2361 ps->history = history;
2362 ps->min_cycle = INT_MAX;
2363 ps->max_cycle = INT_MIN;
2364 ps->g = g;
2366 return ps;
2369 /* Free the PS_INSNs in the rows array of the given partial schedule.
2370 ??? Consider caching the PS_INSNs. */
2371 static void
2372 free_ps_insns (partial_schedule_ptr ps)
2374 int i;
2376 for (i = 0; i < ps->ii; i++)
2378 while (ps->rows[i])
2380 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2382 free (ps->rows[i]);
2383 ps->rows[i] = ps_insn;
2385 ps->rows[i] = NULL;
2389 /* Free all the memory allocated to the partial schedule. */
2391 static void
2392 free_partial_schedule (partial_schedule_ptr ps)
2394 if (!ps)
2395 return;
2396 free_ps_insns (ps);
2397 free (ps->rows);
2398 free (ps);
2401 /* Clear the rows array, freeing its PS_INSNs, and resize it to
2402 NEW_II rows. */
2404 static void
2405 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2407 if (!ps)
2408 return;
2409 free_ps_insns (ps);
2410 if (new_ii == ps->ii)
2411 return;
2412 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2413 * sizeof (ps_insn_ptr));
2414 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2415 ps->ii = new_ii;
2416 ps->min_cycle = INT_MAX;
2417 ps->max_cycle = INT_MIN;
2420 /* Print the partial schedule as an array of II rows; for each row
2421 print the UIDs of the insns in it. */
2422 void
2423 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2425 int i;
2427 for (i = 0; i < ps->ii; i++)
2429 ps_insn_ptr ps_i = ps->rows[i];
2431 fprintf (dump, "\n[CYCLE %d ]: ", i);
2432 while (ps_i)
2434 fprintf (dump, "%d, ",
2435 INSN_UID (ps_i->node->insn));
2436 ps_i = ps_i->next_in_row;
2441 /* Create a PS_INSN object and initialize it with the given parameters. */
2442 static ps_insn_ptr
2443 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2445 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2447 ps_i->node = node;
2448 ps_i->next_in_row = NULL;
2449 ps_i->prev_in_row = NULL;
2450 ps_i->row_rest_count = rest_count;
2451 ps_i->cycle = cycle;
2453 return ps_i;
2457 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2458 node is not found in the partial schedule, else returns true. */
2459 static bool
2460 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2462 int row;
2464 if (!ps || !ps_i)
2465 return false;
2467 row = SMODULO (ps_i->cycle, ps->ii);
2468 if (! ps_i->prev_in_row)
2470 if (ps_i != ps->rows[row])
2471 return false;
2473 ps->rows[row] = ps_i->next_in_row;
2474 if (ps->rows[row])
2475 ps->rows[row]->prev_in_row = NULL;
2477 else
2479 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2480 if (ps_i->next_in_row)
2481 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2483 free (ps_i);
2484 return true;
2487 /* Unlike what the literature describes for modulo scheduling (which
2488 focuses on VLIW machines), the order of the instructions inside a
2489 cycle is important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE
2490 we know where the current instruction should go relative to the
2491 already scheduled instructions in the given cycle. Go over these
2492 instructions and find the first possible column to put it in. */
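/* For example, if the row already holds A, B, C where only A is in
   MUST_PRECEDE and only C is in MUST_FOLLOW, the new node is inserted
   right after A; if instead a MUST_PRECEDE insn appeared after a
   MUST_FOLLOW insn, no column would be possible.  */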
2493 static bool
2494 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2495 sbitmap must_precede, sbitmap must_follow)
2497 ps_insn_ptr next_ps_i;
2498 ps_insn_ptr first_must_follow = NULL;
2499 ps_insn_ptr last_must_precede = NULL;
2500 int row;
2502 if (! ps_i)
2503 return false;
2505 row = SMODULO (ps_i->cycle, ps->ii);
2507 /* Find the first must-follow and the last must-precede insn in the row,
2508 and insert the node immediately after the last must-precede,
2509 failing if any must-follow insn appears before that point. */
2510 for (next_ps_i = ps->rows[row];
2511 next_ps_i;
2512 next_ps_i = next_ps_i->next_in_row)
2514 if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
2515 && ! first_must_follow)
2516 first_must_follow = next_ps_i;
2517 if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
2519 /* If we have already met a node that must follow, then
2520 there is no possible column. */
2521 if (first_must_follow)
2522 return false;
2523 else
2524 last_must_precede = next_ps_i;
2528 /* Now insert the node after LAST_MUST_PRECEDE, or at the head of the row if there is none. */
2530 if (! last_must_precede)
2532 ps_i->next_in_row = ps->rows[row];
2533 ps_i->prev_in_row = NULL;
2534 if (ps_i->next_in_row)
2535 ps_i->next_in_row->prev_in_row = ps_i;
2536 ps->rows[row] = ps_i;
2538 else
2540 ps_i->next_in_row = last_must_precede->next_in_row;
2541 last_must_precede->next_in_row = ps_i;
2542 ps_i->prev_in_row = last_must_precede;
2543 if (ps_i->next_in_row)
2544 ps_i->next_in_row->prev_in_row = ps_i;
2547 return true;
2550 /* Advance the PS_INSN one column in its current row; return false
2551 on failure and true on success. Bit N is set in MUST_FOLLOW if
2552 the node with cuid N must come after the node pointed to by
2553 PS_I when scheduled in the same cycle. */
2554 static int
2555 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2556 sbitmap must_follow)
2558 ps_insn_ptr prev, next;
2559 int row;
2560 ddg_node_ptr next_node;
2562 if (!ps || !ps_i)
2563 return false;
2565 row = SMODULO (ps_i->cycle, ps->ii);
2567 if (! ps_i->next_in_row)
2568 return false;
2570 next_node = ps_i->next_in_row->node;
2572 /* Check if next_in_row is dependent on ps_i, both having the same sched
2573 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2574 if (must_follow && TEST_BIT (must_follow, next_node->cuid))
2575 return false;
2577 /* Advance PS_I over its next_in_row in the doubly linked list. */
2578 prev = ps_i->prev_in_row;
2579 next = ps_i->next_in_row;
2581 if (ps_i == ps->rows[row])
2582 ps->rows[row] = next;
2584 ps_i->next_in_row = next->next_in_row;
2586 if (next->next_in_row)
2587 next->next_in_row->prev_in_row = ps_i;
2589 next->next_in_row = ps_i;
2590 ps_i->prev_in_row = next;
2592 next->prev_in_row = prev;
2593 if (prev)
2594 prev->next_in_row = next;
2596 return true;
2599 /* Insert a DDG_NODE into the given partial schedule at the given cycle.
2600 Return NULL if this is not possible and a PS_INSN otherwise. Bit N is
2601 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
2602 before/after (respectively) the node pointed to by PS_I when scheduled
2603 in the same cycle. */
2604 static ps_insn_ptr
2605 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2606 sbitmap must_precede, sbitmap must_follow)
2608 ps_insn_ptr ps_i;
2609 int rest_count = 1;
2610 int row = SMODULO (cycle, ps->ii);
2612 if (ps->rows[row]
2613 && ps->rows[row]->row_rest_count >= issue_rate)
2614 return NULL;
2616 if (ps->rows[row])
2617 rest_count += ps->rows[row]->row_rest_count;
2619 ps_i = create_ps_insn (node, rest_count, cycle);
2621 /* Finds and inserts PS_I according to MUST_FOLLOW and
2622 MUST_PRECEDE. */
2623 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2625 free (ps_i);
2626 return NULL;
2629 return ps_i;
2632 /* Advance time one cycle. Assumes DFA is being used. */
2633 static void
2634 advance_one_cycle (void)
2636 if (targetm.sched.dfa_pre_cycle_insn)
2637 state_transition (curr_state,
2638 targetm.sched.dfa_pre_cycle_insn ());
2640 state_transition (curr_state, NULL);
2642 if (targetm.sched.dfa_post_cycle_insn)
2643 state_transition (curr_state,
2644 targetm.sched.dfa_post_cycle_insn ());
2649 /* Check if PS has resource conflicts according to the DFA, from cycle
2650 FROM to cycle TO; return true if there are conflicts and false
2651 otherwise. Assumes the DFA is being used. */
2652 static int
2653 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2655 int cycle;
2657 state_reset (curr_state);
2659 for (cycle = from; cycle <= to; cycle++)
2661 ps_insn_ptr crr_insn;
2662 /* Holds the remaining issue slots in the current row. */
2663 int can_issue_more = issue_rate;
2665 /* Walk through the DFA for the current row. */
2666 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2667 crr_insn;
2668 crr_insn = crr_insn->next_in_row)
2670 rtx insn = crr_insn->node->insn;
2672 if (!INSN_P (insn))
2673 continue;
2675 /* Check if there is room for the current insn. */
2676 if (!can_issue_more || state_dead_lock_p (curr_state))
2677 return true;
2679 /* Update the DFA state and return with failure if the DFA found
2680 resource conflicts. */
2681 if (state_transition (curr_state, insn) >= 0)
2682 return true;
2684 if (targetm.sched.variable_issue)
2685 can_issue_more =
2686 targetm.sched.variable_issue (sched_dump, sched_verbose,
2687 insn, can_issue_more);
2688 /* A naked CLOBBER or USE generates no instruction, so don't
2689 let them consume issue slots. */
2690 else if (GET_CODE (PATTERN (insn)) != USE
2691 && GET_CODE (PATTERN (insn)) != CLOBBER)
2692 can_issue_more--;
2695 /* Advance the DFA to the next cycle. */
2696 advance_one_cycle ();
2698 return false;
2701 /* Check if the given node causes resource conflicts when added to PS at
2702 cycle C. If not, the node is added to PS and its PS_INSN is returned;
2703 otherwise NULL is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW
2704 if the node with cuid N must come before/after (respectively) the node
2705 pointed to by PS_I when scheduled in the same cycle. */
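/* A minimal usage sketch, roughly mirroring what
   try_scheduling_node_in_cycle does for a candidate cycle C:

     ps_insn_ptr psi
       = ps_add_node_check_conflicts (ps, u_node, c,
                                      must_precede, must_follow);
     if (psi)
       SCHED_TIME (u_node) = c;
*/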
2706 ps_insn_ptr
2707 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2708 int c, sbitmap must_precede,
2709 sbitmap must_follow)
2711 int has_conflicts = 0;
2712 ps_insn_ptr ps_i;
2714 /* First add the node to the PS, if this succeeds check for
2715 conflicts, trying different issue slots in the same row. */
2716 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2717 return NULL; /* Failed to insert the node at the given cycle. */
2719 has_conflicts = ps_has_conflicts (ps, c, c)
2720 || (ps->history > 0
2721 && ps_has_conflicts (ps,
2722 c - ps->history,
2723 c + ps->history));
2725 /* Try different issue slots to find one that the given node can be
2726 scheduled in without conflicts. */
2727 while (has_conflicts)
2729 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2730 break;
2731 has_conflicts = ps_has_conflicts (ps, c, c)
2732 || (ps->history > 0
2733 && ps_has_conflicts (ps,
2734 c - ps->history,
2735 c + ps->history));
2738 if (has_conflicts)
2740 remove_node_from_ps (ps, ps_i);
2741 return NULL;
2744 ps->min_cycle = MIN (ps->min_cycle, c);
2745 ps->max_cycle = MAX (ps->max_cycle, c);
2746 return ps_i;
2749 /* Rotate the rows of PS such that insns scheduled at time
2750 START_CYCLE will appear in row 0. Updates max/min_cycles. */
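/* For example, with II=4 and START_CYCLE=6 the rows are rotated
   backwards SMODULO (6, 4) = 2 times, so the insns of old row 2 end up
   in row 0, and min/max_cycle both drop by 6.  */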
2751 void
2752 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2754 int i, row, backward_rotates;
2755 int last_row = ps->ii - 1;
2757 if (start_cycle == 0)
2758 return;
2760 backward_rotates = SMODULO (start_cycle, ps->ii);
2762 /* Revisit later and optimize this into a single loop. */
2763 for (i = 0; i < backward_rotates; i++)
2765 ps_insn_ptr first_row = ps->rows[0];
2767 for (row = 0; row < last_row; row++)
2768 ps->rows[row] = ps->rows[row+1];
2770 ps->rows[last_row] = first_row;
2773 ps->max_cycle -= start_cycle;
2774 ps->min_cycle -= start_cycle;
2777 #endif /* INSN_SCHEDULING */
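/* Gate: run SMS only when optimizing and -fmodulo-sched was given.  */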
2779 static bool
2780 gate_handle_sms (void)
2782 return (optimize > 0 && flag_modulo_sched);
2786 /* Run the instruction scheduler. */
2787 /* Perform SMS modulo scheduling. */
2788 static unsigned int
2789 rest_of_handle_sms (void)
2791 #ifdef INSN_SCHEDULING
2792 basic_block bb;
2794 /* Collect loop information to be used in SMS. */
2795 cfg_layout_initialize (0);
2796 sms_schedule ();
2798 /* Update the life information, because we add pseudos. */
2799 max_regno = max_reg_num ();
2801 /* Finalize layout changes. */
2802 FOR_EACH_BB (bb)
2803 if (bb->next_bb != EXIT_BLOCK_PTR)
2804 bb->aux = bb->next_bb;
2805 free_dominance_info (CDI_DOMINATORS);
2806 cfg_layout_finalize ();
2807 #endif /* INSN_SCHEDULING */
2808 return 0;
2811 struct tree_opt_pass pass_sms =
2813 "sms", /* name */
2814 gate_handle_sms, /* gate */
2815 rest_of_handle_sms, /* execute */
2816 NULL, /* sub */
2817 NULL, /* next */
2818 0, /* static_pass_number */
2819 TV_SMS, /* tv_id */
2820 0, /* properties_required */
2821 0, /* properties_provided */
2822 0, /* properties_destroyed */
2823 TODO_dump_func, /* todo_flags_start */
2824 TODO_df_finish | TODO_verify_rtl_sharing |
2825 TODO_dump_func |
2826 TODO_ggc_collect, /* todo_flags_finish */
2827 'm' /* letter */