/* gcc/modulo-sched.c — Swing Modulo Scheduling implementation (GCC).
   NOTE(review): the lines above were web-viewer navigation residue
   ("gcc/fortran/:", repository path, blob hash) left by extraction,
   not part of the source; replaced with this identifying comment.  */
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
51 #include "df.h"
53 #ifdef INSN_SCHEDULING
55 /* This file contains the implementation of the Swing Modulo Scheduler,
56 described in the following references:
57 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
58 Lifetime--sensitive modulo scheduling in a production environment.
59 IEEE Trans. on Comps., 50(3), March 2001
60 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
61 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
62 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
64 The basic structure is:
65 1. Build a data-dependence graph (DDG) for each loop.
66 2. Use the DDG to order the insns of a loop (not in topological order
67 necessarily, but rather) trying to place each insn after all its
68 predecessors _or_ after all its successors.
69 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
70 4. Use the ordering to perform list-scheduling of the loop:
71 1. Set II = MII. We will try to schedule the loop within II cycles.
72 2. Try to schedule the insns one by one according to the ordering.
73 For each insn compute an interval of cycles by considering already-
74 scheduled preds and succs (and associated latencies); try to place
75 the insn in the cycles of this window checking for potential
76 resource conflicts (using the DFA interface).
77 Note: this is different from the cycle-scheduling of schedule_insns;
78 here the insns are not scheduled monotonically top-down (nor bottom-
79 up).
80 3. If failed in scheduling all insns - bump II++ and try again, unless
81 II reaches an upper bound MaxII, in which case report failure.
82 5. If we succeeded in scheduling the loop within II cycles, we now
83 generate prolog and epilog, decrease the counter of the loop, and
84 perform modulo variable expansion for live ranges that span more than
85 II cycles (i.e. use register copies to prevent a def from overwriting
86 itself before reaching the use).
88 SMS works with countable loops (1) whose control part can be easily
89 decoupled from the rest of the loop and (2) whose loop count can
90 be easily adjusted. This is because we peel a constant number of
91 iterations into a prologue and epilogue for which we want to avoid
92 emitting the control part, and a kernel which is to iterate that
93 constant number of iterations less than the original loop. So the
94 control part should be a set of insns clearly identified and having
95 its own iv, not otherwise used in the loop (at-least for now), which
96 initializes a register before the loop to the number of iterations.
97 Currently SMS relies on the do-loop pattern to recognize such loops,
98 where (1) the control part comprises of all insns defining and/or
99 using a certain 'count' register and (2) the loop count can be
100 adjusted by modifying this register prior to the loop.
101 TODO: Rely on cfgloop analysis instead. */
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
123 /* A single instruction in the partial schedule. */
124 struct ps_insn
126 /* The corresponding DDG_NODE. */
127 ddg_node_ptr node;
129 /* The (absolute) cycle in which the PS instruction is scheduled.
130 Same as SCHED_TIME (node). */
131 int cycle;
133 /* The next/prev PS_INSN in the same row. */
134 ps_insn_ptr next_in_row,
135 prev_in_row;
137 /* The number of nodes in the same row that come after this node. */
138 int row_rest_count;
141 /* Holds the partial schedule as an array of II rows. Each entry of the
142 array points to a linked list of PS_INSNs, which represents the
143 instructions that are scheduled for that row. */
144 struct partial_schedule
146 int ii; /* Number of rows in the partial schedule. */
147 int history; /* Threshold for conflict checking using DFA. */
149 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
150 ps_insn_ptr *rows;
152 /* The earliest absolute cycle of an insn in the partial schedule. */
153 int min_cycle;
155 /* The latest absolute cycle of an insn in the partial schedule. */
156 int max_cycle;
158 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
161 /* We use this to record all the register replacements we do in
162 the kernel so we can undo SMS if it is not profitable. */
163 struct undo_replace_buff_elem
165 rtx insn;
166 rtx orig_reg;
167 rtx new_reg;
168 struct undo_replace_buff_elem *next;
173 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
174 static void free_partial_schedule (partial_schedule_ptr);
175 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
176 void print_partial_schedule (partial_schedule_ptr, FILE *);
177 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
178 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
179 ddg_node_ptr node, int cycle,
180 sbitmap must_precede,
181 sbitmap must_follow);
182 static void rotate_partial_schedule (partial_schedule_ptr, int);
183 void set_row_column_for_ps (partial_schedule_ptr);
184 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
185 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
188 /* This page defines constants and structures for the modulo scheduling
189 driver. */
191 static int sms_order_nodes (ddg_ptr, int, int *, int *);
192 static void set_node_sched_params (ddg_ptr);
193 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
194 static void permute_partial_schedule (partial_schedule_ptr, rtx);
195 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
196 rtx, rtx);
197 static void duplicate_insns_of_cycles (partial_schedule_ptr,
198 int, int, int, rtx);
200 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
201 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
202 #define SCHED_FIRST_REG_MOVE(x) \
203 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
204 #define SCHED_NREG_MOVES(x) \
205 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
206 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
207 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
208 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
210 /* The scheduling parameters held for each node. */
211 typedef struct node_sched_params
213 int asap; /* A lower-bound on the absolute scheduling cycle. */
214 int time; /* The absolute scheduling cycle (time >= asap). */
216 /* The following field (first_reg_move) is a pointer to the first
217 register-move instruction added to handle the modulo-variable-expansion
218 of the register defined by this node. This register-move copies the
219 original register defined by the node. */
220 rtx first_reg_move;
222 /* The number of register-move instructions added, immediately preceding
223 first_reg_move. */
224 int nreg_moves;
226 int row; /* Holds time % ii. */
227 int stage; /* Holds time / ii. */
229 /* The column of a node inside the ps. If nodes u, v are on the same row,
230 u will precede v if column (u) < column (v). */
231 int column;
232 } *node_sched_params_ptr;
235 /* The following three functions are copied from the current scheduler
236 code in order to use sched_analyze() for computing the dependencies.
237 They are used when initializing the sched_info structure. */
238 static const char *
239 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
241 static char tmp[80];
243 sprintf (tmp, "i%4d", INSN_UID (insn));
244 return tmp;
247 static void
248 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
249 regset cond_exec ATTRIBUTE_UNUSED,
250 regset used ATTRIBUTE_UNUSED,
251 regset set ATTRIBUTE_UNUSED)
255 static struct common_sched_info_def sms_common_sched_info;
257 static struct sched_deps_info_def sms_sched_deps_info =
259 compute_jump_reg_dependencies,
260 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
261 NULL,
262 0, 0, 0
265 static struct haifa_sched_info sms_sched_info =
267 NULL,
268 NULL,
269 NULL,
270 NULL,
271 NULL,
272 sms_print_insn,
273 NULL,
274 NULL, /* insn_finishes_block_p */
275 NULL, NULL,
276 NULL, NULL,
277 0, 0,
279 NULL, NULL, NULL,
283 /* Given HEAD and TAIL which are the first and last insns in a loop;
284 return the register which controls the loop. Return zero if it has
285 more than one occurrence in the loop besides the control part or the
286 do-loop pattern is not of the form we expect. */
287 static rtx
288 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
290 #ifdef HAVE_doloop_end
291 rtx reg, condition, insn, first_insn_not_to_check;
293 if (!JUMP_P (tail))
294 return NULL_RTX;
296 /* TODO: Free SMS's dependence on doloop_condition_get. */
297 condition = doloop_condition_get (tail);
298 if (! condition)
299 return NULL_RTX;
301 if (REG_P (XEXP (condition, 0)))
302 reg = XEXP (condition, 0);
303 else if (GET_CODE (XEXP (condition, 0)) == PLUS
304 && REG_P (XEXP (XEXP (condition, 0), 0)))
305 reg = XEXP (XEXP (condition, 0), 0);
306 else
307 gcc_unreachable ();
309 /* Check that the COUNT_REG has no other occurrences in the loop
310 until the decrement. We assume the control part consists of
311 either a single (parallel) branch-on-count or a (non-parallel)
312 branch immediately preceded by a single (decrement) insn. */
313 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
314 : PREV_INSN (tail));
316 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
317 if (reg_mentioned_p (reg, insn))
319 if (dump_file)
321 fprintf (dump_file, "SMS count_reg found ");
322 print_rtl_single (dump_file, reg);
323 fprintf (dump_file, " outside control in insn:\n");
324 print_rtl_single (dump_file, insn);
327 return NULL_RTX;
330 return reg;
331 #else
332 return NULL_RTX;
333 #endif
336 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
337 that the number of iterations is a compile-time constant. If so,
338 return the rtx that sets COUNT_REG to a constant, and set COUNT to
339 this constant. Otherwise return 0. */
340 static rtx
341 const_iteration_count (rtx count_reg, basic_block pre_header,
342 HOST_WIDEST_INT * count)
344 rtx insn;
345 rtx head, tail;
347 if (! pre_header)
348 return NULL_RTX;
350 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
352 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
353 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
354 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
356 rtx pat = single_set (insn);
358 if (CONST_INT_P (SET_SRC (pat)))
360 *count = INTVAL (SET_SRC (pat));
361 return insn;
364 return NULL_RTX;
367 return NULL_RTX;
370 /* A very simple resource-based lower bound on the initiation interval.
371 ??? Improve the accuracy of this bound by considering the
372 utilization of various units. */
373 static int
374 res_MII (ddg_ptr g)
376 if (targetm.sched.sms_res_mii)
377 return targetm.sched.sms_res_mii (g);
379 return ((g->num_nodes - g->num_debug) / issue_rate);
383 /* Points to the array that contains the sched data for each node. */
384 static node_sched_params_ptr node_sched_params;
386 /* Allocate sched_params for each node and initialize it. Assumes that
387 the aux field of each node contain the asap bound (computed earlier),
388 and copies it into the sched_params field. */
389 static void
390 set_node_sched_params (ddg_ptr g)
392 int i;
394 /* Allocate for each node in the DDG a place to hold the "sched_data". */
395 /* Initialize ASAP/ALAP/HIGHT to zero. */
396 node_sched_params = (node_sched_params_ptr)
397 xcalloc (g->num_nodes,
398 sizeof (struct node_sched_params));
400 /* Set the pointer of the general data of the node to point to the
401 appropriate sched_params structure. */
402 for (i = 0; i < g->num_nodes; i++)
404 /* Watch out for aliasing problems? */
405 node_sched_params[i].asap = g->nodes[i].aux.count;
406 g->nodes[i].aux.info = &node_sched_params[i];
410 static void
411 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
413 int i;
415 if (! file)
416 return;
417 for (i = 0; i < num_nodes; i++)
419 node_sched_params_ptr nsp = &node_sched_params[i];
420 rtx reg_move = nsp->first_reg_move;
421 int j;
423 fprintf (file, "Node = %d; INSN = %d\n", i,
424 (INSN_UID (g->nodes[i].insn)));
425 fprintf (file, " asap = %d:\n", nsp->asap);
426 fprintf (file, " time = %d:\n", nsp->time);
427 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
428 for (j = 0; j < nsp->nreg_moves; j++)
430 fprintf (file, " reg_move = ");
431 print_rtl_single (file, reg_move);
432 reg_move = PREV_INSN (reg_move);
438 Breaking intra-loop register anti-dependences:
439 Each intra-loop register anti-dependence implies a cross-iteration true
440 dependence of distance 1. Therefore, we can remove such false dependencies
441 and figure out if the partial schedule broke them by checking if (for a
442 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
443 if so generate a register move. The number of such moves is equal to:
444 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
445 nreg_moves = ----------------------------------- + 1 - { dependence.
446 ii { 1 if not.
448 static struct undo_replace_buff_elem *
449 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
451 ddg_ptr g = ps->g;
452 int ii = ps->ii;
453 int i;
454 struct undo_replace_buff_elem *reg_move_replaces = NULL;
456 for (i = 0; i < g->num_nodes; i++)
458 ddg_node_ptr u = &g->nodes[i];
459 ddg_edge_ptr e;
460 int nreg_moves = 0, i_reg_move;
461 sbitmap *uses_of_defs;
462 rtx last_reg_move;
463 rtx prev_reg, old_reg;
465 /* Compute the number of reg_moves needed for u, by looking at life
466 ranges started at u (excluding self-loops). */
467 for (e = u->out; e; e = e->next_out)
468 if (e->type == TRUE_DEP && e->dest != e->src)
470 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
472 if (e->distance == 1)
473 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
475 /* If dest precedes src in the schedule of the kernel, then dest
476 will read before src writes and we can save one reg_copy. */
477 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
478 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
479 nreg_moves4e--;
481 nreg_moves = MAX (nreg_moves, nreg_moves4e);
484 if (nreg_moves == 0)
485 continue;
487 /* Every use of the register defined by node may require a different
488 copy of this register, depending on the time the use is scheduled.
489 Set a bitmap vector, telling which nodes use each copy of this
490 register. */
491 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
492 sbitmap_vector_zero (uses_of_defs, nreg_moves);
493 for (e = u->out; e; e = e->next_out)
494 if (e->type == TRUE_DEP && e->dest != e->src)
496 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
498 if (e->distance == 1)
499 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
501 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
502 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
503 dest_copy--;
505 if (dest_copy)
506 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
509 /* Now generate the reg_moves, attaching relevant uses to them. */
510 SCHED_NREG_MOVES (u) = nreg_moves;
511 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
512 /* Insert the reg-moves right before the notes which precede
513 the insn they relates to. */
514 last_reg_move = u->first_note;
516 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
518 unsigned int i_use = 0;
519 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
520 rtx reg_move = gen_move_insn (new_reg, prev_reg);
521 sbitmap_iterator sbi;
523 add_insn_before (reg_move, last_reg_move, NULL);
524 last_reg_move = reg_move;
526 if (!SCHED_FIRST_REG_MOVE (u))
527 SCHED_FIRST_REG_MOVE (u) = reg_move;
529 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
531 struct undo_replace_buff_elem *rep;
533 rep = (struct undo_replace_buff_elem *)
534 xcalloc (1, sizeof (struct undo_replace_buff_elem));
535 rep->insn = g->nodes[i_use].insn;
536 rep->orig_reg = old_reg;
537 rep->new_reg = new_reg;
539 if (! reg_move_replaces)
540 reg_move_replaces = rep;
541 else
543 rep->next = reg_move_replaces;
544 reg_move_replaces = rep;
547 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
548 if (rescan)
549 df_insn_rescan (g->nodes[i_use].insn);
552 prev_reg = new_reg;
554 sbitmap_vector_free (uses_of_defs);
556 return reg_move_replaces;
559 /* Free memory allocated for the undo buffer. */
560 static void
561 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
564 while (reg_move_replaces)
566 struct undo_replace_buff_elem *rep = reg_move_replaces;
568 reg_move_replaces = reg_move_replaces->next;
569 free (rep);
573 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
574 of SCHED_ROW and SCHED_STAGE. */
575 static void
576 normalize_sched_times (partial_schedule_ptr ps)
578 int row;
579 int amount = PS_MIN_CYCLE (ps);
580 int ii = ps->ii;
581 ps_insn_ptr crr_insn;
583 for (row = 0; row < ii; row++)
584 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
586 ddg_node_ptr u = crr_insn->node;
587 int normalized_time = SCHED_TIME (u) - amount;
589 if (dump_file)
590 fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
591 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
592 (u), ps->min_cycle);
593 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
594 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
595 SCHED_TIME (u) = normalized_time;
596 SCHED_ROW (u) = normalized_time % ii;
597 SCHED_STAGE (u) = normalized_time / ii;
601 /* Set SCHED_COLUMN of each node according to its position in PS. */
602 static void
603 set_columns_for_ps (partial_schedule_ptr ps)
605 int row;
607 for (row = 0; row < ps->ii; row++)
609 ps_insn_ptr cur_insn = ps->rows[row];
610 int column = 0;
612 for (; cur_insn; cur_insn = cur_insn->next_in_row)
613 SCHED_COLUMN (cur_insn->node) = column++;
617 /* Permute the insns according to their order in PS, from row 0 to
618 row ii-1, and position them right before LAST. This schedules
619 the insns of the loop kernel. */
620 static void
621 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
623 int ii = ps->ii;
624 int row;
625 ps_insn_ptr ps_ij;
627 for (row = 0; row < ii ; row++)
628 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
629 if (PREV_INSN (last) != ps_ij->node->insn)
630 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
631 PREV_INSN (last));
634 static void
635 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
636 int to_stage, int for_prolog, rtx count_reg)
638 int row;
639 ps_insn_ptr ps_ij;
641 for (row = 0; row < ps->ii; row++)
642 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
644 ddg_node_ptr u_node = ps_ij->node;
645 int j, i_reg_moves;
646 rtx reg_move = NULL_RTX;
648 /* Do not duplicate any insn which refers to count_reg as it
649 belongs to the control part.
650 TODO: This should be done by analyzing the control part of
651 the loop. */
652 if (reg_mentioned_p (count_reg, u_node->insn))
653 continue;
655 if (for_prolog)
657 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
658 number of reg_moves starting with the second occurrence of
659 u_node, which is generated if its SCHED_STAGE <= to_stage. */
660 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
661 i_reg_moves = MAX (i_reg_moves, 0);
662 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
664 /* The reg_moves start from the *first* reg_move backwards. */
665 if (i_reg_moves)
667 reg_move = SCHED_FIRST_REG_MOVE (u_node);
668 for (j = 1; j < i_reg_moves; j++)
669 reg_move = PREV_INSN (reg_move);
672 else /* It's for the epilog. */
674 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
675 starting to decrease one stage after u_node no longer occurs;
676 that is, generate all reg_moves until
677 SCHED_STAGE (u_node) == from_stage - 1. */
678 i_reg_moves = SCHED_NREG_MOVES (u_node)
679 - (from_stage - SCHED_STAGE (u_node) - 1);
680 i_reg_moves = MAX (i_reg_moves, 0);
681 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
683 /* The reg_moves start from the *last* reg_move forwards. */
684 if (i_reg_moves)
686 reg_move = SCHED_FIRST_REG_MOVE (u_node);
687 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
688 reg_move = PREV_INSN (reg_move);
692 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
693 emit_insn (copy_rtx (PATTERN (reg_move)));
694 if (SCHED_STAGE (u_node) >= from_stage
695 && SCHED_STAGE (u_node) <= to_stage)
696 duplicate_insn_chain (u_node->first_note, u_node->insn);
701 /* Generate the instructions (including reg_moves) for prolog & epilog. */
702 static void
703 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
704 rtx count_reg, rtx count_init)
706 int i;
707 int last_stage = PS_STAGE_COUNT (ps) - 1;
708 edge e;
710 /* Generate the prolog, inserting its insns on the loop-entry edge. */
711 start_sequence ();
713 if (!count_init)
715 /* Generate instructions at the beginning of the prolog to
716 adjust the loop count by STAGE_COUNT. If loop count is constant
717 (count_init), this constant is adjusted by STAGE_COUNT in
718 generate_prolog_epilog function. */
719 rtx sub_reg = NULL_RTX;
721 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
722 count_reg, GEN_INT (last_stage),
723 count_reg, 1, OPTAB_DIRECT);
724 gcc_assert (REG_P (sub_reg));
725 if (REGNO (sub_reg) != REGNO (count_reg))
726 emit_move_insn (count_reg, sub_reg);
729 for (i = 0; i < last_stage; i++)
730 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
732 /* Put the prolog on the entry edge. */
733 e = loop_preheader_edge (loop);
734 split_edge_and_insert (e, get_insns ());
736 end_sequence ();
738 /* Generate the epilog, inserting its insns on the loop-exit edge. */
739 start_sequence ();
741 for (i = 0; i < last_stage; i++)
742 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
744 /* Put the epilogue on the exit edge. */
745 gcc_assert (single_exit (loop));
746 e = single_exit (loop);
747 split_edge_and_insert (e, get_insns ());
748 end_sequence ();
751 /* Return true if all the BBs of the loop are empty except the
752 loop header. */
753 static bool
754 loop_single_full_bb_p (struct loop *loop)
756 unsigned i;
757 basic_block *bbs = get_loop_body (loop);
759 for (i = 0; i < loop->num_nodes ; i++)
761 rtx head, tail;
762 bool empty_bb = true;
764 if (bbs[i] == loop->header)
765 continue;
767 /* Make sure that basic blocks other than the header
768 have only notes labels or jumps. */
769 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
770 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
772 if (NOTE_P (head) || LABEL_P (head)
773 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
774 continue;
775 empty_bb = false;
776 break;
779 if (! empty_bb)
781 free (bbs);
782 return false;
785 free (bbs);
786 return true;
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )		      \
				  && (EDGE_COUNT (loop->latch->preds) == 1) \
				  && (EDGE_COUNT (loop->latch->succs) == 1))
795 /* Return true if the loop is in its canonical form and false if not.
796 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
797 static bool
798 loop_canon_p (struct loop *loop)
801 if (loop->inner || !loop_outer (loop))
803 if (dump_file)
804 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
805 return false;
808 if (!single_exit (loop))
810 if (dump_file)
812 rtx insn = BB_END (loop->header);
814 fprintf (dump_file, "SMS loop many exits ");
815 fprintf (dump_file, " %s %d (file, line)\n",
816 insn_file (insn), insn_line (insn));
818 return false;
821 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
823 if (dump_file)
825 rtx insn = BB_END (loop->header);
827 fprintf (dump_file, "SMS loop many BBs. ");
828 fprintf (dump_file, " %s %d (file, line)\n",
829 insn_file (insn), insn_line (insn));
831 return false;
834 return true;
837 /* If there are more than one entry for the loop,
838 make it one by splitting the first entry edge and
839 redirecting the others to the new BB. */
840 static void
841 canon_loop (struct loop *loop)
843 edge e;
844 edge_iterator i;
846 /* Avoid annoying special cases of edges going to exit
847 block. */
848 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
849 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
850 split_edge (e);
852 if (loop->latch == loop->header
853 || EDGE_COUNT (loop->latch->succs) > 1)
855 FOR_EACH_EDGE (e, i, loop->header->preds)
856 if (e->src == loop->latch)
857 break;
858 split_edge (e);
862 /* Setup infos. */
863 static void
864 setup_sched_infos (void)
866 memcpy (&sms_common_sched_info, &haifa_common_sched_info,
867 sizeof (sms_common_sched_info));
868 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
869 common_sched_info = &sms_common_sched_info;
871 sched_deps_info = &sms_sched_deps_info;
872 current_sched_info = &sms_sched_info;
/* Probability in % that the sms-ed loop rolls enough so that optimized
   version may be entered.  Just a guess.  */
#define PROB_SMS_ENOUGH_ITERATIONS 80

/* Used to calculate the upper bound of ii.  */
#define MAXII_FACTOR 2
882 /* Main entry point, perform SMS scheduling on the loops of the function
883 that consist of single basic blocks. */
884 static void
885 sms_schedule (void)
887 rtx insn;
888 ddg_ptr *g_arr, g;
889 int * node_order;
890 int maxii, max_asap;
891 loop_iterator li;
892 partial_schedule_ptr ps;
893 basic_block bb = NULL;
894 struct loop *loop;
895 basic_block condition_bb = NULL;
896 edge latch_edge;
897 gcov_type trip_count = 0;
899 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
900 | LOOPS_HAVE_RECORDED_EXITS);
901 if (number_of_loops () <= 1)
903 loop_optimizer_finalize ();
904 return; /* There are no loops to schedule. */
907 /* Initialize issue_rate. */
908 if (targetm.sched.issue_rate)
910 int temp = reload_completed;
912 reload_completed = 1;
913 issue_rate = targetm.sched.issue_rate ();
914 reload_completed = temp;
916 else
917 issue_rate = 1;
919 /* Initialize the scheduler. */
920 setup_sched_infos ();
921 haifa_sched_init ();
923 /* Allocate memory to hold the DDG array one entry for each loop.
924 We use loop->num as index into this array. */
925 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
927 if (dump_file)
929 fprintf (dump_file, "\n\nSMS analysis phase\n");
930 fprintf (dump_file, "===================\n\n");
933 /* Build DDGs for all the relevant loops and hold them in G_ARR
934 indexed by the loop index. */
935 FOR_EACH_LOOP (li, loop, 0)
937 rtx head, tail;
938 rtx count_reg;
940 /* For debugging. */
941 if (dbg_cnt (sms_sched_loop) == false)
943 if (dump_file)
944 fprintf (dump_file, "SMS reached max limit... \n");
946 break;
949 if (dump_file)
951 rtx insn = BB_END (loop->header);
953 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
954 loop->num, insn_file (insn), insn_line (insn));
958 if (! loop_canon_p (loop))
959 continue;
961 if (! loop_single_full_bb_p (loop))
963 if (dump_file)
964 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
965 continue;
968 bb = loop->header;
970 get_ebb_head_tail (bb, bb, &head, &tail);
971 latch_edge = loop_latch_edge (loop);
972 gcc_assert (single_exit (loop));
973 if (single_exit (loop)->count)
974 trip_count = latch_edge->count / single_exit (loop)->count;
976 /* Perform SMS only on loops that their average count is above threshold. */
978 if ( latch_edge->count
979 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
981 if (dump_file)
983 fprintf (dump_file, " %s %d (file, line)\n",
984 insn_file (tail), insn_line (tail));
985 fprintf (dump_file, "SMS single-bb-loop\n");
986 if (profile_info && flag_branch_probabilities)
988 fprintf (dump_file, "SMS loop-count ");
989 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
990 (HOST_WIDEST_INT) bb->count);
991 fprintf (dump_file, "\n");
992 fprintf (dump_file, "SMS trip-count ");
993 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
994 (HOST_WIDEST_INT) trip_count);
995 fprintf (dump_file, "\n");
996 fprintf (dump_file, "SMS profile-sum-max ");
997 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
998 (HOST_WIDEST_INT) profile_info->sum_max);
999 fprintf (dump_file, "\n");
1002 continue;
1005 /* Make sure this is a doloop. */
1006 if ( !(count_reg = doloop_register_get (head, tail)))
1008 if (dump_file)
1009 fprintf (dump_file, "SMS doloop_register_get failed\n");
1010 continue;
1013 /* Don't handle BBs with calls or barriers, or !single_set insns,
1014 or auto-increment insns (to avoid creating invalid reg-moves
1015 for the auto-increment insns).
1016 ??? Should handle auto-increment insns.
1017 ??? Should handle insns defining subregs. */
1018 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1020 rtx set;
1022 if (CALL_P (insn)
1023 || BARRIER_P (insn)
1024 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1025 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
1026 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1027 || (INSN_P (insn) && (set = single_set (insn))
1028 && GET_CODE (SET_DEST (set)) == SUBREG))
1029 break;
1032 if (insn != NEXT_INSN (tail))
1034 if (dump_file)
1036 if (CALL_P (insn))
1037 fprintf (dump_file, "SMS loop-with-call\n");
1038 else if (BARRIER_P (insn))
1039 fprintf (dump_file, "SMS loop-with-barrier\n");
1040 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1041 fprintf (dump_file, "SMS reg inc\n");
1042 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1043 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1044 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1045 else
1046 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1047 print_rtl_single (dump_file, insn);
1050 continue;
1053 if (! (g = create_ddg (bb, 0)))
1055 if (dump_file)
1056 fprintf (dump_file, "SMS create_ddg failed\n");
1057 continue;
1060 g_arr[loop->num] = g;
1061 if (dump_file)
1062 fprintf (dump_file, "...OK\n");
1065 if (dump_file)
1067 fprintf (dump_file, "\nSMS transformation phase\n");
1068 fprintf (dump_file, "=========================\n\n");
1071 /* We don't want to perform SMS on new loops - created by versioning. */
1072 FOR_EACH_LOOP (li, loop, 0)
1074 rtx head, tail;
1075 rtx count_reg, count_init;
1076 int mii, rec_mii;
1077 unsigned stage_count = 0;
1078 HOST_WIDEST_INT loop_count = 0;
1080 if (! (g = g_arr[loop->num]))
1081 continue;
1083 if (dump_file)
1085 rtx insn = BB_END (loop->header);
1087 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1088 loop->num, insn_file (insn), insn_line (insn));
1090 print_ddg (dump_file, g);
1093 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1095 latch_edge = loop_latch_edge (loop);
1096 gcc_assert (single_exit (loop));
1097 if (single_exit (loop)->count)
1098 trip_count = latch_edge->count / single_exit (loop)->count;
1100 if (dump_file)
1102 fprintf (dump_file, " %s %d (file, line)\n",
1103 insn_file (tail), insn_line (tail));
1104 fprintf (dump_file, "SMS single-bb-loop\n");
1105 if (profile_info && flag_branch_probabilities)
1107 fprintf (dump_file, "SMS loop-count ");
1108 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1109 (HOST_WIDEST_INT) bb->count);
1110 fprintf (dump_file, "\n");
1111 fprintf (dump_file, "SMS profile-sum-max ");
1112 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1113 (HOST_WIDEST_INT) profile_info->sum_max);
1114 fprintf (dump_file, "\n");
1116 fprintf (dump_file, "SMS doloop\n");
1117 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1118 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1119 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1123 /* In case of th loop have doloop register it gets special
1124 handling. */
1125 count_init = NULL_RTX;
1126 if ((count_reg = doloop_register_get (head, tail)))
1128 basic_block pre_header;
1130 pre_header = loop_preheader_edge (loop)->src;
1131 count_init = const_iteration_count (count_reg, pre_header,
1132 &loop_count);
1134 gcc_assert (count_reg);
1136 if (dump_file && count_init)
1138 fprintf (dump_file, "SMS const-doloop ");
1139 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1140 loop_count);
1141 fprintf (dump_file, "\n");
1144 node_order = XNEWVEC (int, g->num_nodes);
1146 mii = 1; /* Need to pass some estimate of mii. */
1147 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1148 mii = MAX (res_MII (g), rec_mii);
1149 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1151 if (dump_file)
1152 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1153 rec_mii, mii, maxii);
1155 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1156 ASAP. */
1157 set_node_sched_params (g);
1159 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1161 if (ps){
1162 stage_count = PS_STAGE_COUNT (ps);
1163 gcc_assert(stage_count >= 1);
1166 /* Stage count of 1 means that there is no interleaving between
1167 iterations, let the scheduling passes do the job. */
1168 if (stage_count <= 1
1169 || (count_init && (loop_count <= stage_count))
1170 || (flag_branch_probabilities && (trip_count <= stage_count)))
1172 if (dump_file)
1174 fprintf (dump_file, "SMS failed... \n");
1175 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1176 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1177 fprintf (dump_file, ", trip-count=");
1178 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1179 fprintf (dump_file, ")\n");
1181 continue;
1183 else
1185 struct undo_replace_buff_elem *reg_move_replaces;
1187 if (dump_file)
1189 fprintf (dump_file,
1190 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1191 stage_count);
1192 print_partial_schedule (ps, dump_file);
1193 fprintf (dump_file,
1194 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1195 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1198 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1199 the closing_branch was scheduled and should appear in the last (ii-1)
1200 row. Otherwise, we are free to schedule the branch, and we let nodes
1201 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1202 row; this should reduce stage_count to minimum.
1203 TODO: Revisit the issue of scheduling the insns of the
1204 control part relative to the branch when the control part
1205 has more than one insn. */
1206 normalize_sched_times (ps);
1207 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1208 set_columns_for_ps (ps);
1210 canon_loop (loop);
1212 /* case the BCT count is not known , Do loop-versioning */
1213 if (count_reg && ! count_init)
1215 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1216 GEN_INT(stage_count));
1217 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1218 * REG_BR_PROB_BASE) / 100;
1220 loop_version (loop, comp_rtx, &condition_bb,
1221 prob, prob, REG_BR_PROB_BASE - prob,
1222 true);
1225 /* Set new iteration count of loop kernel. */
1226 if (count_reg && count_init)
1227 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1228 - stage_count + 1);
1230 /* Now apply the scheduled kernel to the RTL of the loop. */
1231 permute_partial_schedule (ps, g->closing_branch->first_note);
1233 /* Mark this loop as software pipelined so the later
1234 scheduling passes doesn't touch it. */
1235 if (! flag_resched_modulo_sched)
1236 g->bb->flags |= BB_DISABLE_SCHEDULE;
1237 /* The life-info is not valid any more. */
1238 df_set_bb_dirty (g->bb);
1240 reg_move_replaces = generate_reg_moves (ps, true);
1241 if (dump_file)
1242 print_node_sched_params (dump_file, g->num_nodes, g);
1243 /* Generate prolog and epilog. */
1244 generate_prolog_epilog (ps, loop, count_reg, count_init);
1246 free_undo_replace_buff (reg_move_replaces);
1249 free_partial_schedule (ps);
1250 free (node_sched_params);
1251 free (node_order);
1252 free_ddg (g);
1255 free (g_arr);
1257 /* Release scheduler data, needed until now because of DFA. */
1258 haifa_sched_finish ();
1259 loop_optimizer_finalize ();
1262 /* The SMS scheduling algorithm itself
1263 -----------------------------------
1264 Input: 'O' an ordered list of insns of a loop.
1265 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1267 'Q' is the empty Set
1268 'PS' is the partial schedule; it holds the currently scheduled nodes with
1269 their cycle/slot.
1270 'PSP' previously scheduled predecessors.
1271 'PSS' previously scheduled successors.
1272 't(u)' the cycle where u is scheduled.
1273 'l(u)' is the latency of u.
1274 'd(v,u)' is the dependence distance from v to u.
1275 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1276 the node ordering phase.
1277 'check_hardware_resources_conflicts(u, PS, c)'
1278 run a trace around cycle/slot through DFA model
1279 to check resource conflicts involving instruction u
1280 at cycle c given the partial schedule PS.
1281 'add_to_partial_schedule_at_time(u, PS, c)'
1282 Add the node/instruction u to the partial schedule
1283 PS at time c.
1284 'calculate_register_pressure(PS)'
1285 Given a schedule of instructions, calculate the register
1286 pressure it implies. One implementation could be the
1287 maximum number of overlapping live ranges.
1288 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1289 registers available in the hardware.
1291 1. II = MII.
1292 2. PS = empty list
1293 3. for each node u in O in pre-computed order
1294 4. if (PSP(u) != Q && PSS(u) == Q) then
1295 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1296 6. start = Early_start; end = Early_start + II - 1; step = 1
1297 11. else if (PSP(u) == Q && PSS(u) != Q) then
1298 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1299 13. start = Late_start; end = Late_start - II + 1; step = -1
1300 14. else if (PSP(u) != Q && PSS(u) != Q) then
1301 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1302 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1303 17. start = Early_start;
1304 18. end = min(Early_start + II - 1 , Late_start);
1305 19. step = 1
1306 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1307 21. start = ASAP(u); end = start + II - 1; step = 1
1308 22. endif
1310 23. success = false
1311 24. for (c = start ; c != end ; c += step)
1312 25. if check_hardware_resources_conflicts(u, PS, c) then
1313 26. add_to_partial_schedule_at_time(u, PS, c)
1314 27. success = true
1315 28. break
1316 29. endif
1317 30. endfor
1318 31. if (success == false) then
1319 32. II = II + 1
1320 33. if (II > maxII) then
1321 34. finish - failed to schedule
1322 35. endif
1323 36. goto 2.
1324 37. endif
1325 38. endfor
1326 39. if (calculate_register_pressure(PS) > maxRP) then
1327 40. goto 32.
1328 41. endif
1329 42. compute epilogue & prologue
1330 43. finish - succeeded to schedule
1333 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1334 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1335 set to 0 to save compile time. */
1336 #define DFA_HISTORY SMS_DFA_HISTORY
1338 /* A threshold for the number of repeated unsuccessful attempts to insert
1339 an empty row, before we flush the partial schedule and start over. */
1340 #define MAX_SPLIT_NUM 10
1341 /* Given the partial schedule PS, this function calculates and returns the
1342 cycles in which we can schedule the node with the given index I.
1343 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1344 noticed that there are several cases in which we fail to SMS the loop
1345 because the sched window of a node is empty due to tight data-deps. In
1346 such cases we want to unschedule some of the predecessors/successors
1347 until we get non-empty scheduling window. It returns -1 if the
1348 scheduling window is empty and zero otherwise. */
1350 static int
1351 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1352 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1354 int start, step, end;
1355 ddg_edge_ptr e;
1356 int u = nodes_order [i];
1357 ddg_node_ptr u_node = &ps->g->nodes[u];
1358 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1359 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1360 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1361 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1362 int psp_not_empty;
1363 int pss_not_empty;
1365 /* 1. compute sched window for u (start, end, step). */
1366 sbitmap_zero (psp);
1367 sbitmap_zero (pss);
/* PSP/PSS = the subsets of U's predecessors/successors that are already
   scheduled; the _cg variants also report whether the result is
   non-empty.  */
1368 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1369 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
/* Case 1: only predecessors are scheduled.  The earliest start is
   max over scheduled preds V of t(V) + latency - distance * II; scan
   forward (step == 1) to place U close to its predecessors.  */
1371 if (psp_not_empty && !pss_not_empty)
1373 int early_start = INT_MIN;
1375 end = INT_MAX;
1376 for (e = u_node->in; e != 0; e = e->next_in)
1378 ddg_node_ptr v_node = e->src;
1380 if (dump_file)
1382 fprintf (dump_file, "\nProcessing edge: ");
1383 print_ddg_edge (dump_file, e);
1384 fprintf (dump_file,
1385 "\nScheduling %d (%d) in psp_not_empty,"
1386 " checking p %d (%d): ", u_node->cuid,
1387 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1388 (v_node->insn));
1391 if (TEST_BIT (sched_nodes, v_node->cuid))
1393 int p_st = SCHED_TIME (v_node);
1395 early_start =
1396 MAX (early_start, p_st + e->latency - (e->distance * ii));
1398 if (dump_file)
1399 fprintf (dump_file,
1400 "pred st = %d; early_start = %d; latency: %d",
1401 p_st, early_start, e->latency);
/* A memory dependence additionally caps the window from above.  */
1403 if (e->data_type == MEM_DEP)
1404 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1406 else if (dump_file)
1407 fprintf (dump_file, "the node is not scheduled\n");
1409 start = early_start;
/* The window never spans more than II cycles.  */
1410 end = MIN (end, early_start + ii);
1411 /* Schedule the node close to it's predecessors. */
1412 step = 1;
1414 if (dump_file)
1415 fprintf (dump_file,
1416 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1417 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
/* Case 2: only successors are scheduled.  The latest start is min over
   scheduled succs V of t(V) - latency + distance * II; scan backward
   (step == -1) to place U close to its successors.  */
1420 else if (!psp_not_empty && pss_not_empty)
1422 int late_start = INT_MAX;
1424 end = INT_MIN;
1425 for (e = u_node->out; e != 0; e = e->next_out)
1427 ddg_node_ptr v_node = e->dest;
1429 if (dump_file)
1431 fprintf (dump_file, "\nProcessing edge:");
1432 print_ddg_edge (dump_file, e);
1433 fprintf (dump_file,
1434 "\nScheduling %d (%d) in pss_not_empty,"
1435 " checking s %d (%d): ", u_node->cuid,
1436 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1437 (v_node->insn));
1440 if (TEST_BIT (sched_nodes, v_node->cuid))
1442 int s_st = SCHED_TIME (v_node);
1444 late_start = MIN (late_start,
1445 s_st - e->latency + (e->distance * ii));
1447 if (dump_file)
1448 fprintf (dump_file,
1449 "succ st = %d; late_start = %d; latency = %d",
1450 s_st, late_start, e->latency);
/* A memory dependence caps the (backward) window.  */
1452 if (e->data_type == MEM_DEP)
1453 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1454 if (dump_file)
1455 fprintf (dump_file, "end = %d\n", end);
1458 else if (dump_file)
1459 fprintf (dump_file, "the node is not scheduled\n");
1462 start = late_start;
1463 end = MAX (end, late_start - ii);
1464 /* Schedule the node close to it's successors. */
1465 step = -1;
1467 if (dump_file)
1468 fprintf (dump_file,
1469 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1470 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
/* Case 3: both predecessors and successors are scheduled.  Combine the
   early/late bounds; scan forward by default, but flip to a backward
   scan when U has at least as many critical (true reg-dep) successors
   as predecessors.  */
1474 else if (psp_not_empty && pss_not_empty)
1476 int early_start = INT_MIN;
1477 int late_start = INT_MAX;
1478 int count_preds = 0;
1479 int count_succs = 0;
1481 start = INT_MIN;
1482 end = INT_MAX;
1483 for (e = u_node->in; e != 0; e = e->next_in)
1485 ddg_node_ptr v_node = e->src;
1487 if (dump_file)
1489 fprintf (dump_file, "\nProcessing edge:");
1490 print_ddg_edge (dump_file, e);
1491 fprintf (dump_file,
1492 "\nScheduling %d (%d) in psp_pss_not_empty,"
1493 " checking p %d (%d): ", u_node->cuid, INSN_UID
1494 (u_node->insn), v_node->cuid, INSN_UID
1495 (v_node->insn));
1498 if (TEST_BIT (sched_nodes, v_node->cuid))
1500 int p_st = SCHED_TIME (v_node);
1502 early_start = MAX (early_start,
1503 p_st + e->latency
1504 - (e->distance * ii));
1506 if (dump_file)
1507 fprintf (dump_file,
1508 "pred st = %d; early_start = %d; latency = %d",
1509 p_st, early_start, e->latency);
1511 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1512 count_preds++;
1514 if (e->data_type == MEM_DEP)
1515 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1517 else if (dump_file)
1518 fprintf (dump_file, "the node is not scheduled\n");
1521 for (e = u_node->out; e != 0; e = e->next_out)
1523 ddg_node_ptr v_node = e->dest;
1525 if (dump_file)
1527 fprintf (dump_file, "\nProcessing edge:");
1528 print_ddg_edge (dump_file, e);
1529 fprintf (dump_file,
1530 "\nScheduling %d (%d) in psp_pss_not_empty,"
1531 " checking s %d (%d): ", u_node->cuid, INSN_UID
1532 (u_node->insn), v_node->cuid, INSN_UID
1533 (v_node->insn));
1536 if (TEST_BIT (sched_nodes, v_node->cuid))
1538 int s_st = SCHED_TIME (v_node);
1540 late_start = MIN (late_start,
1541 s_st - e->latency
1542 + (e->distance * ii));
1544 if (dump_file)
1545 fprintf (dump_file,
1546 "succ st = %d; late_start = %d; latency = %d",
1547 s_st, late_start, e->latency);
1549 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1550 count_succs++;
1552 if (e->data_type == MEM_DEP)
1553 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1555 else if (dump_file)
1556 fprintf (dump_file, "the node is not scheduled\n");
1559 start = MAX (start, early_start);
1560 end = MIN (end, MIN (early_start + ii, late_start + 1));
1561 step = 1;
1562 /* If there are more successors than predecessors schedule the
1563 node close to it's successors. */
1564 if (count_succs >= count_preds)
1566 int old_start = start;
1568 start = end - 1;
1569 end = old_start - 1;
1570 step = -1;
/* Case 4: neither predecessors nor successors are scheduled yet -
   start from U's ASAP time and scan one full II forward.  */
1573 else /* psp is empty && pss is empty. */
1575 start = SCHED_ASAP (u_node);
1576 end = start + ii;
1577 step = 1;
1580 *start_p = start;
1581 *step_p = step;
1582 *end_p = end;
1583 sbitmap_free (psp);
1584 sbitmap_free (pss);
/* END is exclusive; a window that is empty in the scan direction means
   no cycle can hold U - the caller then backtracks (row splitting).  */
1586 if ((start >= end && step == 1) || (start <= end && step == -1))
1588 if (dump_file)
1589 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1590 start, end, step);
1591 return -1;
1594 return 0;
1597 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1598 node currently been scheduled. At the end of the calculation
1599 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1600 U_NODE which are (1) already scheduled in the first/last row of
1601 U_NODE's scheduling window, (2) whose dependence inequality with U
1602 becomes an equality when U is scheduled in this same row, and (3)
1603 whose dependence latency is zero.
1605 The first and last rows are calculated using the following parameters:
1606 START/END rows - The cycles that begins/ends the traversal on the window;
1607 searching for an empty cycle to schedule U_NODE.
1608 STEP - The direction in which we traverse the window.
1609 II - The initiation interval. */
1611 static void
1612 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1613 int step, int ii, sbitmap sched_nodes,
1614 sbitmap must_precede, sbitmap must_follow)
1616 ddg_edge_ptr e;
1617 int first_cycle_in_window, last_cycle_in_window;
/* Both output bitmaps must be supplied by the caller.  */
1619 gcc_assert (must_precede && must_follow);
1621 /* Consider the following scheduling window:
1622 {first_cycle_in_window, first_cycle_in_window+1, ...,
1623 last_cycle_in_window}. If step is 1 then the following will be
1624 the order we traverse the window: {start=first_cycle_in_window,
1625 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1626 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1627 end=first_cycle_in_window-1} if step is -1. */
1628 first_cycle_in_window = (step == 1) ? start : end - step;
1629 last_cycle_in_window = (step == 1) ? end - step : start;
1631 sbitmap_zero (must_precede);
1632 sbitmap_zero (must_follow);
1634 if (dump_file)
1635 fprintf (dump_file, "\nmust_precede: ");
1637 /* Instead of checking if:
1638 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1639 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1640 first_cycle_in_window)
1641 && e->latency == 0
1642 we use the fact that latency is non-negative:
1643 SCHED_TIME (e->src) - (e->distance * ii) <=
1644 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1645 first_cycle_in_window
1646 and check only if
1647 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
/* Collect the scheduled predecessors whose dependence on U becomes an
   equality when U is placed in the first row of the window.  */
1648 for (e = u_node->in; e != 0; e = e->next_in)
1649 if (TEST_BIT (sched_nodes, e->src->cuid)
1650 && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1651 first_cycle_in_window))
1653 if (dump_file)
1654 fprintf (dump_file, "%d ", e->src->cuid);
1656 SET_BIT (must_precede, e->src->cuid);
1659 if (dump_file)
1660 fprintf (dump_file, "\nmust_follow: ");
1662 /* Instead of checking if:
1663 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1664 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1665 last_cycle_in_window)
1666 && e->latency == 0
1667 we use the fact that latency is non-negative:
1668 SCHED_TIME (e->dest) + (e->distance * ii) >=
1669 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1670 last_cycle_in_window
1671 and check only if
1672 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
/* Collect the scheduled successors whose dependence on U becomes an
   equality when U is placed in the last row of the window.  */
1673 for (e = u_node->out; e != 0; e = e->next_out)
1674 if (TEST_BIT (sched_nodes, e->dest->cuid)
1675 && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1676 last_cycle_in_window))
1678 if (dump_file)
1679 fprintf (dump_file, "%d ", e->dest->cuid);
1681 SET_BIT (must_follow, e->dest->cuid);
1684 if (dump_file)
1685 fprintf (dump_file, "\n");
1688 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1689 parameters to decide if that's possible:
1690 PS - The partial schedule.
1691 U - The serial number of U_NODE.
1692 NUM_SPLITS - The number of row splits made so far.
1693 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1694 the first row of the scheduling window)
1695 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1696 last row of the scheduling window) */
1698 static bool
1699 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1700 int u, int cycle, sbitmap sched_nodes,
1701 int *num_splits, sbitmap must_precede,
1702 sbitmap must_follow)
1704 ps_insn_ptr psi;
1705 bool success = 0;
1707 verify_partial_schedule (ps, sched_nodes);
1708 psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1709 must_precede, must_follow);
1710 if (psi)
1712 SCHED_TIME (u_node) = cycle;
1713 SET_BIT (sched_nodes, u);
1714 success = 1;
1715 *num_splits = 0;
1716 if (dump_file)
1717 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1721 return success;
1724 /* This function implements the scheduling algorithm for SMS according to the
1725 above algorithm. */
1726 static partial_schedule_ptr
1727 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1729 int ii = mii;
1730 int i, c, success, num_splits = 0;
1731 int flush_and_start_over = true;
1732 int num_nodes = g->num_nodes;
1733 int start, end, step; /* Place together into one struct? */
1734 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1735 sbitmap must_precede = sbitmap_alloc (num_nodes);
1736 sbitmap must_follow = sbitmap_alloc (num_nodes);
1737 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1739 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1741 sbitmap_ones (tobe_scheduled);
1742 sbitmap_zero (sched_nodes);
/* Outer loop: restart from an empty schedule whenever MAX_SPLIT_NUM
   consecutive row splits failed to help; II only ever grows, so the
   loop terminates at MAXII.  */
1744 while (flush_and_start_over && (ii < maxii))
1747 if (dump_file)
1748 fprintf (dump_file, "Starting with ii=%d\n", ii);
1749 flush_and_start_over = false;
1750 sbitmap_zero (sched_nodes);
1752 for (i = 0; i < num_nodes; i++)
1754 int u = nodes_order[i];
1755 ddg_node_ptr u_node = &ps->g->nodes[u];
1756 rtx insn = u_node->insn;
/* Debug insns are never scheduled into the kernel.  */
1758 if (!NONDEBUG_INSN_P (insn))
1760 RESET_BIT (tobe_scheduled, u);
1761 continue;
1764 if (JUMP_P (insn)) /* Closing branch handled later. */
1766 RESET_BIT (tobe_scheduled, u);
1767 continue;
/* Already placed (may happen after a retry following a split).  */
1770 if (TEST_BIT (sched_nodes, u))
1771 continue;
1773 /* Try to get non-empty scheduling window. */
1774 success = 0;
1775 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1776 &step, &end) == 0)
1778 if (dump_file)
1779 fprintf (dump_file, "\nTrying to schedule node %d \
1780 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1781 (g->nodes[u].insn)), start, end, step);
1783 gcc_assert ((step > 0 && start < end)
1784 || (step < 0 && start > end));
1786 calculate_must_precede_follow (u_node, start, end, step, ii,
1787 sched_nodes, must_precede,
1788 must_follow);
/* Scan the window.  MUST_PRECEDE only constrains the first row of the
   window and MUST_FOLLOW only the last, so pass them exclusively at
   those two cycles.  */
1790 for (c = start; c != end; c += step)
1792 sbitmap tmp_precede = NULL;
1793 sbitmap tmp_follow = NULL;
1795 if (c == start)
1797 if (step == 1)
1798 tmp_precede = must_precede;
1799 else /* step == -1. */
1800 tmp_follow = must_follow;
1802 if (c == end - step)
1804 if (step == 1)
1805 tmp_follow = must_follow;
1806 else /* step == -1. */
1807 tmp_precede = must_precede;
1810 success =
1811 try_scheduling_node_in_cycle (ps, u_node, u, c,
1812 sched_nodes,
1813 &num_splits, tmp_precede,
1814 tmp_follow);
1815 if (success)
1816 break;
1819 verify_partial_schedule (ps, sched_nodes);
/* Backtracking: no cycle in the window worked (or the window was
   empty).  Either give up at MAXII, flush and restart after too many
   splits, or split a row to widen the window and retry this node.  */
1821 if (!success)
1823 int split_row;
1825 if (ii++ == maxii)
1826 break;
1828 if (num_splits >= MAX_SPLIT_NUM)
1830 num_splits = 0;
1831 flush_and_start_over = true;
1832 verify_partial_schedule (ps, sched_nodes);
1833 reset_partial_schedule (ps, ii);
1834 verify_partial_schedule (ps, sched_nodes);
1835 break;
1838 num_splits++;
1839 /* The scheduling window is exclusive of 'end'
1840 whereas compute_split_window() expects an inclusive,
1841 ordered range. */
1842 if (step == 1)
1843 split_row = compute_split_row (sched_nodes, start, end - 1,
1844 ps->ii, u_node);
1845 else
1846 split_row = compute_split_row (sched_nodes, end + 1, start,
1847 ps->ii, u_node);
1849 ps_insert_empty_row (ps, split_row, sched_nodes);
1850 i--; /* Go back and retry node i. */
1852 if (dump_file)
1853 fprintf (dump_file, "num_splits=%d\n", num_splits);
1856 /* ??? If (success), check register pressure estimates. */
1857 } /* Continue with next node. */
1858 } /* While flush_and_start_over. */
/* On failure hand back NULL; on success all schedulable nodes must be
   in the partial schedule.  */
1859 if (ii >= maxii)
1861 free_partial_schedule (ps);
1862 ps = NULL;
1864 else
1865 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1867 sbitmap_free (sched_nodes);
1868 sbitmap_free (must_precede);
1869 sbitmap_free (must_follow);
1870 sbitmap_free (tobe_scheduled);
1872 return ps;
1875 /* This function inserts a new empty row into PS at the position
1876 according to SPLITROW, keeping all already scheduled instructions
1877 intact and updating their SCHED_TIME and cycle accordingly. */
1878 static void
1879 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1880 sbitmap sched_nodes)
1882 ps_insn_ptr crr_insn;
1883 ps_insn_ptr *rows_new;
1884 int ii = ps->ii;
1885 int new_ii = ii + 1;
1886 int row;
1888 verify_partial_schedule (ps, sched_nodes);
1890 /* We normalize sched_time and rotate ps to have only non-negative sched
1891 times, for simplicity of updating cycles after inserting new row. */
1892 split_row -= ps->min_cycle;
1893 split_row = SMODULO (split_row, ii);
1894 if (dump_file)
1895 fprintf (dump_file, "split_row=%d\n", split_row);
1897 normalize_sched_times (ps);
1898 rotate_partial_schedule (ps, ps->min_cycle);
1900 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr))
/* Rows before the split keep their row index; each insn's cycle grows
   by one for every full II it already spanned (SCHED_TIME / ii), since
   every earlier wrap of the schedule now contains one extra row.  */
1901 for (row = 0; row < split_row; row++)
1903 rows_new[row] = ps->rows[row];
1904 ps->rows[row] = NULL;
1905 for (crr_insn = rows_new[row];
1906 crr_insn; crr_insn = crr_insn->next_in_row)
1908 ddg_node_ptr u = crr_insn->node;
1909 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1911 SCHED_TIME (u) = new_time;
1912 crr_insn->cycle = new_time;
1913 SCHED_ROW (u) = new_time % new_ii;
1914 SCHED_STAGE (u) = new_time / new_ii;
/* The new row itself starts out empty.  */
1919 rows_new[split_row] = NULL;
/* Rows at or after the split shift down by one row; their insns'
   cycles additionally grow by one for the row inserted in the current
   iteration of the schedule.  */
1921 for (row = split_row; row < ii; row++)
1923 rows_new[row + 1] = ps->rows[row];
1924 ps->rows[row] = NULL;
1925 for (crr_insn = rows_new[row + 1];
1926 crr_insn; crr_insn = crr_insn->next_in_row)
1928 ddg_node_ptr u = crr_insn->node;
1929 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1931 SCHED_TIME (u) = new_time;
1932 crr_insn->cycle = new_time;
1933 SCHED_ROW (u) = new_time % new_ii;
1934 SCHED_STAGE (u) = new_time / new_ii;
1938 /* Updating ps. */
1939 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1940 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1941 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1942 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1943 free (ps->rows)
1944 ps->rows = rows_new;
1945 ps->ii = new_ii;
1946 gcc_assert (ps->min_cycle >= 0);
1948 verify_partial_schedule (ps, sched_nodes);
1950 if (dump_file)
1951 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1952 ps->max_cycle);
1955 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1956 UP which are the boundaries of it's scheduling window; compute using
1957 SCHED_NODES and II a row in the partial schedule that can be split
1958 which will separate a critical predecessor from a critical successor
1959 thereby expanding the window, and return it. */
1960 static int
1961 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1962 ddg_node_ptr u_node)
1964 ddg_edge_ptr e;
1965 int lower = INT_MIN, upper = INT_MAX;
1966 ddg_node_ptr crit_pred = NULL;
1967 ddg_node_ptr crit_succ = NULL;
1968 int crit_cycle;
1970 for (e = u_node->in; e != 0; e = e->next_in)
1972 ddg_node_ptr v_node = e->src;
1974 if (TEST_BIT (sched_nodes, v_node->cuid)
1975 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1976 if (SCHED_TIME (v_node) > lower)
1978 crit_pred = v_node;
1979 lower = SCHED_TIME (v_node);
1983 if (crit_pred != NULL)
1985 crit_cycle = SCHED_TIME (crit_pred) + 1;
1986 return SMODULO (crit_cycle, ii);
1989 for (e = u_node->out; e != 0; e = e->next_out)
1991 ddg_node_ptr v_node = e->dest;
1992 if (TEST_BIT (sched_nodes, v_node->cuid)
1993 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1994 if (SCHED_TIME (v_node) < upper)
1996 crit_succ = v_node;
1997 upper = SCHED_TIME (v_node);
2001 if (crit_succ != NULL)
2003 crit_cycle = SCHED_TIME (crit_succ);
2004 return SMODULO (crit_cycle, ii);
2007 if (dump_file)
2008 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2010 return SMODULO ((low + up + 1) / 2, ii);
2013 static void
2014 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2016 int row;
2017 ps_insn_ptr crr_insn;
2019 for (row = 0; row < ps->ii; row++)
2020 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2022 ddg_node_ptr u = crr_insn->node;
2024 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
2025 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2026 popcount (sched_nodes) == number of insns in ps. */
2027 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2028 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2033 /* This page implements the algorithm for ordering the nodes of a DDG
2034 for modulo scheduling, activated through the
2035 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
/* Per-node ordering data, stashed in each ddg_node's aux.info.  */
2037 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
/* Earliest / latest cycle at which node X could be scheduled.  */
2038 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2039 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2040 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility (slack) of X: the width of its scheduling freedom.  */
2041 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2042 #define DEPTH(x) (ASAP ((x)))
2044 typedef struct node_order_params * nopa;
2046 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2047 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2048 static nopa calculate_order_params (ddg_ptr, int, int *);
2049 static int find_max_asap (ddg_ptr, sbitmap);
2050 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2051 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
/* Direction of traversal while ordering nodes within an SCC.  */
2053 enum sms_direction {BOTTOMUP, TOPDOWN};
2055 struct node_order_params
2057 int asap;
2058 int alap;
2059 int height;
2062 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2063 static void
2064 check_nodes_order (int *node_order, int num_nodes)
2066 int i;
2067 sbitmap tmp = sbitmap_alloc (num_nodes);
2069 sbitmap_zero (tmp);
2071 if (dump_file)
2072 fprintf (dump_file, "SMS final nodes order: \n");
2074 for (i = 0; i < num_nodes; i++)
2076 int u = node_order[i];
2078 if (dump_file)
2079 fprintf (dump_file, "%d ", u);
2080 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2082 SET_BIT (tmp, u);
2085 if (dump_file)
2086 fprintf (dump_file, "\n");
2088 sbitmap_free (tmp);
2091 /* Order the nodes of G for scheduling and pass the result in
2092 NODE_ORDER. Also set aux.count of each node to ASAP.
2093 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2094 static int
2095 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2097 int i;
2098 int rec_mii = 0;
2099 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2101 nopa nops = calculate_order_params (g, mii, pmax_asap);
2103 if (dump_file)
2104 print_sccs (dump_file, sccs, g);
2106 order_nodes_of_sccs (sccs, node_order);
2108 if (sccs->num_sccs > 0)
2109 /* First SCC has the largest recurrence_length. */
2110 rec_mii = sccs->sccs[0]->recurrence_length;
2112 /* Save ASAP before destroying node_order_params. */
2113 for (i = 0; i < g->num_nodes; i++)
2115 ddg_node_ptr v = &g->nodes[i];
2116 v->aux.count = ASAP (v);
2119 free (nops);
2120 free_ddg_all_sccs (sccs);
2121 check_nodes_order (node_order, g->num_nodes);
2123 return rec_mii;
/* Produce a scheduling order for all nodes of the DDG underlying
   ALL_SCCS, writing it into NODE_ORDER.  SCCs are handled first, in the
   order they appear in ALL_SCCS (highest recMII first), each extended
   with the nodes lying on paths to/from the SCCs already ordered; nodes
   belonging to no SCC are ordered last, one connected component per
   call to order_nodes_in_scc.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }

  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
/* MII is needed if we consider backarcs (that do not close recursive
   cycles).  Compute ASAP, ALAP and HEIGHT for every node of G, store
   them in a freshly allocated array (also reachable through each
   node's aux.info), record the maximal ASAP in *PMAX_ASAP, and return
   the array.  Caller frees it.  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
					  sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass: a node's ASAP is the maximum over its incoming
     intra-iteration (distance 0) edges of the source's ASAP plus the
     edge latency.  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
	if (e->distance == 0)
	  ASAP (u_node) = MAX (ASAP (u_node),
			       ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass: ALAP and HEIGHT, again over distance-0 edges only.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
	if (e->distance == 0)
	  {
	    ALAP (u_node) = MIN (ALAP (u_node),
				 ALAP (e->dest) - e->latency);
	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
				   HEIGHT (e->dest) + e->latency);
	  }
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nOrder params\n");
      for (u = 0; u < num_nodes; u++)
	{
	  ddg_node_ptr u_node = &g->nodes[u];

	  fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
		   ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
	}
    }

  *pmax_asap = max_asap;
  return node_order_params_arr;
}
2243 static int
2244 find_max_asap (ddg_ptr g, sbitmap nodes)
2246 unsigned int u = 0;
2247 int max_asap = -1;
2248 int result = -1;
2249 sbitmap_iterator sbi;
2251 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2253 ddg_node_ptr u_node = &g->nodes[u];
2255 if (max_asap < ASAP (u_node))
2257 max_asap = ASAP (u_node);
2258 result = u;
2261 return result;
2264 static int
2265 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2267 unsigned int u = 0;
2268 int max_hv = -1;
2269 int min_mob = INT_MAX;
2270 int result = -1;
2271 sbitmap_iterator sbi;
2273 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2275 ddg_node_ptr u_node = &g->nodes[u];
2277 if (max_hv < HEIGHT (u_node))
2279 max_hv = HEIGHT (u_node);
2280 min_mob = MOB (u_node);
2281 result = u;
2283 else if ((max_hv == HEIGHT (u_node))
2284 && (min_mob > MOB (u_node)))
2286 min_mob = MOB (u_node);
2287 result = u;
2290 return result;
2293 static int
2294 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2296 unsigned int u = 0;
2297 int max_dv = -1;
2298 int min_mob = INT_MAX;
2299 int result = -1;
2300 sbitmap_iterator sbi;
2302 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2304 ddg_node_ptr u_node = &g->nodes[u];
2306 if (max_dv < DEPTH (u_node))
2308 max_dv = DEPTH (u_node);
2309 min_mob = MOB (u_node);
2310 result = u;
2312 else if ((max_dv == DEPTH (u_node))
2313 && (min_mob > MOB (u_node)))
2315 min_mob = MOB (u_node);
2316 result = u;
2319 return result;
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.  The traversal
   alternates between TOPDOWN sweeps (picking by max height, min
   mobility) and BOTTOMUP sweeps (max depth, min mobility), as in the
   SMS papers.  Returns the updated position.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset: predecessors of already-ordered nodes within the
     SCC (go BOTTOMUP), else successors (go TOPDOWN), else start from
     the SCC node with maximal ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  /* Drain the workset downwards, then reverse direction.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  /* Drain the workset upwards, then reverse direction.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }

  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
2425 /* This page contains functions for manipulating partial-schedules during
2426 modulo scheduling. */
2428 /* Create a partial schedule and allocate a memory to hold II rows. */
2430 static partial_schedule_ptr
2431 create_partial_schedule (int ii, ddg_ptr g, int history)
2433 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2434 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2435 ps->ii = ii;
2436 ps->history = history;
2437 ps->min_cycle = INT_MAX;
2438 ps->max_cycle = INT_MIN;
2439 ps->g = g;
2441 return ps;
2444 /* Free the PS_INSNs in rows array of the given partial schedule.
2445 ??? Consider caching the PS_INSN's. */
2446 static void
2447 free_ps_insns (partial_schedule_ptr ps)
2449 int i;
2451 for (i = 0; i < ps->ii; i++)
2453 while (ps->rows[i])
2455 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2457 free (ps->rows[i]);
2458 ps->rows[i] = ps_insn;
2460 ps->rows[i] = NULL;
2464 /* Free all the memory allocated to the partial schedule. */
2466 static void
2467 free_partial_schedule (partial_schedule_ptr ps)
2469 if (!ps)
2470 return;
2471 free_ps_insns (ps);
2472 free (ps->rows);
2473 free (ps);
2476 /* Clear the rows array with its PS_INSNs, and create a new one with
2477 NEW_II rows. */
2479 static void
2480 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2482 if (!ps)
2483 return;
2484 free_ps_insns (ps);
2485 if (new_ii == ps->ii)
2486 return;
2487 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2488 * sizeof (ps_insn_ptr));
2489 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2490 ps->ii = new_ii;
2491 ps->min_cycle = INT_MAX;
2492 ps->max_cycle = INT_MIN;
2495 /* Prints the partial schedule as an ii rows array, for each rows
2496 print the ids of the insns in it. */
2497 void
2498 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2500 int i;
2502 for (i = 0; i < ps->ii; i++)
2504 ps_insn_ptr ps_i = ps->rows[i];
2506 fprintf (dump, "\n[ROW %d ]: ", i);
2507 while (ps_i)
2509 fprintf (dump, "%d, ",
2510 INSN_UID (ps_i->node->insn));
2511 ps_i = ps_i->next_in_row;
2516 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2517 static ps_insn_ptr
2518 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2520 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2522 ps_i->node = node;
2523 ps_i->next_in_row = NULL;
2524 ps_i->prev_in_row = NULL;
2525 ps_i->row_rest_count = rest_count;
2526 ps_i->cycle = cycle;
2528 return ps_i;
/* Removes the given PS_INSN from the partial schedule.  Returns false if the
   node is not found in the partial schedule, else returns true.  Frees
   PS_I on success.  */
static bool
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
{
  int row;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);
  if (! ps_i->prev_in_row)
    {
      /* PS_I has no predecessor, so it should be the head of its row;
	 otherwise it is stale and not in the schedule.  */
      if (ps_i != ps->rows[row])
	return false;

      ps->rows[row] = ps_i->next_in_row;
      if (ps->rows[row])
	ps->rows[row]->prev_in_row = NULL;
    }
  else
    {
      /* Unlink PS_I from the middle or tail of its row's list.  */
      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
    }
  free (ps_i);
  return true;
}
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No must-precede insn in this row: link PS_I at the head.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Splice PS_I immediately after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must be come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already last in its row: nothing to advance over.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I headed the row, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2674 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2675 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2676 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2677 before/after (respectively) the node pointed to by PS_I when scheduled
2678 in the same cycle. */
2679 static ps_insn_ptr
2680 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2681 sbitmap must_precede, sbitmap must_follow)
2683 ps_insn_ptr ps_i;
2684 int rest_count = 1;
2685 int row = SMODULO (cycle, ps->ii);
2687 if (ps->rows[row]
2688 && ps->rows[row]->row_rest_count >= issue_rate)
2689 return NULL;
2691 if (ps->rows[row])
2692 rest_count += ps->rows[row]->row_rest_count;
2694 ps_i = create_ps_insn (node, rest_count, cycle);
2696 /* Finds and inserts PS_I according to MUST_FOLLOW and
2697 MUST_PRECEDE. */
2698 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2700 free (ps_i);
2701 return NULL;
2704 return ps_i;
2707 /* Advance time one cycle. Assumes DFA is being used. */
2708 static void
2709 advance_one_cycle (void)
2711 if (targetm.sched.dfa_pre_cycle_insn)
2712 state_transition (curr_state,
2713 targetm.sched.dfa_pre_cycle_insn ());
2715 state_transition (curr_state, NULL);
2717 if (targetm.sched.dfa_post_cycle_insn)
2718 state_transition (curr_state,
2719 targetm.sched.dfa_post_cycle_insn ());
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used.  Resets the
   DFA state first, then replays the rows row-by-row.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      targetm.sched.variable_issue (sched_dump, sched_verbose,
					    insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2776 /* Checks if the given node causes resource conflicts when added to PS at
2777 cycle C. If not the node is added to PS and returned; otherwise zero
2778 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2779 cuid N must be come before/after (respectively) the node pointed to by
2780 PS_I when scheduled in the same cycle. */
2781 ps_insn_ptr
2782 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2783 int c, sbitmap must_precede,
2784 sbitmap must_follow)
2786 int has_conflicts = 0;
2787 ps_insn_ptr ps_i;
2789 /* First add the node to the PS, if this succeeds check for
2790 conflicts, trying different issue slots in the same row. */
2791 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2792 return NULL; /* Failed to insert the node at the given cycle. */
2794 has_conflicts = ps_has_conflicts (ps, c, c)
2795 || (ps->history > 0
2796 && ps_has_conflicts (ps,
2797 c - ps->history,
2798 c + ps->history));
2800 /* Try different issue slots to find one that the given node can be
2801 scheduled in without conflicts. */
2802 while (has_conflicts)
2804 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2805 break;
2806 has_conflicts = ps_has_conflicts (ps, c, c)
2807 || (ps->history > 0
2808 && ps_has_conflicts (ps,
2809 c - ps->history,
2810 c + ps->history));
2813 if (has_conflicts)
2815 remove_node_from_ps (ps, ps_i);
2816 return NULL;
2819 ps->min_cycle = MIN (ps->min_cycle, c);
2820 ps->max_cycle = MAX (ps->max_cycle, c);
2821 return ps_i;
2824 /* Rotate the rows of PS such that insns scheduled at time
2825 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2826 void
2827 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2829 int i, row, backward_rotates;
2830 int last_row = ps->ii - 1;
2832 if (start_cycle == 0)
2833 return;
2835 backward_rotates = SMODULO (start_cycle, ps->ii);
2837 /* Revisit later and optimize this into a single loop. */
2838 for (i = 0; i < backward_rotates; i++)
2840 ps_insn_ptr first_row = ps->rows[0];
2842 for (row = 0; row < last_row; row++)
2843 ps->rows[row] = ps->rows[row+1];
2845 ps->rows[last_row] = first_row;
2848 ps->max_cycle -= start_cycle;
2849 ps->min_cycle -= start_cycle;
2852 #endif /* INSN_SCHEDULING */
2854 static bool
2855 gate_handle_sms (void)
2857 return (optimize > 0 && flag_modulo_sched);
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  Entry point of the SMS pass; a no-op
   when the target provides no DFA-based scheduling (INSN_SCHEDULING
   undefined).  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  Chain the blocks through aux so
     cfg_layout_finalize restores an explicit block order.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2886 struct rtl_opt_pass pass_sms =
2889 RTL_PASS,
2890 "sms", /* name */
2891 gate_handle_sms, /* gate */
2892 rest_of_handle_sms, /* execute */
2893 NULL, /* sub */
2894 NULL, /* next */
2895 0, /* static_pass_number */
2896 TV_SMS, /* tv_id */
2897 0, /* properties_required */
2898 0, /* properties_provided */
2899 0, /* properties_destroyed */
2900 TODO_dump_func, /* todo_flags_start */
2901 TODO_df_finish | TODO_verify_rtl_sharing |
2902 TODO_dump_func |
2903 TODO_ggc_collect /* todo_flags_finish */