1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "diagnostic-core.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "recog.h"
38 #include "sched-int.h"
39 #include "target.h"
40 #include "cfglayout.h"
41 #include "cfgloop.h"
42 #include "cfghooks.h"
43 #include "expr.h"
44 #include "params.h"
45 #include "gcov-io.h"
46 #include "ddg.h"
47 #include "timevar.h"
48 #include "tree-pass.h"
49 #include "dbgcnt.h"
50 #include "df.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime-sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops whose loop count can be easily
88 adjusted. This is because we peel a constant number of iterations
89 into a prologue and epilogue for which we want to avoid emitting
90 the control part, and a kernel which is to iterate that constant
91 number of iterations less than the original loop. So the control
92 part should be a set of insns clearly identified and having its
93 own iv, not otherwise used in the loop (at least for now), which
94 initializes a register before the loop to the number of iterations.
95 Currently SMS relies on the do-loop pattern to recognize such loops,
96 where (1) the control part comprises all insns defining and/or
97 using a certain 'count' register and (2) the loop count can be
98 adjusted by modifying this register prior to the loop.
99 TODO: Rely on cfgloop analysis instead. */
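/* As an illustration (a hypothetical two-stage schedule, not taken from
the sources): if each original iteration runs insns A;B and the kernel
overlaps stages, the transformed loop looks roughly like
prolog: A(0) -- stage 0 of iteration 0
kernel: B(i); A(i+1) -- executed loop-count - 1 times
epilog: B(last) -- stage 1 of the final iteration
which is why the count register feeding the control part must be
decreased by stage_count - 1 before entering the kernel. */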
101 /* This page defines partial-schedule structures and functions for
102 modulo scheduling. */
104 typedef struct partial_schedule *partial_schedule_ptr;
105 typedef struct ps_insn *ps_insn_ptr;
107 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
108 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
110 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
111 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
113 /* Perform signed modulo, always returning a non-negative value. */
114 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
116 /* The number of different iterations the nodes in ps span, assuming
117 the stage boundaries are placed efficiently. */
118 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
119 + 1 + ii - 1) / ii)
120 /* The stage count of ps. */
121 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
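/* Worked example (hypothetical values): with ii = 4, SMODULO (-3, 4)
yields (-3 % 4) + 4 = 1, whereas plain -3 % 4 is -3 in C. For a
schedule spanning cycles -3 .. 5, CALC_STAGE_COUNT (5, -3, 4)
= (5 - (-3) + 1 + 4 - 1) / 4 = 12 / 4 = 3 stages. */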
123 /* A single instruction in the partial schedule. */
124 struct ps_insn
126 /* The corresponding DDG_NODE. */
127 ddg_node_ptr node;
129 /* The (absolute) cycle in which the PS instruction is scheduled.
130 Same as SCHED_TIME (node). */
131 int cycle;
133 /* The next/prev PS_INSN in the same row. */
134 ps_insn_ptr next_in_row,
135 prev_in_row;
139 /* Holds the partial schedule as an array of II rows. Each entry of the
140 array points to a linked list of PS_INSNs, which represents the
141 instructions that are scheduled for that row. */
142 struct partial_schedule
144 int ii; /* Number of rows in the partial schedule. */
145 int history; /* Threshold for conflict checking using DFA. */
147 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
148 ps_insn_ptr *rows;
150 /* rows_length[i] holds the number of instructions in the row.
151 It is used only (as an optimization) to back off quickly from
152 trying to schedule a node in a full row; that is, to avoid running
153 through futile DFA state transitions. */
154 int *rows_length;
156 /* The earliest absolute cycle of an insn in the partial schedule. */
157 int min_cycle;
159 /* The latest absolute cycle of an insn in the partial schedule. */
160 int max_cycle;
162 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
164 int stage_count; /* The stage count of the partial schedule. */
167 /* We use this to record all the register replacements we do in
168 the kernel so we can undo SMS if it is not profitable. */
169 struct undo_replace_buff_elem
171 rtx insn;
172 rtx orig_reg;
173 rtx new_reg;
174 struct undo_replace_buff_elem *next;
179 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
180 static void free_partial_schedule (partial_schedule_ptr);
181 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
182 void print_partial_schedule (partial_schedule_ptr, FILE *);
183 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
184 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
185 ddg_node_ptr node, int cycle,
186 sbitmap must_precede,
187 sbitmap must_follow);
188 static void rotate_partial_schedule (partial_schedule_ptr, int);
189 void set_row_column_for_ps (partial_schedule_ptr);
190 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
191 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
194 /* This page defines constants and structures for the modulo scheduling
195 driver. */
197 static int sms_order_nodes (ddg_ptr, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr);
199 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr, rtx);
201 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
202 rtx, rtx);
203 static void duplicate_insns_of_cycles (partial_schedule_ptr,
204 int, int, int, rtx);
205 static int calculate_stage_count (partial_schedule_ptr ps);
206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
208 #define SCHED_FIRST_REG_MOVE(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
210 #define SCHED_NREG_MOVES(x) \
211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
217 typedef struct node_sched_params
219 int asap; /* A lower-bound on the absolute scheduling cycle. */
220 int time; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
226 rtx first_reg_move;
228 /* The number of register-move instructions added, immediately preceding
229 first_reg_move. */
230 int nreg_moves;
232 int row; /* Holds time % ii. */
233 int stage; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
237 int column;
238 } *node_sched_params_ptr;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
244 static const char *
245 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
247 static char tmp[80];
249 sprintf (tmp, "i%4d", INSN_UID (insn));
250 return tmp;
253 static void
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
255 regset used ATTRIBUTE_UNUSED)
259 static struct common_sched_info_def sms_common_sched_info;
261 static struct sched_deps_info_def sms_sched_deps_info =
263 compute_jump_reg_dependencies,
264 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
265 NULL,
266 0, 0, 0
269 static struct haifa_sched_info sms_sched_info =
271 NULL,
272 NULL,
273 NULL,
274 NULL,
275 NULL,
276 sms_print_insn,
277 NULL,
278 NULL, /* insn_finishes_block_p */
279 NULL, NULL,
280 NULL, NULL,
281 0, 0,
283 NULL, NULL, NULL, NULL,
284 NULL, NULL,
288 /* Given HEAD and TAIL, which are the first and last insns in a loop,
289 return the register which controls the loop. Return zero if the
290 register has occurrences in the loop besides the control part, or if
291 the do-loop pattern is not of the form we expect. */
292 static rtx
293 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
295 #ifdef HAVE_doloop_end
296 rtx reg, condition, insn, first_insn_not_to_check;
298 if (!JUMP_P (tail))
299 return NULL_RTX;
301 /* TODO: Free SMS's dependence on doloop_condition_get. */
302 condition = doloop_condition_get (tail);
303 if (! condition)
304 return NULL_RTX;
306 if (REG_P (XEXP (condition, 0)))
307 reg = XEXP (condition, 0);
308 else if (GET_CODE (XEXP (condition, 0)) == PLUS
309 && REG_P (XEXP (XEXP (condition, 0), 0)))
310 reg = XEXP (XEXP (condition, 0), 0);
311 else
312 gcc_unreachable ();
314 /* Check that the COUNT_REG has no other occurrences in the loop
315 until the decrement. We assume the control part consists of
316 either a single (parallel) branch-on-count or a (non-parallel)
317 branch immediately preceded by a single (decrement) insn. */
318 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
319 : prev_nondebug_insn (tail));
321 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
322 if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
324 if (dump_file)
326 fprintf (dump_file, "SMS count_reg found ");
327 print_rtl_single (dump_file, reg);
328 fprintf (dump_file, " outside control in insn:\n");
329 print_rtl_single (dump_file, insn);
332 return NULL_RTX;
335 return reg;
336 #else
337 return NULL_RTX;
338 #endif
341 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
342 that the number of iterations is a compile-time constant. If so,
343 return the rtx that sets COUNT_REG to a constant, and set COUNT to
344 this constant. Otherwise return 0. */
345 static rtx
346 const_iteration_count (rtx count_reg, basic_block pre_header,
347 HOST_WIDEST_INT * count)
349 rtx insn;
350 rtx head, tail;
352 if (! pre_header)
353 return NULL_RTX;
355 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
357 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
358 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
359 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
361 rtx pat = single_set (insn);
363 if (CONST_INT_P (SET_SRC (pat)))
365 *count = INTVAL (SET_SRC (pat));
366 return insn;
369 return NULL_RTX;
372 return NULL_RTX;
375 /* A very simple resource-based lower bound on the initiation interval.
376 ??? Improve the accuracy of this bound by considering the
377 utilization of various units. */
378 static int
379 res_MII (ddg_ptr g)
381 if (targetm.sched.sms_res_mii)
382 return targetm.sched.sms_res_mii (g);
384 return ((g->num_nodes - g->num_debug) / issue_rate);
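/* For example (hypothetical numbers), a loop body of 12 non-debug insns
on a target with issue_rate 4 gives a resource-based lower bound of
12 / 4 = 3 cycles for the initiation interval. */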
388 /* Points to the array that contains the sched data for each node. */
389 static node_sched_params_ptr node_sched_params;
391 /* Allocate sched_params for each node and initialize it. Assumes that
392 the aux field of each node contains the asap bound (computed earlier),
393 and copies it into the sched_params field. */
394 static void
395 set_node_sched_params (ddg_ptr g)
397 int i;
399 /* Allocate for each node in the DDG a place to hold the "sched_data". */
400 /* Initialize ASAP/ALAP/HEIGHT to zero. */
401 node_sched_params = (node_sched_params_ptr)
402 xcalloc (g->num_nodes,
403 sizeof (struct node_sched_params));
405 /* Set the pointer of the general data of the node to point to the
406 appropriate sched_params structure. */
407 for (i = 0; i < g->num_nodes; i++)
409 /* Watch out for aliasing problems? */
410 node_sched_params[i].asap = g->nodes[i].aux.count;
411 g->nodes[i].aux.info = &node_sched_params[i];
415 static void
416 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
418 int i;
420 if (! file)
421 return;
422 for (i = 0; i < num_nodes; i++)
424 node_sched_params_ptr nsp = &node_sched_params[i];
425 rtx reg_move = nsp->first_reg_move;
426 int j;
428 fprintf (file, "Node = %d; INSN = %d\n", i,
429 (INSN_UID (g->nodes[i].insn)));
430 fprintf (file, " asap = %d:\n", nsp->asap);
431 fprintf (file, " time = %d:\n", nsp->time);
432 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
433 for (j = 0; j < nsp->nreg_moves; j++)
435 fprintf (file, " reg_move = ");
436 print_rtl_single (file, reg_move);
437 reg_move = PREV_INSN (reg_move);
443 Breaking intra-loop register anti-dependences:
444 Each intra-loop register anti-dependence implies a cross-iteration true
445 dependence of distance 1. Therefore, we can remove such false dependencies
446 and figure out if the partial schedule broke them by checking if (for a
447 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
448 if so generate a register move. The number of such moves is equal to:
449 nreg_moves = (SCHED_TIME (use) - SCHED_TIME (def)) / ii + 1 - b,
450 where b = 0 if the partial schedule broke the dependence
451 and b = 1 if it did not.
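/* Worked example (hypothetical schedule): for a true dependence of
distance 1 with SCHED_TIME (def) = 2, SCHED_TIME (use) = 9 and ii = 4,
the code below computes (9 - 2 + 4) / 4 = 2 register moves to keep the
defined value live across the overlapping iterations. */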
453 static struct undo_replace_buff_elem *
454 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
456 ddg_ptr g = ps->g;
457 int ii = ps->ii;
458 int i;
459 struct undo_replace_buff_elem *reg_move_replaces = NULL;
461 for (i = 0; i < g->num_nodes; i++)
463 ddg_node_ptr u = &g->nodes[i];
464 ddg_edge_ptr e;
465 int nreg_moves = 0, i_reg_move;
466 sbitmap *uses_of_defs;
467 rtx last_reg_move;
468 rtx prev_reg, old_reg;
470 /* Compute the number of reg_moves needed for u, by looking at life
471 ranges started at u (excluding self-loops). */
472 for (e = u->out; e; e = e->next_out)
473 if (e->type == TRUE_DEP && e->dest != e->src)
475 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
477 if (e->distance == 1)
478 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
480 /* If dest precedes src in the schedule of the kernel, then dest
481 will read before src writes and we can save one reg_copy. */
482 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
483 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
484 nreg_moves4e--;
486 nreg_moves = MAX (nreg_moves, nreg_moves4e);
489 if (nreg_moves == 0)
490 continue;
492 /* Every use of the register defined by node may require a different
493 copy of this register, depending on the time the use is scheduled.
494 Set a bitmap vector, telling which nodes use each copy of this
495 register. */
496 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
497 sbitmap_vector_zero (uses_of_defs, nreg_moves);
498 for (e = u->out; e; e = e->next_out)
499 if (e->type == TRUE_DEP && e->dest != e->src)
501 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
503 if (e->distance == 1)
504 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
506 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
507 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
508 dest_copy--;
510 if (dest_copy)
511 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
514 /* Now generate the reg_moves, attaching relevant uses to them. */
515 SCHED_NREG_MOVES (u) = nreg_moves;
516 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
517 /* Insert the reg-moves right before the notes which precede
518 the insn they relate to. */
519 last_reg_move = u->first_note;
521 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
523 unsigned int i_use = 0;
524 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
525 rtx reg_move = gen_move_insn (new_reg, prev_reg);
526 sbitmap_iterator sbi;
528 add_insn_before (reg_move, last_reg_move, NULL);
529 last_reg_move = reg_move;
531 if (!SCHED_FIRST_REG_MOVE (u))
532 SCHED_FIRST_REG_MOVE (u) = reg_move;
534 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
536 struct undo_replace_buff_elem *rep;
538 rep = (struct undo_replace_buff_elem *)
539 xcalloc (1, sizeof (struct undo_replace_buff_elem));
540 rep->insn = g->nodes[i_use].insn;
541 rep->orig_reg = old_reg;
542 rep->new_reg = new_reg;
544 if (! reg_move_replaces)
545 reg_move_replaces = rep;
546 else
548 rep->next = reg_move_replaces;
549 reg_move_replaces = rep;
552 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
553 if (rescan)
554 df_insn_rescan (g->nodes[i_use].insn);
557 prev_reg = new_reg;
559 sbitmap_vector_free (uses_of_defs);
561 return reg_move_replaces;
564 /* Free memory allocated for the undo buffer. */
565 static void
566 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
569 while (reg_move_replaces)
571 struct undo_replace_buff_elem *rep = reg_move_replaces;
573 reg_move_replaces = reg_move_replaces->next;
574 free (rep);
578 /* Normalize the SCHED_TIMEs of all nodes by subtracting AMOUNT, and set
579 the values of SCHED_ROW and SCHED_STAGE. Instructions scheduled in
580 cycle AMOUNT will move to cycle zero. */
581 static void
582 reset_sched_times (partial_schedule_ptr ps, int amount)
584 int row;
585 int ii = ps->ii;
586 ps_insn_ptr crr_insn;
588 for (row = 0; row < ii; row++)
589 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
591 ddg_node_ptr u = crr_insn->node;
592 int normalized_time = SCHED_TIME (u) - amount;
593 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
594 int sc_until_cycle_zero, stage;
596 if (dump_file)
598 /* Print the scheduling times after the rotation. */
599 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
600 "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
601 INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
602 normalized_time);
603 if (JUMP_P (crr_insn->node->insn))
604 fprintf (dump_file, " (branch)");
605 fprintf (dump_file, "\n");
608 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
609 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
610 SCHED_TIME (u) = normalized_time;
611 SCHED_ROW (u) = SMODULO (normalized_time, ii);
613 /* The calculation of stage count is done adding the number
614 of stages before cycle zero and after cycle zero. */
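/* For instance (hypothetical values), with ii = 4 and normalized times
in -3 .. 5: sc_until_cycle_zero = CALC_STAGE_COUNT (-1, -3, 4) = 1;
a node at time -3 gets stage 1 - 1 = 0, a node at time 0 gets stage
1 + 1 - 1 = 1, and a node at time 5 gets stage 1 + 2 - 1 = 2. */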
615 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
617 if (SCHED_TIME (u) < 0)
619 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
620 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
622 else
624 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
625 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
630 /* Set SCHED_COLUMN of each node according to its position in PS. */
631 static void
632 set_columns_for_ps (partial_schedule_ptr ps)
634 int row;
636 for (row = 0; row < ps->ii; row++)
638 ps_insn_ptr cur_insn = ps->rows[row];
639 int column = 0;
641 for (; cur_insn; cur_insn = cur_insn->next_in_row)
642 SCHED_COLUMN (cur_insn->node) = column++;
646 /* Permute the insns according to their order in PS, from row 0 to
647 row ii-1, and position them right before LAST. This schedules
648 the insns of the loop kernel. */
649 static void
650 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
652 int ii = ps->ii;
653 int row;
654 ps_insn_ptr ps_ij;
656 for (row = 0; row < ii ; row++)
657 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
658 if (PREV_INSN (last) != ps_ij->node->insn)
659 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
660 PREV_INSN (last));
663 static void
664 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
665 int to_stage, int for_prolog, rtx count_reg)
667 int row;
668 ps_insn_ptr ps_ij;
670 for (row = 0; row < ps->ii; row++)
671 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
673 ddg_node_ptr u_node = ps_ij->node;
674 int j, i_reg_moves;
675 rtx reg_move = NULL_RTX;
677 /* Do not duplicate any insn which refers to count_reg as it
678 belongs to the control part.
679 The closing branch is scheduled as well and thus should
680 be ignored.
681 TODO: This should be done by analyzing the control part of
682 the loop. */
683 if (reg_mentioned_p (count_reg, u_node->insn)
684 || JUMP_P (ps_ij->node->insn))
685 continue;
687 if (for_prolog)
689 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
690 number of reg_moves starting with the second occurrence of
691 u_node, which is generated if its SCHED_STAGE <= to_stage. */
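/* E.g. (hypothetical numbers), for a prolog copy with to_stage = 1,
SCHED_STAGE (u_node) = 0 and SCHED_NREG_MOVES (u_node) = 3, this
yields i_reg_moves = MIN (1 - 0 + 1, 3) = 2 reg_moves to emit. */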
692 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
693 i_reg_moves = MAX (i_reg_moves, 0);
694 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
696 /* The reg_moves start from the *first* reg_move backwards. */
697 if (i_reg_moves)
699 reg_move = SCHED_FIRST_REG_MOVE (u_node);
700 for (j = 1; j < i_reg_moves; j++)
701 reg_move = PREV_INSN (reg_move);
704 else /* It's for the epilog. */
706 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
707 starting to decrease one stage after u_node no longer occurs;
708 that is, generate all reg_moves until
709 SCHED_STAGE (u_node) == from_stage - 1. */
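/* E.g. (hypothetical numbers), for an epilog copy with from_stage = 2,
SCHED_STAGE (u_node) = 0 and SCHED_NREG_MOVES (u_node) = 3, this
yields i_reg_moves = 3 - (2 - 0 - 1) = 2 reg_moves to emit. */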
710 i_reg_moves = SCHED_NREG_MOVES (u_node)
711 - (from_stage - SCHED_STAGE (u_node) - 1);
712 i_reg_moves = MAX (i_reg_moves, 0);
713 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
715 /* The reg_moves start from the *last* reg_move forwards. */
716 if (i_reg_moves)
718 reg_move = SCHED_FIRST_REG_MOVE (u_node);
719 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
720 reg_move = PREV_INSN (reg_move);
724 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
725 emit_insn (copy_rtx (PATTERN (reg_move)));
726 if (SCHED_STAGE (u_node) >= from_stage
727 && SCHED_STAGE (u_node) <= to_stage)
728 duplicate_insn_chain (u_node->first_note, u_node->insn);
733 /* Generate the instructions (including reg_moves) for prolog & epilog. */
734 static void
735 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
736 rtx count_reg, rtx count_init)
738 int i;
739 int last_stage = PS_STAGE_COUNT (ps) - 1;
740 edge e;
742 /* Generate the prolog, inserting its insns on the loop-entry edge. */
743 start_sequence ();
745 if (!count_init)
747 /* Generate instructions at the beginning of the prolog to
748 adjust the loop count by STAGE_COUNT. If the loop count is constant
749 (count_init), this constant is adjusted by STAGE_COUNT in the
750 caller, sms_schedule, instead. */
751 rtx sub_reg = NULL_RTX;
753 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
754 count_reg, GEN_INT (last_stage),
755 count_reg, 1, OPTAB_DIRECT);
756 gcc_assert (REG_P (sub_reg));
757 if (REGNO (sub_reg) != REGNO (count_reg))
758 emit_move_insn (count_reg, sub_reg);
761 for (i = 0; i < last_stage; i++)
762 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
764 /* Put the prolog on the entry edge. */
765 e = loop_preheader_edge (loop);
766 split_edge_and_insert (e, get_insns ());
768 end_sequence ();
770 /* Generate the epilog, inserting its insns on the loop-exit edge. */
771 start_sequence ();
773 for (i = 0; i < last_stage; i++)
774 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
776 /* Put the epilogue on the exit edge. */
777 gcc_assert (single_exit (loop));
778 e = single_exit (loop);
779 split_edge_and_insert (e, get_insns ());
780 end_sequence ();
783 /* Return true if all the BBs of the loop are empty except the
784 loop header. */
785 static bool
786 loop_single_full_bb_p (struct loop *loop)
788 unsigned i;
789 basic_block *bbs = get_loop_body (loop);
791 for (i = 0; i < loop->num_nodes ; i++)
793 rtx head, tail;
794 bool empty_bb = true;
796 if (bbs[i] == loop->header)
797 continue;
799 /* Make sure that basic blocks other than the header
800 have only notes, labels or jumps. */
801 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
802 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
804 if (NOTE_P (head) || LABEL_P (head)
805 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
806 continue;
807 empty_bb = false;
808 break;
811 if (! empty_bb)
813 free (bbs);
814 return false;
817 free (bbs);
818 return true;
821 /* A simple loop from SMS point of view; it is a loop that is composed of
822 either a single basic block or two BBs - a header and a latch. */
823 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
824 && (EDGE_COUNT (loop->latch->preds) == 1) \
825 && (EDGE_COUNT (loop->latch->succs) == 1))
827 /* Return true if the loop is in its canonical form and false if not.
828 i.e., it is SIMPLE_SMS_LOOP_P and has one preheader block and a single exit. */
829 static bool
830 loop_canon_p (struct loop *loop)
833 if (loop->inner || !loop_outer (loop))
835 if (dump_file)
836 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
837 return false;
840 if (!single_exit (loop))
842 if (dump_file)
844 rtx insn = BB_END (loop->header);
846 fprintf (dump_file, "SMS loop many exits ");
847 fprintf (dump_file, " %s %d (file, line)\n",
848 insn_file (insn), insn_line (insn));
850 return false;
853 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
855 if (dump_file)
857 rtx insn = BB_END (loop->header);
859 fprintf (dump_file, "SMS loop many BBs. ");
860 fprintf (dump_file, " %s %d (file, line)\n",
861 insn_file (insn), insn_line (insn));
863 return false;
866 return true;
869 /* If there is more than one entry to the loop,
870 make it one by splitting the first entry edge and
871 redirecting the others to the new BB. */
872 static void
873 canon_loop (struct loop *loop)
875 edge e;
876 edge_iterator i;
878 /* Avoid annoying special cases of edges going to exit
879 block. */
880 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
881 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
882 split_edge (e);
884 if (loop->latch == loop->header
885 || EDGE_COUNT (loop->latch->succs) > 1)
887 FOR_EACH_EDGE (e, i, loop->header->preds)
888 if (e->src == loop->latch)
889 break;
890 split_edge (e);
894 /* Set up the scheduler infos used by SMS. */
895 static void
896 setup_sched_infos (void)
898 memcpy (&sms_common_sched_info, &haifa_common_sched_info,
899 sizeof (sms_common_sched_info));
900 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
901 common_sched_info = &sms_common_sched_info;
903 sched_deps_info = &sms_sched_deps_info;
904 current_sched_info = &sms_sched_info;
907 /* Probability in % that the sms-ed loop rolls enough so that the
908 optimized version may be entered. Just a guess. */
909 #define PROB_SMS_ENOUGH_ITERATIONS 80
911 /* Used to calculate the upper bound of ii. */
912 #define MAXII_FACTOR 2
914 /* Main entry point, perform SMS scheduling on the loops of the function
915 that consist of single basic blocks. */
916 static void
917 sms_schedule (void)
919 rtx insn;
920 ddg_ptr *g_arr, g;
921 int * node_order;
922 int maxii, max_asap;
923 loop_iterator li;
924 partial_schedule_ptr ps;
925 basic_block bb = NULL;
926 struct loop *loop;
927 basic_block condition_bb = NULL;
928 edge latch_edge;
929 gcov_type trip_count = 0;
931 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
932 | LOOPS_HAVE_RECORDED_EXITS);
933 if (number_of_loops () <= 1)
935 loop_optimizer_finalize ();
936 return; /* There are no loops to schedule. */
939 /* Initialize issue_rate. */
940 if (targetm.sched.issue_rate)
942 int temp = reload_completed;
944 reload_completed = 1;
945 issue_rate = targetm.sched.issue_rate ();
946 reload_completed = temp;
948 else
949 issue_rate = 1;
951 /* Initialize the scheduler. */
952 setup_sched_infos ();
953 haifa_sched_init ();
955 /* Allocate memory to hold the DDG array, one entry for each loop.
956 We use loop->num as an index into this array. */
957 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
959 if (dump_file)
961 fprintf (dump_file, "\n\nSMS analysis phase\n");
962 fprintf (dump_file, "===================\n\n");
965 /* Build DDGs for all the relevant loops and hold them in G_ARR
966 indexed by the loop index. */
967 FOR_EACH_LOOP (li, loop, 0)
969 rtx head, tail;
970 rtx count_reg;
972 /* For debugging. */
973 if (dbg_cnt (sms_sched_loop) == false)
975 if (dump_file)
976 fprintf (dump_file, "SMS reached max limit... \n");
978 break;
981 if (dump_file)
983 rtx insn = BB_END (loop->header);
985 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
986 loop->num, insn_file (insn), insn_line (insn));
990 if (! loop_canon_p (loop))
991 continue;
993 if (! loop_single_full_bb_p (loop))
995 if (dump_file)
996 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
997 continue;
1000 bb = loop->header;
1002 get_ebb_head_tail (bb, bb, &head, &tail);
1003 latch_edge = loop_latch_edge (loop);
1004 gcc_assert (single_exit (loop));
1005 if (single_exit (loop)->count)
1006 trip_count = latch_edge->count / single_exit (loop)->count;
1008 /* Perform SMS only on loops whose average count is above the threshold. */
1010 if ( latch_edge->count
1011 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1013 if (dump_file)
1015 fprintf (dump_file, " %s %d (file, line)\n",
1016 insn_file (tail), insn_line (tail));
1017 fprintf (dump_file, "SMS single-bb-loop\n");
1018 if (profile_info && flag_branch_probabilities)
1020 fprintf (dump_file, "SMS loop-count ");
1021 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1022 (HOST_WIDEST_INT) bb->count);
1023 fprintf (dump_file, "\n");
1024 fprintf (dump_file, "SMS trip-count ");
1025 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1026 (HOST_WIDEST_INT) trip_count);
1027 fprintf (dump_file, "\n");
1028 fprintf (dump_file, "SMS profile-sum-max ");
1029 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1030 (HOST_WIDEST_INT) profile_info->sum_max);
1031 fprintf (dump_file, "\n");
1034 continue;
1037 /* Make sure this is a doloop. */
1038 if ( !(count_reg = doloop_register_get (head, tail)))
1040 if (dump_file)
1041 fprintf (dump_file, "SMS doloop_register_get failed\n");
1042 continue;
1045 /* Don't handle BBs with calls or barriers or auto-increment insns
1046 (to avoid creating invalid reg-moves for the auto-increment insns),
1047 or non-single_set insns, with the exception of instructions that include
1048 count_reg---these instructions are part of the control part
1049 that the do-loop pattern recognizes.
1050 ??? Should handle auto-increment insns.
1051 ??? Should handle insns defining subregs. */
1052 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1054 rtx set;
1056 if (CALL_P (insn)
1057 || BARRIER_P (insn)
1058 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1059 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1060 && !reg_mentioned_p (count_reg, insn))
1061 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1062 || (INSN_P (insn) && (set = single_set (insn))
1063 && GET_CODE (SET_DEST (set)) == SUBREG))
1064 break;
1067 if (insn != NEXT_INSN (tail))
1069 if (dump_file)
1071 if (CALL_P (insn))
1072 fprintf (dump_file, "SMS loop-with-call\n");
1073 else if (BARRIER_P (insn))
1074 fprintf (dump_file, "SMS loop-with-barrier\n");
1075 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1076 fprintf (dump_file, "SMS reg inc\n");
1077 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1078 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1079 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1080 else
1081 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1082 print_rtl_single (dump_file, insn);
1085 continue;
1088 /* Always schedule the closing branch with the rest of the
1089 instructions. The branch is rotated to be in row ii-1 at the
1090 end of the scheduling procedure to make sure it's the last
1091 instruction in the iteration. */
1092 if (! (g = create_ddg (bb, 1)))
1094 if (dump_file)
1095 fprintf (dump_file, "SMS create_ddg failed\n");
1096 continue;
1099 g_arr[loop->num] = g;
1100 if (dump_file)
1101 fprintf (dump_file, "...OK\n");
1104 if (dump_file)
1106 fprintf (dump_file, "\nSMS transformation phase\n");
1107 fprintf (dump_file, "=========================\n\n");
1110 /* We don't want to perform SMS on new loops created by versioning. */
1111 FOR_EACH_LOOP (li, loop, 0)
1113 rtx head, tail;
1114 rtx count_reg, count_init;
1115 int mii, rec_mii;
1116 unsigned stage_count = 0;
1117 HOST_WIDEST_INT loop_count = 0;
1119 if (! (g = g_arr[loop->num]))
1120 continue;
1122 if (dump_file)
1124 rtx insn = BB_END (loop->header);
1126 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1127 loop->num, insn_file (insn), insn_line (insn));
1129 print_ddg (dump_file, g);
1132 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1134 latch_edge = loop_latch_edge (loop);
1135 gcc_assert (single_exit (loop));
1136 if (single_exit (loop)->count)
1137 trip_count = latch_edge->count / single_exit (loop)->count;
1139 if (dump_file)
1141 fprintf (dump_file, " %s %d (file, line)\n",
1142 insn_file (tail), insn_line (tail));
1143 fprintf (dump_file, "SMS single-bb-loop\n");
1144 if (profile_info && flag_branch_probabilities)
1146 fprintf (dump_file, "SMS loop-count ");
1147 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1148 (HOST_WIDEST_INT) bb->count);
1149 fprintf (dump_file, "\n");
1150 fprintf (dump_file, "SMS profile-sum-max ");
1151 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1152 (HOST_WIDEST_INT) profile_info->sum_max);
1153 fprintf (dump_file, "\n");
1155 fprintf (dump_file, "SMS doloop\n");
1156 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1157 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1158 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1162 /* If the loop has a doloop register, it gets special
1163 handling. */
1164 count_init = NULL_RTX;
1165 if ((count_reg = doloop_register_get (head, tail)))
1167 basic_block pre_header;
1169 pre_header = loop_preheader_edge (loop)->src;
1170 count_init = const_iteration_count (count_reg, pre_header,
1171 &loop_count);
1173 gcc_assert (count_reg);
1175 if (dump_file && count_init)
1177 fprintf (dump_file, "SMS const-doloop ");
1178 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1179 loop_count);
1180 fprintf (dump_file, "\n");
1183 node_order = XNEWVEC (int, g->num_nodes);
1185 mii = 1; /* Need to pass some estimate of mii. */
1186 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1187 mii = MAX (res_MII (g), rec_mii);
1188 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1190 if (dump_file)
1191 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1192 rec_mii, mii, maxii);
1194 /* Set the node scheduling parameters after sms_order_nodes and before
1195 sms_schedule_by_order, to copy over the ASAP values. */
1196 set_node_sched_params (g);
1198 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1200 if (ps)
1202 stage_count = calculate_stage_count (ps);
1203 gcc_assert(stage_count >= 1);
1204 PS_STAGE_COUNT(ps) = stage_count;
1207 /* The default value of PARAM_SMS_MIN_SC is 2, as a stage count of
1208 1 means that there is no interleaving between iterations, thus
1209 we let the normal scheduling passes do the job in this case. */
1210 if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
1211 || (count_init && (loop_count <= stage_count))
1212 || (flag_branch_probabilities && (trip_count <= stage_count)))
1214 if (dump_file)
1216 fprintf (dump_file, "SMS failed... \n");
1217 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1218 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1219 fprintf (dump_file, ", trip-count=");
1220 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1221 fprintf (dump_file, ")\n");
1224 else
1226 struct undo_replace_buff_elem *reg_move_replaces;
1227 int amount = SCHED_TIME (g->closing_branch) + 1;
1229 /* Set the stage boundaries. The closing_branch was scheduled
1230 and should appear in the last (ii-1) row. */
1231 reset_sched_times (ps, amount);
1232 rotate_partial_schedule (ps, amount);
1233 set_columns_for_ps (ps);
1235 canon_loop (loop);
1237 if (dump_file)
1239 fprintf (dump_file,
1240 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1241 stage_count);
1242 print_partial_schedule (ps, dump_file);
1245 /* In case the BCT count is not known, do loop-versioning. */
1246 if (count_reg && ! count_init)
1248 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1249 GEN_INT(stage_count));
1250 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1251 * REG_BR_PROB_BASE) / 100;
1253 loop_version (loop, comp_rtx, &condition_bb,
1254 prob, prob, REG_BR_PROB_BASE - prob,
1255 true);
1258 /* Set new iteration count of loop kernel. */
1259 if (count_reg && count_init)
1260 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1261 - stage_count + 1);
1263 /* Now apply the scheduled kernel to the RTL of the loop. */
1264 permute_partial_schedule (ps, g->closing_branch->first_note);
1266 /* Mark this loop as software pipelined so the later
1267 scheduling passes don't touch it. */
1268 if (! flag_resched_modulo_sched)
1269 g->bb->flags |= BB_DISABLE_SCHEDULE;
1270 /* The life-info is not valid any more. */
1271 df_set_bb_dirty (g->bb);
1273 reg_move_replaces = generate_reg_moves (ps, true);
1274 if (dump_file)
1275 print_node_sched_params (dump_file, g->num_nodes, g);
1276 /* Generate prolog and epilog. */
1277 generate_prolog_epilog (ps, loop, count_reg, count_init);
1279 free_undo_replace_buff (reg_move_replaces);
1282 free_partial_schedule (ps);
1283 free (node_sched_params);
1284 free (node_order);
1285 free_ddg (g);
1288 free (g_arr);
1290 /* Release scheduler data, needed until now because of DFA. */
1291 haifa_sched_finish ();
1292 loop_optimizer_finalize ();
1295 /* The SMS scheduling algorithm itself
1296 -----------------------------------
1297 Input: 'O' an ordered list of insns of a loop.
1298 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1300 'Q' is the empty Set
1301 'PS' is the partial schedule; it holds the currently scheduled nodes with
1302 their cycle/slot.
1303 'PSP' previously scheduled predecessors.
1304 'PSS' previously scheduled successors.
1305 't(u)' the cycle where u is scheduled.
1306 'l(u)' is the latency of u.
1307 'd(v,u)' is the dependence distance from v to u.
1308 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1309 the node ordering phase.
1310 'check_hardware_resources_conflicts(u, PS, c)'
1311 run a trace around cycle/slot through DFA model
1312 to check resource conflicts involving instruction u
1313 at cycle c given the partial schedule PS.
1314 'add_to_partial_schedule_at_time(u, PS, c)'
1315 Add the node/instruction u to the partial schedule
1316 PS at time c.
1317 'calculate_register_pressure(PS)'
1318 Given a schedule of instructions, calculate the register
1319 pressure it implies. One implementation could be the
1320 maximum number of overlapping live ranges.
1321 'maxRP' The maximum allowed register pressure; it is usually derived from
1322 the number of registers available in the hardware.
1324 1. II = MII.
1325 2. PS = empty list
1326 3. for each node u in O in pre-computed order
1327 4. if (PSP(u) != Q && PSS(u) == Q) then
1328 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1329 6. start = Early_start; end = Early_start + II - 1; step = 1
1330 11. else if (PSP(u) == Q && PSS(u) != Q) then
1331 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1332 13. start = Late_start; end = Late_start - II + 1; step = -1
1333 14. else if (PSP(u) != Q && PSS(u) != Q) then
1334 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1335 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1336 17. start = Early_start;
1337 18. end = min(Early_start + II - 1 , Late_start);
1338 19. step = 1
1339 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1340 21. start = ASAP(u); end = start + II - 1; step = 1
1341 22. endif
1343 23. success = false
1344 24. for (c = start ; c != end ; c += step)
1345 25. if check_hardware_resources_conflicts(u, PS, c) then
1346 26. add_to_partial_schedule_at_time(u, PS, c)
1347 27. success = true
1348 28. break
1349 29. endif
1350 30. endfor
1351 31. if (success == false) then
1352 32. II = II + 1
1353 33. if (II > maxII) then
1354 34. finish - failed to schedule
1355 35. endif
1356 36. goto 2.
1357 37. endif
1358 38. endfor
1359 39. if (calculate_register_pressure(PS) > maxRP) then
1360 40. goto 32.
1361 41. endif
1362 42. compute epilogue & prologue
1363 43. finish - succeeded to schedule
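/* A small worked example of step 5 above (hypothetical numbers): if the
only scheduled predecessor v of u has t(v) = 3, l(v) = 2 and
d(v,u) = 1, and II = 4, then Early_start(u) = 3 + 2 - 1*4 = 1, so the
search for a free slot for u starts at cycle 1 and moves forward
within a window of at most II cycles. */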
1366 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1367 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1368 set to 0 to save compile time. */
1369 #define DFA_HISTORY SMS_DFA_HISTORY
1371 /* A threshold for the number of repeated unsuccessful attempts to insert
1372 an empty row, before we flush the partial schedule and start over. */
1373 #define MAX_SPLIT_NUM 10
1374 /* Given the partial schedule PS, this function calculates and returns the
1375 cycles in which we can schedule the node with the given index I.
1376 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1377 noticed that there are several cases in which we fail to SMS the loop
1378 because the sched window of a node is empty due to tight data-deps. In
1379 such cases we want to unschedule some of the predecessors/successors
1380 until we get a non-empty scheduling window. It returns -1 if the
1381 scheduling window is empty and zero otherwise. */
1383 static int
1384 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1385 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1387 int start, step, end;
1388 ddg_edge_ptr e;
1389 int u = nodes_order [i];
1390 ddg_node_ptr u_node = &ps->g->nodes[u];
1391 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1392 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1393 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1394 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1395 int psp_not_empty;
1396 int pss_not_empty;
1398 /* 1. compute sched window for u (start, end, step). */
1399 sbitmap_zero (psp);
1400 sbitmap_zero (pss);
1401 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1402 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1404 if (psp_not_empty && !pss_not_empty)
1406 int early_start = INT_MIN;
1408 end = INT_MAX;
1409 for (e = u_node->in; e != 0; e = e->next_in)
1411 ddg_node_ptr v_node = e->src;
1413 if (dump_file)
1415 fprintf (dump_file, "\nProcessing edge: ");
1416 print_ddg_edge (dump_file, e);
1417 fprintf (dump_file,
1418 "\nScheduling %d (%d) in psp_not_empty,"
1419 " checking p %d (%d): ", u_node->cuid,
1420 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1421 (v_node->insn));
1424 if (TEST_BIT (sched_nodes, v_node->cuid))
1426 int p_st = SCHED_TIME (v_node);
1428 early_start =
1429 MAX (early_start, p_st + e->latency - (e->distance * ii));
1431 if (dump_file)
1432 fprintf (dump_file,
1433 "pred st = %d; early_start = %d; latency: %d",
1434 p_st, early_start, e->latency);
1436 if (e->data_type == MEM_DEP)
1437 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1439 else if (dump_file)
1440 fprintf (dump_file, "the node is not scheduled\n");
1442 start = early_start;
1443 end = MIN (end, early_start + ii);
1444 /* Schedule the node close to its predecessors. */
1445 step = 1;
1447 if (dump_file)
1448 fprintf (dump_file,
1449 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1450 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1453 else if (!psp_not_empty && pss_not_empty)
1455 int late_start = INT_MAX;
1457 end = INT_MIN;
1458 for (e = u_node->out; e != 0; e = e->next_out)
1460 ddg_node_ptr v_node = e->dest;
1462 if (dump_file)
1464 fprintf (dump_file, "\nProcessing edge:");
1465 print_ddg_edge (dump_file, e);
1466 fprintf (dump_file,
1467 "\nScheduling %d (%d) in pss_not_empty,"
1468 " checking s %d (%d): ", u_node->cuid,
1469 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1470 (v_node->insn));
1473 if (TEST_BIT (sched_nodes, v_node->cuid))
1475 int s_st = SCHED_TIME (v_node);
1477 late_start = MIN (late_start,
1478 s_st - e->latency + (e->distance * ii));
1480 if (dump_file)
1481 fprintf (dump_file,
1482 "succ st = %d; late_start = %d; latency = %d",
1483 s_st, late_start, e->latency);
1485 if (e->data_type == MEM_DEP)
1486 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1487 if (dump_file)
1488 fprintf (dump_file, "end = %d\n", end);
1491 else if (dump_file)
1492 fprintf (dump_file, "the node is not scheduled\n");
1495 start = late_start;
1496 end = MAX (end, late_start - ii);
1497 /* Schedule the node close to its successors. */
1498 step = -1;
1500 if (dump_file)
1501 fprintf (dump_file,
1502 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1503 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1507 else if (psp_not_empty && pss_not_empty)
1509 int early_start = INT_MIN;
1510 int late_start = INT_MAX;
1511 int count_preds = 0;
1512 int count_succs = 0;
1514 start = INT_MIN;
1515 end = INT_MAX;
1516 for (e = u_node->in; e != 0; e = e->next_in)
1518 ddg_node_ptr v_node = e->src;
1520 if (dump_file)
1522 fprintf (dump_file, "\nProcessing edge:");
1523 print_ddg_edge (dump_file, e);
1524 fprintf (dump_file,
1525 "\nScheduling %d (%d) in psp_pss_not_empty,"
1526 " checking p %d (%d): ", u_node->cuid, INSN_UID
1527 (u_node->insn), v_node->cuid, INSN_UID
1528 (v_node->insn));
1531 if (TEST_BIT (sched_nodes, v_node->cuid))
1533 int p_st = SCHED_TIME (v_node);
1535 early_start = MAX (early_start,
1536 p_st + e->latency
1537 - (e->distance * ii));
1539 if (dump_file)
1540 fprintf (dump_file,
1541 "pred st = %d; early_start = %d; latency = %d",
1542 p_st, early_start, e->latency);
1544 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1545 count_preds++;
1547 if (e->data_type == MEM_DEP)
1548 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1550 else if (dump_file)
1551 fprintf (dump_file, "the node is not scheduled\n");
1554 for (e = u_node->out; e != 0; e = e->next_out)
1556 ddg_node_ptr v_node = e->dest;
1558 if (dump_file)
1560 fprintf (dump_file, "\nProcessing edge:");
1561 print_ddg_edge (dump_file, e);
1562 fprintf (dump_file,
1563 "\nScheduling %d (%d) in psp_pss_not_empty,"
1564 " checking s %d (%d): ", u_node->cuid, INSN_UID
1565 (u_node->insn), v_node->cuid, INSN_UID
1566 (v_node->insn));
1569 if (TEST_BIT (sched_nodes, v_node->cuid))
1571 int s_st = SCHED_TIME (v_node);
1573 late_start = MIN (late_start,
1574 s_st - e->latency
1575 + (e->distance * ii));
1577 if (dump_file)
1578 fprintf (dump_file,
1579 "succ st = %d; late_start = %d; latency = %d",
1580 s_st, late_start, e->latency);
1582 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1583 count_succs++;
1585 if (e->data_type == MEM_DEP)
1586 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1588 else if (dump_file)
1589 fprintf (dump_file, "the node is not scheduled\n");
1592 start = MAX (start, early_start);
1593 end = MIN (end, MIN (early_start + ii, late_start + 1));
1594 step = 1;
1595 /* If there are more successors than predecessors, schedule the
1596 node close to its successors. */
1597 if (count_succs >= count_preds)
1599 int old_start = start;
1601 start = end - 1;
1602 end = old_start - 1;
1603 step = -1;
1606 else /* psp is empty && pss is empty. */
1608 start = SCHED_ASAP (u_node);
1609 end = start + ii;
1610 step = 1;
1613 *start_p = start;
1614 *step_p = step;
1615 *end_p = end;
1616 sbitmap_free (psp);
1617 sbitmap_free (pss);
1619 if ((start >= end && step == 1) || (start <= end && step == -1))
1621 if (dump_file)
1622 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1623 start, end, step);
1624 return -1;
1627 return 0;
1630 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE, which is the
1631 node currently being scheduled. At the end of the calculation
1632 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1633 U_NODE which are (1) already scheduled in the first/last row of
1634 U_NODE's scheduling window, (2) whose dependence inequality with U
1635 becomes an equality when U is scheduled in this same row, and (3)
1636 whose dependence latency is zero.
1638 The first and last rows are calculated using the following parameters:
1639 START/END rows - The cycles that begin/end the traversal of the window
1640 when searching for an empty cycle to schedule U_NODE.
1641 STEP - The direction in which we traverse the window.
1642 II - The initiation interval. */
1644 static void
1645 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1646 int step, int ii, sbitmap sched_nodes,
1647 sbitmap must_precede, sbitmap must_follow)
1649 ddg_edge_ptr e;
1650 int first_cycle_in_window, last_cycle_in_window;
1652 gcc_assert (must_precede && must_follow);
1654 /* Consider the following scheduling window:
1655 {first_cycle_in_window, first_cycle_in_window+1, ...,
1656 last_cycle_in_window}. If step is 1 then the following will be
1657 the order we traverse the window: {start=first_cycle_in_window,
1658 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1659 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1660 end=first_cycle_in_window-1} if step is -1. */
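/* For example (hypothetical window): with start = 2, end = 7, step = 1
we get first_cycle_in_window = 2 and last_cycle_in_window = 6; with
start = 6, end = 1, step = -1 we get the same window, 2 .. 6. */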
1661 first_cycle_in_window = (step == 1) ? start : end - step;
1662 last_cycle_in_window = (step == 1) ? end - step : start;
1664 sbitmap_zero (must_precede);
1665 sbitmap_zero (must_follow);
1667 if (dump_file)
1668 fprintf (dump_file, "\nmust_precede: ");
1670 /* Instead of checking if:
1671 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1672 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1673 first_cycle_in_window)
1674 && e->latency == 0
1675 we use the fact that latency is non-negative:
1676 SCHED_TIME (e->src) - (e->distance * ii) <=
1677 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1678 first_cycle_in_window
1679 and check only if
1680 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1681 for (e = u_node->in; e != 0; e = e->next_in)
1682 if (TEST_BIT (sched_nodes, e->src->cuid)
1683 && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1684 first_cycle_in_window))
1686 if (dump_file)
1687 fprintf (dump_file, "%d ", e->src->cuid);
1689 SET_BIT (must_precede, e->src->cuid);
1692 if (dump_file)
1693 fprintf (dump_file, "\nmust_follow: ");
1695 /* Instead of checking if:
1696 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1697 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1698 last_cycle_in_window)
1699 && e->latency == 0
1700 we use the fact that latency is non-negative:
1701 SCHED_TIME (e->dest) + (e->distance * ii) >=
1702 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1703 last_cycle_in_window
1704 and check only if
1705 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1706 for (e = u_node->out; e != 0; e = e->next_out)
1707 if (TEST_BIT (sched_nodes, e->dest->cuid)
1708 && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1709 last_cycle_in_window))
1711 if (dump_file)
1712 fprintf (dump_file, "%d ", e->dest->cuid);
1714 SET_BIT (must_follow, e->dest->cuid);
1717 if (dump_file)
1718 fprintf (dump_file, "\n");
1721 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1722 parameters to decide if that's possible:
1723 PS - The partial schedule.
1724 U - The serial number of U_NODE.
1725 NUM_SPLITS - The number of row splits made so far.
1726 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1727 the first row of the scheduling window)
1728 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1729 last row of the scheduling window) */
1731 static bool
1732 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1733 int u, int cycle, sbitmap sched_nodes,
1734 int *num_splits, sbitmap must_precede,
1735 sbitmap must_follow)
1737 ps_insn_ptr psi;
1738 bool success = 0;
1740 verify_partial_schedule (ps, sched_nodes);
1741 psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1742 must_precede, must_follow);
1743 if (psi)
1745 SCHED_TIME (u_node) = cycle;
1746 SET_BIT (sched_nodes, u);
1747 success = 1;
1748 *num_splits = 0;
1749 if (dump_file)
1750 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1754 return success;
1757 /* This function implements the SMS scheduling algorithm described
1758 above. */
1759 static partial_schedule_ptr
1760 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1762 int ii = mii;
1763 int i, c, success, num_splits = 0;
1764 int flush_and_start_over = true;
1765 int num_nodes = g->num_nodes;
1766 int start, end, step; /* Place together into one struct? */
1767 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1768 sbitmap must_precede = sbitmap_alloc (num_nodes);
1769 sbitmap must_follow = sbitmap_alloc (num_nodes);
1770 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1772 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1774 sbitmap_ones (tobe_scheduled);
1775 sbitmap_zero (sched_nodes);
1777 while (flush_and_start_over && (ii < maxii))
1780 if (dump_file)
1781 fprintf (dump_file, "Starting with ii=%d\n", ii);
1782 flush_and_start_over = false;
1783 sbitmap_zero (sched_nodes);
1785 for (i = 0; i < num_nodes; i++)
1787 int u = nodes_order[i];
1788 ddg_node_ptr u_node = &ps->g->nodes[u];
1789 rtx insn = u_node->insn;
1791 if (!NONDEBUG_INSN_P (insn))
1793 RESET_BIT (tobe_scheduled, u);
1794 continue;
1797 if (TEST_BIT (sched_nodes, u))
1798 continue;
1800 /* Try to get a non-empty scheduling window. */
1801 success = 0;
1802 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1803 &step, &end) == 0)
1805 if (dump_file)
1806 fprintf (dump_file, "\nTrying to schedule node %d \
1807 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1808 (g->nodes[u].insn)), start, end, step);
1810 gcc_assert ((step > 0 && start < end)
1811 || (step < 0 && start > end));
1813 calculate_must_precede_follow (u_node, start, end, step, ii,
1814 sched_nodes, must_precede,
1815 must_follow);
1817 for (c = start; c != end; c += step)
1819 sbitmap tmp_precede = NULL;
1820 sbitmap tmp_follow = NULL;
1822 if (c == start)
1824 if (step == 1)
1825 tmp_precede = must_precede;
1826 else /* step == -1. */
1827 tmp_follow = must_follow;
1829 if (c == end - step)
1831 if (step == 1)
1832 tmp_follow = must_follow;
1833 else /* step == -1. */
1834 tmp_precede = must_precede;
1837 success =
1838 try_scheduling_node_in_cycle (ps, u_node, u, c,
1839 sched_nodes,
1840 &num_splits, tmp_precede,
1841 tmp_follow);
1842 if (success)
1843 break;
1846 verify_partial_schedule (ps, sched_nodes);
1848 if (!success)
1850 int split_row;
1852 if (ii++ == maxii)
1853 break;
1855 if (num_splits >= MAX_SPLIT_NUM)
1857 num_splits = 0;
1858 flush_and_start_over = true;
1859 verify_partial_schedule (ps, sched_nodes);
1860 reset_partial_schedule (ps, ii);
1861 verify_partial_schedule (ps, sched_nodes);
1862 break;
1865 num_splits++;
1866 /* The scheduling window is exclusive of 'end'
1867 whereas compute_split_row() expects an inclusive,
1868 ordered range. */
1869 if (step == 1)
1870 split_row = compute_split_row (sched_nodes, start, end - 1,
1871 ps->ii, u_node);
1872 else
1873 split_row = compute_split_row (sched_nodes, end + 1, start,
1874 ps->ii, u_node);
1876 ps_insert_empty_row (ps, split_row, sched_nodes);
1877 i--; /* Go back and retry node i. */
1879 if (dump_file)
1880 fprintf (dump_file, "num_splits=%d\n", num_splits);
1883 /* ??? If (success), check register pressure estimates. */
1884 } /* Continue with next node. */
1885 } /* While flush_and_start_over. */
1886 if (ii >= maxii)
1888 free_partial_schedule (ps);
1889 ps = NULL;
1891 else
1892 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1894 sbitmap_free (sched_nodes);
1895 sbitmap_free (must_precede);
1896 sbitmap_free (must_follow);
1897 sbitmap_free (tobe_scheduled);
1899 return ps;
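/* A small self-contained illustration (not used by the pass) of how the
   scheduling window computed by get_sched_window is traversed above: END is
   exclusive, STEP is either +1 (scan the window upwards) or -1 (scan it
   downwards), and END - STEP is therefore the last cycle actually tried.
   The cycle values are made up for illustration only.  */
static void ATTRIBUTE_UNUSED
toy_sched_window_sketch (void)
{
  int start = 2, end = 6, step = 1, c, last = start;

  for (c = start; c != end; c += step)
    last = c;			/* Visits cycles 2, 3, 4, 5.  */
  gcc_assert (last == end - step);

  start = 5, end = 1, step = -1;
  for (c = start; c != end; c += step)
    last = c;			/* Visits cycles 5, 4, 3, 2.  */
  gcc_assert (last == end - step);
}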
1902 /* Insert a new empty row into PS at the position given by SPLIT_ROW,
1903 keeping all already scheduled instructions intact and updating their
1904 SCHED_TIME and cycle accordingly. */
1905 static void
1906 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1907 sbitmap sched_nodes)
1909 ps_insn_ptr crr_insn;
1910 ps_insn_ptr *rows_new;
1911 int ii = ps->ii;
1912 int new_ii = ii + 1;
1913 int row;
1914 int *rows_length_new;
1916 verify_partial_schedule (ps, sched_nodes);
1918 /* We normalize sched_time and rotate ps to have only non-negative sched
1919 times, for simplicity of updating cycles after inserting the new row. */
1920 split_row -= ps->min_cycle;
1921 split_row = SMODULO (split_row, ii);
1922 if (dump_file)
1923 fprintf (dump_file, "split_row=%d\n", split_row);
1925 reset_sched_times (ps, PS_MIN_CYCLE (ps));
1926 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1928 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1929 rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
1930 for (row = 0; row < split_row; row++)
1932 rows_new[row] = ps->rows[row];
1933 rows_length_new[row] = ps->rows_length[row];
1934 ps->rows[row] = NULL;
1935 for (crr_insn = rows_new[row];
1936 crr_insn; crr_insn = crr_insn->next_in_row)
1938 ddg_node_ptr u = crr_insn->node;
1939 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1941 SCHED_TIME (u) = new_time;
1942 crr_insn->cycle = new_time;
1943 SCHED_ROW (u) = new_time % new_ii;
1944 SCHED_STAGE (u) = new_time / new_ii;
1949 rows_new[split_row] = NULL;
1951 for (row = split_row; row < ii; row++)
1953 rows_new[row + 1] = ps->rows[row];
1954 rows_length_new[row + 1] = ps->rows_length[row];
1955 ps->rows[row] = NULL;
1956 for (crr_insn = rows_new[row + 1];
1957 crr_insn; crr_insn = crr_insn->next_in_row)
1959 ddg_node_ptr u = crr_insn->node;
1960 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1962 SCHED_TIME (u) = new_time;
1963 crr_insn->cycle = new_time;
1964 SCHED_ROW (u) = new_time % new_ii;
1965 SCHED_STAGE (u) = new_time / new_ii;
1969 /* Updating ps. */
1970 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1971 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1972 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1973 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1974 free (ps->rows);
1975 ps->rows = rows_new;
1976 free (ps->rows_length);
1977 ps->rows_length = rows_length_new;
1978 ps->ii = new_ii;
1979 gcc_assert (ps->min_cycle >= 0);
1981 verify_partial_schedule (ps, sched_nodes);
1983 if (dump_file)
1984 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1985 ps->max_cycle);
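/* Worked example for ps_insert_empty_row, with made-up numbers: suppose
   ii = 3, split_row = 1 (after normalization), so new_ii = 4.  An insn at
   normalized cycle 0 (row 0, before the split) keeps new_time = 0 + 0/3 = 0.
   An insn at cycle 4 (row 1, at/after the split, second iteration) gets
   new_time = 4 + 4/3 + 1 = 6, i.e. row 6 % 4 = 2 and stage 6 / 4 = 1:
   it moves one row down past the new empty row and pays one extra cycle
   for the iteration already completed before it.  */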
1988 /* U_NODE is the node that failed to be scheduled; LOW and UP are the
1989 boundaries of its scheduling window. Using SCHED_NODES and II, compute
1990 and return a row in the partial schedule that can be split so as to
1991 separate a critical predecessor from a critical successor, thereby
1992 expanding the window. */
1993 static int
1994 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1995 ddg_node_ptr u_node)
1997 ddg_edge_ptr e;
1998 int lower = INT_MIN, upper = INT_MAX;
1999 ddg_node_ptr crit_pred = NULL;
2000 ddg_node_ptr crit_succ = NULL;
2001 int crit_cycle;
2003 for (e = u_node->in; e != 0; e = e->next_in)
2005 ddg_node_ptr v_node = e->src;
2007 if (TEST_BIT (sched_nodes, v_node->cuid)
2008 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
2009 if (SCHED_TIME (v_node) > lower)
2011 crit_pred = v_node;
2012 lower = SCHED_TIME (v_node);
2016 if (crit_pred != NULL)
2018 crit_cycle = SCHED_TIME (crit_pred) + 1;
2019 return SMODULO (crit_cycle, ii);
2022 for (e = u_node->out; e != 0; e = e->next_out)
2024 ddg_node_ptr v_node = e->dest;
2025 if (TEST_BIT (sched_nodes, v_node->cuid)
2026 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
2027 if (SCHED_TIME (v_node) < upper)
2029 crit_succ = v_node;
2030 upper = SCHED_TIME (v_node);
2034 if (crit_succ != NULL)
2036 crit_cycle = SCHED_TIME (crit_succ);
2037 return SMODULO (crit_cycle, ii);
2040 if (dump_file)
2041 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2043 return SMODULO ((low + up + 1) / 2, ii);
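/* Worked example for compute_split_row, with made-up numbers: let ii = 4
   and let U_NODE's window be LOW = 2, UP = 9.  If a scheduled predecessor V
   with SCHED_TIME (V) = 1, latency 1 and distance 0 is what pins the lower
   bound (1 + 1 - 0 * 4 == LOW), then V is the critical predecessor and the
   row returned is SMODULO (1 + 1, 4) = 2, i.e. the empty row is inserted
   just after V's row so that the window can grow.  */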
2046 static void
2047 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2049 int row;
2050 ps_insn_ptr crr_insn;
2052 for (row = 0; row < ps->ii; row++)
2054 int length = 0;
2056 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2058 ddg_node_ptr u = crr_insn->node;
2060 length++;
2061 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
2062 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2063 popcount (sched_nodes) == number of insns in ps. */
2064 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2065 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2068 gcc_assert (ps->rows_length[row] == length);
2073 /* This page implements the algorithm for ordering the nodes of a DDG
2074 for modulo scheduling, activated through the
2075 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2077 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2078 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2079 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2080 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2081 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2082 #define DEPTH(x) (ASAP ((x)))
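/* For example, a node with ASAP 2 and ALAP 5 has MOB 3: it can be placed
   in any of the four cycles 2 .. 5 without stretching the critical path,
   while a node on the critical path has ALAP == ASAP and hence MOB 0.
   DEPTH is simply the node's ASAP, i.e. its longest-latency distance from
   the DAG sources.  */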
2084 typedef struct node_order_params * nopa;
2086 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2087 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2088 static nopa calculate_order_params (ddg_ptr, int, int *);
2089 static int find_max_asap (ddg_ptr, sbitmap);
2090 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2091 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2093 enum sms_direction {BOTTOMUP, TOPDOWN};
2095 struct node_order_params
2097 int asap;
2098 int alap;
2099 int height;
2102 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2103 static void
2104 check_nodes_order (int *node_order, int num_nodes)
2106 int i;
2107 sbitmap tmp = sbitmap_alloc (num_nodes);
2109 sbitmap_zero (tmp);
2111 if (dump_file)
2112 fprintf (dump_file, "SMS final nodes order: \n");
2114 for (i = 0; i < num_nodes; i++)
2116 int u = node_order[i];
2118 if (dump_file)
2119 fprintf (dump_file, "%d ", u);
2120 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2122 SET_BIT (tmp, u);
2125 if (dump_file)
2126 fprintf (dump_file, "\n");
2128 sbitmap_free (tmp);
2131 /* Order the nodes of G for scheduling and pass the result in
2132 NODE_ORDER. Also set aux.count of each node to its ASAP.
2133 Store the maximal ASAP in PMAX_ASAP. Return the recMII of the given DDG. */
2134 static int
2135 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2137 int i;
2138 int rec_mii = 0;
2139 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2141 nopa nops = calculate_order_params (g, mii, pmax_asap);
2143 if (dump_file)
2144 print_sccs (dump_file, sccs, g);
2146 order_nodes_of_sccs (sccs, node_order);
2148 if (sccs->num_sccs > 0)
2149 /* First SCC has the largest recurrence_length. */
2150 rec_mii = sccs->sccs[0]->recurrence_length;
2152 /* Save ASAP before destroying node_order_params. */
2153 for (i = 0; i < g->num_nodes; i++)
2155 ddg_node_ptr v = &g->nodes[i];
2156 v->aux.count = ASAP (v);
2159 free (nops);
2160 free_ddg_all_sccs (sccs);
2161 check_nodes_order (node_order, g->num_nodes);
2163 return rec_mii;
2166 static void
2167 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2169 int i, pos = 0;
2170 ddg_ptr g = all_sccs->ddg;
2171 int num_nodes = g->num_nodes;
2172 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2173 sbitmap on_path = sbitmap_alloc (num_nodes);
2174 sbitmap tmp = sbitmap_alloc (num_nodes);
2175 sbitmap ones = sbitmap_alloc (num_nodes);
2177 sbitmap_zero (prev_sccs);
2178 sbitmap_ones (ones);
2180 /* Perform the node ordering starting from the SCC with the highest recMII.
2181 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2182 for (i = 0; i < all_sccs->num_sccs; i++)
2184 ddg_scc_ptr scc = all_sccs->sccs[i];
2186 /* Add nodes on paths from previous SCCs to the current SCC. */
2187 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2188 sbitmap_a_or_b (tmp, scc->nodes, on_path);
2190 /* Add nodes on paths from the current SCC to previous SCCs. */
2191 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2192 sbitmap_a_or_b (tmp, tmp, on_path);
2194 /* Remove nodes of previous SCCs from current extended SCC. */
2195 sbitmap_difference (tmp, tmp, prev_sccs);
2197 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2198 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2201 /* Handle the remaining nodes that do not belong to any SCC. Each call
2202 to order_nodes_in_scc handles a single connected component. */
2203 while (pos < g->num_nodes)
2205 sbitmap_difference (tmp, ones, prev_sccs);
2206 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2208 sbitmap_free (prev_sccs);
2209 sbitmap_free (on_path);
2210 sbitmap_free (tmp);
2211 sbitmap_free (ones);
2214 /* MII would be needed if we were to consider backarcs (which do not close recursive cycles); it is currently unused. */
2215 static struct node_order_params *
2216 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2218 int u;
2219 int max_asap;
2220 int num_nodes = g->num_nodes;
2221 ddg_edge_ptr e;
2222 /* Allocate a place to hold ordering params for each node in the DDG. */
2223 nopa node_order_params_arr;
2225 /* Initialize ASAP/ALAP/HEIGHT to zero. */
2226 node_order_params_arr = (nopa) xcalloc (num_nodes,
2227 sizeof (struct node_order_params));
2229 /* Set the aux pointer of each node to point to its order_params structure. */
2230 for (u = 0; u < num_nodes; u++)
2231 g->nodes[u].aux.info = &node_order_params_arr[u];
2233 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2234 calculate ASAP, ALAP, mobility, distance, and height for each node
2235 in the dependence (directed acyclic) graph. */
2237 /* We assume that the nodes in the array are in topological order. */
2239 max_asap = 0;
2240 for (u = 0; u < num_nodes; u++)
2242 ddg_node_ptr u_node = &g->nodes[u];
2244 ASAP (u_node) = 0;
2245 for (e = u_node->in; e; e = e->next_in)
2246 if (e->distance == 0)
2247 ASAP (u_node) = MAX (ASAP (u_node),
2248 ASAP (e->src) + e->latency);
2249 max_asap = MAX (max_asap, ASAP (u_node));
2252 for (u = num_nodes - 1; u > -1; u--)
2254 ddg_node_ptr u_node = &g->nodes[u];
2256 ALAP (u_node) = max_asap;
2257 HEIGHT (u_node) = 0;
2258 for (e = u_node->out; e; e = e->next_out)
2259 if (e->distance == 0)
2261 ALAP (u_node) = MIN (ALAP (u_node),
2262 ALAP (e->dest) - e->latency);
2263 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2264 HEIGHT (e->dest) + e->latency);
2267 if (dump_file)
2269 fprintf (dump_file, "\nOrder params\n");
2270 for (u = 0; u < num_nodes; u++)
2272 ddg_node_ptr u_node = &g->nodes[u];
2274 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2275 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2279 *pmax_asap = max_asap;
2280 return node_order_params_arr;
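/* A minimal, self-contained sketch (not used by the pass) of the ASAP/ALAP/
   HEIGHT recurrences computed above, on a made-up toy DAG with three nodes,
   edges 0 -> 1 (latency 2) and 0 -> 2 (latency 1), and no loop-carried
   (distance > 0) edges.  */
static void ATTRIBUTE_UNUSED
toy_order_params_sketch (void)
{
  int asap[3] = { 0, 0, 0 };
  int alap[3];
  int height[3] = { 0, 0, 0 };
  int max_asap = 0;
  int i;

  /* Forward pass: ASAP is the max over incoming zero-distance edges of
     ASAP (src) + latency.  */
  asap[1] = MAX (asap[1], asap[0] + 2);
  asap[2] = MAX (asap[2], asap[0] + 1);
  for (i = 0; i < 3; i++)
    max_asap = MAX (max_asap, asap[i]);

  /* Backward pass: ALAP starts at max_asap and is tightened by outgoing
     edges; HEIGHT is the longest latency path to a sink.  */
  for (i = 0; i < 3; i++)
    alap[i] = max_asap;
  alap[0] = MIN (alap[0], alap[1] - 2);
  alap[0] = MIN (alap[0], alap[2] - 1);
  height[0] = MAX (height[0], height[1] + 2);
  height[0] = MAX (height[0], height[2] + 1);

  /* Result: asap = {0, 2, 1}, alap = {0, 2, 2}, height = {2, 0, 0};
     node 2 has mobility alap - asap = 1, while the other two nodes lie on
     the critical path and have mobility 0.  */
  gcc_assert (alap[2] - asap[2] == 1 && alap[0] == asap[0]);
}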
2283 static int
2284 find_max_asap (ddg_ptr g, sbitmap nodes)
2286 unsigned int u = 0;
2287 int max_asap = -1;
2288 int result = -1;
2289 sbitmap_iterator sbi;
2291 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2293 ddg_node_ptr u_node = &g->nodes[u];
2295 if (max_asap < ASAP (u_node))
2297 max_asap = ASAP (u_node);
2298 result = u;
2301 return result;
2304 static int
2305 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2307 unsigned int u = 0;
2308 int max_hv = -1;
2309 int min_mob = INT_MAX;
2310 int result = -1;
2311 sbitmap_iterator sbi;
2313 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2315 ddg_node_ptr u_node = &g->nodes[u];
2317 if (max_hv < HEIGHT (u_node))
2319 max_hv = HEIGHT (u_node);
2320 min_mob = MOB (u_node);
2321 result = u;
2323 else if ((max_hv == HEIGHT (u_node))
2324 && (min_mob > MOB (u_node)))
2326 min_mob = MOB (u_node);
2327 result = u;
2330 return result;
2333 static int
2334 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2336 unsigned int u = 0;
2337 int max_dv = -1;
2338 int min_mob = INT_MAX;
2339 int result = -1;
2340 sbitmap_iterator sbi;
2342 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2344 ddg_node_ptr u_node = &g->nodes[u];
2346 if (max_dv < DEPTH (u_node))
2348 max_dv = DEPTH (u_node);
2349 min_mob = MOB (u_node);
2350 result = u;
2352 else if ((max_dv == DEPTH (u_node))
2353 && (min_mob > MOB (u_node)))
2355 min_mob = MOB (u_node);
2356 result = u;
2359 return result;
2362 /* Places the nodes of SCC into the NODE_ORDER array starting
2363 at position POS, according to the SMS ordering algorithm.
2364 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2365 the NODE_ORDER array, starting from position zero. */
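/* Roughly, the workset alternates direction: in TOPDOWN mode the
   max-HEIGHT/min-MOB node is repeatedly extracted, appended to NODE_ORDER
   and its not-yet-ordered successors inside the SCC are added; in BOTTOMUP
   mode the max-DEPTH/min-MOB node is extracted and its predecessors are
   added.  When the workset empties, the direction flips and is reseeded
   from the predecessors (resp. successors) of everything ordered so far,
   until the extended SCC is exhausted.  */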
2366 static int
2367 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2368 int * node_order, int pos)
2370 enum sms_direction dir;
2371 int num_nodes = g->num_nodes;
2372 sbitmap workset = sbitmap_alloc (num_nodes);
2373 sbitmap tmp = sbitmap_alloc (num_nodes);
2374 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2375 sbitmap predecessors = sbitmap_alloc (num_nodes);
2376 sbitmap successors = sbitmap_alloc (num_nodes);
2378 sbitmap_zero (predecessors);
2379 find_predecessors (predecessors, g, nodes_ordered);
2381 sbitmap_zero (successors);
2382 find_successors (successors, g, nodes_ordered);
2384 sbitmap_zero (tmp);
2385 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2387 sbitmap_copy (workset, tmp);
2388 dir = BOTTOMUP;
2390 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2392 sbitmap_copy (workset, tmp);
2393 dir = TOPDOWN;
2395 else
2397 int u;
2399 sbitmap_zero (workset);
2400 if ((u = find_max_asap (g, scc)) >= 0)
2401 SET_BIT (workset, u);
2402 dir = BOTTOMUP;
2405 sbitmap_zero (zero_bitmap);
2406 while (!sbitmap_equal (workset, zero_bitmap))
2408 int v;
2409 ddg_node_ptr v_node;
2410 sbitmap v_node_preds;
2411 sbitmap v_node_succs;
2413 if (dir == TOPDOWN)
2415 while (!sbitmap_equal (workset, zero_bitmap))
2417 v = find_max_hv_min_mob (g, workset);
2418 v_node = &g->nodes[v];
2419 node_order[pos++] = v;
2420 v_node_succs = NODE_SUCCESSORS (v_node);
2421 sbitmap_a_and_b (tmp, v_node_succs, scc);
2423 /* Don't consider the already ordered successors again. */
2424 sbitmap_difference (tmp, tmp, nodes_ordered);
2425 sbitmap_a_or_b (workset, workset, tmp);
2426 RESET_BIT (workset, v);
2427 SET_BIT (nodes_ordered, v);
2429 dir = BOTTOMUP;
2430 sbitmap_zero (predecessors);
2431 find_predecessors (predecessors, g, nodes_ordered);
2432 sbitmap_a_and_b (workset, predecessors, scc);
2434 else
2436 while (!sbitmap_equal (workset, zero_bitmap))
2438 v = find_max_dv_min_mob (g, workset);
2439 v_node = &g->nodes[v];
2440 node_order[pos++] = v;
2441 v_node_preds = NODE_PREDECESSORS (v_node);
2442 sbitmap_a_and_b (tmp, v_node_preds, scc);
2444 /* Don't consider the already ordered predecessors again. */
2445 sbitmap_difference (tmp, tmp, nodes_ordered);
2446 sbitmap_a_or_b (workset, workset, tmp);
2447 RESET_BIT (workset, v);
2448 SET_BIT (nodes_ordered, v);
2450 dir = TOPDOWN;
2451 sbitmap_zero (successors);
2452 find_successors (successors, g, nodes_ordered);
2453 sbitmap_a_and_b (workset, successors, scc);
2456 sbitmap_free (tmp);
2457 sbitmap_free (workset);
2458 sbitmap_free (zero_bitmap);
2459 sbitmap_free (predecessors);
2460 sbitmap_free (successors);
2461 return pos;
2465 /* This page contains functions for manipulating partial-schedules during
2466 modulo scheduling. */
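/* A small self-contained illustration (not used by the pass) of how absolute
   cycles map onto rows of the modulo reservation table: cycles that differ
   by a multiple of II share a row.  SMODULO is assumed to be a remainder
   that stays non-negative even for negative cycles, which is what makes it
   usable as a row index.  */
static void ATTRIBUTE_UNUSED
modulo_row_mapping_sketch (void)
{
  int ii = 4;

  /* Cycles -3, 1 and 5 all land in row 1 when ii is 4.  */
  gcc_assert (SMODULO (-3, ii) == 1);
  gcc_assert (SMODULO (1, ii) == 1);
  gcc_assert (SMODULO (5, ii) == 1);
}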
2468 /* Create a partial schedule and allocate memory to hold II rows. */
2470 static partial_schedule_ptr
2471 create_partial_schedule (int ii, ddg_ptr g, int history)
2473 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2474 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2475 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2476 ps->ii = ii;
2477 ps->history = history;
2478 ps->min_cycle = INT_MAX;
2479 ps->max_cycle = INT_MIN;
2480 ps->g = g;
2482 return ps;
2485 /* Free the PS_INSNs in the rows array of the given partial schedule.
2486 ??? Consider caching the PS_INSNs. */
2487 static void
2488 free_ps_insns (partial_schedule_ptr ps)
2490 int i;
2492 for (i = 0; i < ps->ii; i++)
2494 while (ps->rows[i])
2496 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2498 free (ps->rows[i]);
2499 ps->rows[i] = ps_insn;
2501 ps->rows[i] = NULL;
2505 /* Free all the memory allocated to the partial schedule. */
2507 static void
2508 free_partial_schedule (partial_schedule_ptr ps)
2510 if (!ps)
2511 return;
2512 free_ps_insns (ps);
2513 free (ps->rows);
2514 free (ps->rows_length);
2515 free (ps);
2518 /* Clear the rows array with its PS_INSNs, and create a new one with
2519 NEW_II rows. */
2521 static void
2522 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2524 if (!ps)
2525 return;
2526 free_ps_insns (ps);
2527 if (new_ii == ps->ii)
2528 return;
2529 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2530 * sizeof (ps_insn_ptr));
2531 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2532 ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2533 memset (ps->rows_length, 0, new_ii * sizeof (int));
2534 ps->ii = new_ii;
2535 ps->min_cycle = INT_MAX;
2536 ps->max_cycle = INT_MIN;
2539 /* Print the partial schedule as an array of II rows; for each row
2540 print the UIDs of the insns in it. */
2541 void
2542 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2544 int i;
2546 for (i = 0; i < ps->ii; i++)
2548 ps_insn_ptr ps_i = ps->rows[i];
2550 fprintf (dump, "\n[ROW %d ]: ", i);
2551 while (ps_i)
2553 fprintf (dump, "%d, ",
2554 INSN_UID (ps_i->node->insn));
2555 ps_i = ps_i->next_in_row;
2560 /* Create a PS_INSN object and initialize it with the given parameters. */
2561 static ps_insn_ptr
2562 create_ps_insn (ddg_node_ptr node, int cycle)
2564 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2566 ps_i->node = node;
2567 ps_i->next_in_row = NULL;
2568 ps_i->prev_in_row = NULL;
2569 ps_i->cycle = cycle;
2571 return ps_i;
2575 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2576 node is not found in the partial schedule, else returns true. */
2577 static bool
2578 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2580 int row;
2582 if (!ps || !ps_i)
2583 return false;
2585 row = SMODULO (ps_i->cycle, ps->ii);
2586 if (! ps_i->prev_in_row)
2588 if (ps_i != ps->rows[row])
2589 return false;
2591 ps->rows[row] = ps_i->next_in_row;
2592 if (ps->rows[row])
2593 ps->rows[row]->prev_in_row = NULL;
2595 else
2597 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2598 if (ps_i->next_in_row)
2599 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2602 ps->rows_length[row] -= 1;
2603 free (ps_i);
2604 return true;
2607 /* Unlike what the literature describes for modulo scheduling (which focuses
2608 on VLIW machines), the order of the instructions inside a cycle is
2609 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2610 where the current instruction should go relative to the already
2611 scheduled instructions in the given cycle. Go over these
2612 instructions and find the first possible column to put it in. */
2613 static bool
2614 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2615 sbitmap must_precede, sbitmap must_follow)
2617 ps_insn_ptr next_ps_i;
2618 ps_insn_ptr first_must_follow = NULL;
2619 ps_insn_ptr last_must_precede = NULL;
2620 ps_insn_ptr last_in_row = NULL;
2621 int row;
2623 if (! ps_i)
2624 return false;
2626 row = SMODULO (ps_i->cycle, ps->ii);
2628 /* Find the first must-follow and the last must-precede insns,
2629 and insert the node immediately after the last must-precede,
2630 making sure no must-follow insn appears before that point. */
2631 for (next_ps_i = ps->rows[row];
2632 next_ps_i;
2633 next_ps_i = next_ps_i->next_in_row)
2635 if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
2636 && ! first_must_follow)
2637 first_must_follow = next_ps_i;
2638 if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
2640 /* If we have already met a node that must follow, then
2641 there is no possible column. */
2642 if (first_must_follow)
2643 return false;
2644 else
2645 last_must_precede = next_ps_i;
2647 /* The closing branch must be the last in the row. */
2648 if (must_precede
2649 && TEST_BIT (must_precede, next_ps_i->node->cuid)
2650 && JUMP_P (next_ps_i->node->insn))
2651 return false;
2653 last_in_row = next_ps_i;
2656 /* The closing branch is scheduled as well. Make sure there is no
2657 dependent instruction after it as the branch should be the last
2658 instruction in the row. */
2659 if (JUMP_P (ps_i->node->insn))
2661 if (first_must_follow)
2662 return false;
2663 if (last_in_row)
2665 /* Make the branch the last in the row. New instructions
2666 will be inserted at the beginning of the row or after the
2667 last must_precede instruction thus the branch is guaranteed
2668 to remain the last instruction in the row. */
2669 last_in_row->next_in_row = ps_i;
2670 ps_i->prev_in_row = last_in_row;
2671 ps_i->next_in_row = NULL;
2673 else
2674 ps->rows[row] = ps_i;
2675 return true;
2678 /* Now insert the node after LAST_MUST_PRECEDE, or at the start of the row if there is no must-precede insn. */
2680 if (! last_must_precede)
2682 ps_i->next_in_row = ps->rows[row];
2683 ps_i->prev_in_row = NULL;
2684 if (ps_i->next_in_row)
2685 ps_i->next_in_row->prev_in_row = ps_i;
2686 ps->rows[row] = ps_i;
2688 else
2690 ps_i->next_in_row = last_must_precede->next_in_row;
2691 last_must_precede->next_in_row = ps_i;
2692 ps_i->prev_in_row = last_must_precede;
2693 if (ps_i->next_in_row)
2694 ps_i->next_in_row->prev_in_row = ps_i;
2697 return true;
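/* Worked example for ps_insn_find_column, with made-up insns: if the row
   already holds A B C, MUST_PRECEDE contains B and MUST_FOLLOW contains C,
   the new insn is placed between B and C.  If instead the row order were
   A C B, a must-follow insn (C) would already appear before the last
   must-precede insn (B), so no column exists and the function fails.  */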
2700 /* Advance the PS_INSN one column in its current row; return false
2701 on failure and true on success. Bit N is set in MUST_FOLLOW if
2702 the node with cuid N must come after the node pointed to by
2703 PS_I when scheduled in the same cycle. */
2704 static int
2705 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2706 sbitmap must_follow)
2708 ps_insn_ptr prev, next;
2709 int row;
2710 ddg_node_ptr next_node;
2712 if (!ps || !ps_i)
2713 return false;
2715 row = SMODULO (ps_i->cycle, ps->ii);
2717 if (! ps_i->next_in_row)
2718 return false;
2720 next_node = ps_i->next_in_row->node;
2722 /* Check if next_in_row is dependent on ps_i, both having same sched
2723 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2724 if (must_follow && TEST_BIT (must_follow, next_node->cuid))
2725 return false;
2727 /* Advance PS_I over its next_in_row in the doubly linked list. */
2728 prev = ps_i->prev_in_row;
2729 next = ps_i->next_in_row;
2731 if (ps_i == ps->rows[row])
2732 ps->rows[row] = next;
2734 ps_i->next_in_row = next->next_in_row;
2736 if (next->next_in_row)
2737 next->next_in_row->prev_in_row = ps_i;
2739 next->next_in_row = ps_i;
2740 ps_i->prev_in_row = next;
2742 next->prev_in_row = prev;
2743 if (prev)
2744 prev->next_in_row = next;
2746 return true;
2749 /* Insert a DDG_NODE into the given partial schedule at the given cycle.
2750 Return 0 if this is not possible and a PS_INSN otherwise. Bit N is
2751 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
2752 before/after (respectively) the node pointed to by PS_I when scheduled
2753 in the same cycle. */
2754 static ps_insn_ptr
2755 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2756 sbitmap must_precede, sbitmap must_follow)
2758 ps_insn_ptr ps_i;
2759 int row = SMODULO (cycle, ps->ii);
2761 if (ps->rows_length[row] >= issue_rate)
2762 return NULL;
2764 ps_i = create_ps_insn (node, cycle);
2766 /* Finds and inserts PS_I according to MUST_FOLLOW and
2767 MUST_PRECEDE. */
2768 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2770 free (ps_i);
2771 return NULL;
2774 ps->rows_length[row] += 1;
2775 return ps_i;
2778 /* Advance time one cycle. Assumes DFA is being used. */
2779 static void
2780 advance_one_cycle (void)
2782 if (targetm.sched.dfa_pre_cycle_insn)
2783 state_transition (curr_state,
2784 targetm.sched.dfa_pre_cycle_insn ());
2786 state_transition (curr_state, NULL);
2788 if (targetm.sched.dfa_post_cycle_insn)
2789 state_transition (curr_state,
2790 targetm.sched.dfa_post_cycle_insn ());
2795 /* Check whether PS has resource conflicts according to the DFA, from
2796 cycle FROM to cycle TO; return true if there are conflicts and false
2797 otherwise. Assumes the DFA is being used. */
2798 static int
2799 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2801 int cycle;
2803 state_reset (curr_state);
2805 for (cycle = from; cycle <= to; cycle++)
2807 ps_insn_ptr crr_insn;
2808 /* Holds the remaining issue slots in the current row. */
2809 int can_issue_more = issue_rate;
2811 /* Walk through the DFA for the current row. */
2812 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2813 crr_insn;
2814 crr_insn = crr_insn->next_in_row)
2816 rtx insn = crr_insn->node->insn;
2818 if (!NONDEBUG_INSN_P (insn))
2819 continue;
2821 /* Check if there is room for the current insn. */
2822 if (!can_issue_more || state_dead_lock_p (curr_state))
2823 return true;
2825 /* Update the DFA state and return with failure if the DFA found
2826 resource conflicts. */
2827 if (state_transition (curr_state, insn) >= 0)
2828 return true;
2830 if (targetm.sched.variable_issue)
2831 can_issue_more =
2832 targetm.sched.variable_issue (sched_dump, sched_verbose,
2833 insn, can_issue_more);
2834 /* A naked CLOBBER or USE generates no instruction, so don't
2835 let them consume issue slots. */
2836 else if (GET_CODE (PATTERN (insn)) != USE
2837 && GET_CODE (PATTERN (insn)) != CLOBBER)
2838 can_issue_more--;
2841 /* Advance the DFA to the next cycle. */
2842 advance_one_cycle ();
2844 return false;
2847 /* Check whether the given node causes resource conflicts when added to PS
2848 at cycle C. If not, the node is added to PS and returned; otherwise zero
2849 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2850 cuid N must come before/after (respectively) the node pointed to by
2851 PS_I when scheduled in the same cycle. */
2852 ps_insn_ptr
2853 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2854 int c, sbitmap must_precede,
2855 sbitmap must_follow)
2857 int has_conflicts = 0;
2858 ps_insn_ptr ps_i;
2860 /* First add the node to the PS; if this succeeds, check for
2861 conflicts, trying different issue slots in the same row. */
2862 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2863 return NULL; /* Failed to insert the node at the given cycle. */
2865 has_conflicts = ps_has_conflicts (ps, c, c)
2866 || (ps->history > 0
2867 && ps_has_conflicts (ps,
2868 c - ps->history,
2869 c + ps->history));
2871 /* Try different issue slots to find one that the given node can be
2872 scheduled in without conflicts. */
2873 while (has_conflicts)
2875 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2876 break;
2877 has_conflicts = ps_has_conflicts (ps, c, c)
2878 || (ps->history > 0
2879 && ps_has_conflicts (ps,
2880 c - ps->history,
2881 c + ps->history));
2884 if (has_conflicts)
2886 remove_node_from_ps (ps, ps_i);
2887 return NULL;
2890 ps->min_cycle = MIN (ps->min_cycle, c);
2891 ps->max_cycle = MAX (ps->max_cycle, c);
2892 return ps_i;
2895 /* Calculate the stage count of the partial schedule PS. The calculation
2896 takes into account the rotation to bring the closing branch to row
2897 ii-1. */
2898 int
2899 calculate_stage_count (partial_schedule_ptr ps)
2901 int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
2902 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
2903 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
2904 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
2906 /* The stage count is computed by adding the number of stages
2907 before cycle zero and the number after cycle zero. */
2908 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
2910 return stage_count;
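/* For example (made-up numbers, and assuming CALC_STAGE_COUNT rounds a
   cycle span up to whole ii-sized stages): with ii = 4, a rotated schedule
   spanning cycles -5 .. 2 contributes two stages for cycles -5 .. -1 and
   one stage for cycles 0 .. 2, giving a stage count of 3.  */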
2913 /* Rotate the rows of PS such that insns scheduled at time
2914 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2915 void
2916 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2918 int i, row, backward_rotates;
2919 int last_row = ps->ii - 1;
2921 if (start_cycle == 0)
2922 return;
2924 backward_rotates = SMODULO (start_cycle, ps->ii);
2926 /* Revisit later and optimize this into a single loop. */
2927 for (i = 0; i < backward_rotates; i++)
2929 ps_insn_ptr first_row = ps->rows[0];
2930 int first_row_length = ps->rows_length[0];
2932 for (row = 0; row < last_row; row++)
2934 ps->rows[row] = ps->rows[row + 1];
2935 ps->rows_length[row] = ps->rows_length[row + 1];
2938 ps->rows[last_row] = first_row;
2939 ps->rows_length[last_row] = first_row_length;
2942 ps->max_cycle -= start_cycle;
2943 ps->min_cycle -= start_cycle;
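/* For example, with ii = 4 and start_cycle = 6, backward_rotates is
   SMODULO (6, 4) = 2, so the insns that used to live in row 2 end up in
   row 0, and both min_cycle and max_cycle drop by 6.  */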
2946 #endif /* INSN_SCHEDULING */
2948 static bool
2949 gate_handle_sms (void)
2951 return (optimize > 0 && flag_modulo_sched);
2955 /* Run instruction scheduler. */
2956 /* Perform SMS modulo scheduling. */
2957 static unsigned int
2958 rest_of_handle_sms (void)
2960 #ifdef INSN_SCHEDULING
2961 basic_block bb;
2963 /* Collect loop information to be used in SMS. */
2964 cfg_layout_initialize (0);
2965 sms_schedule ();
2967 /* Update the life information, because we add pseudos. */
2968 max_regno = max_reg_num ();
2970 /* Finalize layout changes. */
2971 FOR_EACH_BB (bb)
2972 if (bb->next_bb != EXIT_BLOCK_PTR)
2973 bb->aux = bb->next_bb;
2974 free_dominance_info (CDI_DOMINATORS);
2975 cfg_layout_finalize ();
2976 #endif /* INSN_SCHEDULING */
2977 return 0;
2980 struct rtl_opt_pass pass_sms =
2983 RTL_PASS,
2984 "sms", /* name */
2985 gate_handle_sms, /* gate */
2986 rest_of_handle_sms, /* execute */
2987 NULL, /* sub */
2988 NULL, /* next */
2989 0, /* static_pass_number */
2990 TV_SMS, /* tv_id */
2991 0, /* properties_required */
2992 0, /* properties_provided */
2993 0, /* properties_destroyed */
2994 0, /* todo_flags_start */
2995 TODO_df_finish
2996 | TODO_verify_flow
2997 | TODO_verify_rtl_sharing
2998 | TODO_ggc_collect /* todo_flags_finish */