1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "diagnostic-core.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "recog.h"
38 #include "sched-int.h"
39 #include "target.h"
40 #include "cfglayout.h"
41 #include "cfgloop.h"
42 #include "cfghooks.h"
43 #include "expr.h"
44 #include "params.h"
45 #include "gcov-io.h"
46 #include "ddg.h"
47 #include "timevar.h"
48 #include "tree-pass.h"
49 #include "dbgcnt.h"
50 #include "df.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
79 3. If we fail to schedule all insns, bump II and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops whose loop count can be easily
88 adjusted. This is because we peel a constant number of iterations
89 into a prologue and epilogue for which we want to avoid emitting
90 the control part, and a kernel which is to iterate that constant
91 number of iterations less than the original loop. So the control
92 part should be a set of insns clearly identified and having its
93 own iv, not otherwise used in the loop (at least for now), which
94 initializes a register before the loop to the number of iterations.
95 Currently SMS relies on the do-loop pattern to recognize such loops,
96 where (1) the control part consists of all insns defining and/or
97 using a certain 'count' register and (2) the loop count can be
98 adjusted by modifying this register prior to the loop.
99 TODO: Rely on cfgloop analysis instead. */
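/* Added illustration (not part of the original source): a minimal C-level
   sketch, under an assumed two-stage schedule, of the prolog/kernel/epilog
   decomposition described above.  The names sms_sketch_before/after are
   made up for this example only.  */

static int
sms_sketch_before (const int *a, int n)
{
  int s = 0, i;
  for (i = 0; i < n; i++)     /* A countable loop with a simple iv.  */
    s += a[i];
  return s;
}

static int
sms_sketch_after (const int *a, int n)
{
  int s = 0, i, x;
  if (n <= 0)
    return 0;
  x = a[0];                   /* Prolog: stage 0 of iteration 0.  */
  for (i = 1; i < n; i++)     /* Kernel: iterates n - 1 times, i.e. the
                                 original count minus (stage_count - 1).  */
    {
      s += x;                 /* Stage 1 of iteration i - 1.  */
      x = a[i];               /* Stage 0 of iteration i.  */
    }
  s += x;                     /* Epilog: stage 1 of iteration n - 1.  */
  return s;
}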
101 /* This page defines partial-schedule structures and functions for
102 modulo scheduling. */
104 typedef struct partial_schedule *partial_schedule_ptr;
105 typedef struct ps_insn *ps_insn_ptr;
107 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
108 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
110 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
111 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
113 /* Perform signed modulo, always returning a non-negative value. */
114 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
116 /* The number of different iterations the nodes in ps span, assuming
117 the stage boundaries are placed efficiently. */
118 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
119 + 1 + ii - 1) / ii)
120 /* The stage count of ps. */
121 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
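/* Added worked example (assumed, not in the original source): concrete
   values for the macros above.  SMODULO always lands in [0, ii), and
   CALC_STAGE_COUNT is a ceiling division of the spanned cycles by ii.  */
static void ATTRIBUTE_UNUSED
sms_macro_examples (void)
{
  gcc_assert (SMODULO (-1, 4) == 3);       /* Plain % could yield -1 here.  */
  gcc_assert (SMODULO (5, 4) == 1);
  /* Cycles -2 .. 5 span 8 cycles; with ii = 4 that is 2 stages.  */
  gcc_assert (CALC_STAGE_COUNT (5, -2, 4) == 2);
}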
123 /* A single instruction in the partial schedule. */
124 struct ps_insn
126 /* The corresponding DDG_NODE. */
127 ddg_node_ptr node;
129 /* The (absolute) cycle in which the PS instruction is scheduled.
130 Same as SCHED_TIME (node). */
131 int cycle;
133 /* The next/prev PS_INSN in the same row. */
134 ps_insn_ptr next_in_row,
135 prev_in_row;
137 /* The number of nodes in the same row that come after this node. */
138 int row_rest_count;
141 /* Holds the partial schedule as an array of II rows. Each entry of the
142 array points to a linked list of PS_INSNs, which represents the
143 instructions that are scheduled for that row. */
144 struct partial_schedule
146 int ii; /* Number of rows in the partial schedule. */
147 int history; /* Threshold for conflict checking using DFA. */
149 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
150 ps_insn_ptr *rows;
152 /* The earliest absolute cycle of an insn in the partial schedule. */
153 int min_cycle;
155 /* The latest absolute cycle of an insn in the partial schedule. */
156 int max_cycle;
158 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
160 int stage_count; /* The stage count of the partial schedule. */
163 /* We use this to record all the register replacements we do in
164 the kernel so we can undo SMS if it is not profitable. */
165 struct undo_replace_buff_elem
167 rtx insn;
168 rtx orig_reg;
169 rtx new_reg;
170 struct undo_replace_buff_elem *next;
175 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
176 static void free_partial_schedule (partial_schedule_ptr);
177 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
178 void print_partial_schedule (partial_schedule_ptr, FILE *);
179 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
180 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
181 ddg_node_ptr node, int cycle,
182 sbitmap must_precede,
183 sbitmap must_follow);
184 static void rotate_partial_schedule (partial_schedule_ptr, int);
185 void set_row_column_for_ps (partial_schedule_ptr);
186 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
187 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
190 /* This page defines constants and structures for the modulo scheduling
191 driver. */
193 static int sms_order_nodes (ddg_ptr, int, int *, int *);
194 static void set_node_sched_params (ddg_ptr);
195 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
196 static void permute_partial_schedule (partial_schedule_ptr, rtx);
197 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
198 rtx, rtx);
199 static void duplicate_insns_of_cycles (partial_schedule_ptr,
200 int, int, int, rtx);
201 static int calculate_stage_count (partial_schedule_ptr ps);
202 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
203 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
204 #define SCHED_FIRST_REG_MOVE(x) \
205 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
206 #define SCHED_NREG_MOVES(x) \
207 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
208 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
209 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
210 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
212 /* The scheduling parameters held for each node. */
213 typedef struct node_sched_params
215 int asap; /* A lower-bound on the absolute scheduling cycle. */
216 int time; /* The absolute scheduling cycle (time >= asap). */
218 /* The following field (first_reg_move) is a pointer to the first
219 register-move instruction added to handle the modulo-variable-expansion
220 of the register defined by this node. This register-move copies the
221 original register defined by the node. */
222 rtx first_reg_move;
224 /* The number of register-move instructions added, immediately preceding
225 first_reg_move. */
226 int nreg_moves;
228 int row; /* Holds time % ii. */
229 int stage; /* Holds time / ii. */
231 /* The column of a node inside the ps. If nodes u, v are on the same row,
232 u will precede v if column (u) < column (v). */
233 int column;
234 } *node_sched_params_ptr;
237 /* The following three functions are copied from the current scheduler
238 code in order to use sched_analyze() for computing the dependencies.
239 They are used when initializing the sched_info structure. */
240 static const char *
241 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
243 static char tmp[80];
245 sprintf (tmp, "i%4d", INSN_UID (insn));
246 return tmp;
249 static void
250 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
251 regset cond_exec ATTRIBUTE_UNUSED,
252 regset used ATTRIBUTE_UNUSED,
253 regset set ATTRIBUTE_UNUSED)
257 static struct common_sched_info_def sms_common_sched_info;
259 static struct sched_deps_info_def sms_sched_deps_info =
261 compute_jump_reg_dependencies,
262 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
263 NULL,
264 0, 0, 0
267 static struct haifa_sched_info sms_sched_info =
269 NULL,
270 NULL,
271 NULL,
272 NULL,
273 NULL,
274 sms_print_insn,
275 NULL,
276 NULL, /* insn_finishes_block_p */
277 NULL, NULL,
278 NULL, NULL,
279 0, 0,
281 NULL, NULL, NULL, NULL,
285 /* Given HEAD and TAIL, the first and last insns in a loop,
286 return the register which controls the loop.  Return zero if the
287 register is used in the loop outside the control part, or if the
288 do-loop pattern is not of the form we expect. */
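/* Added illustration (assumed, schematic only): the single (parallel)
   branch-on-count mentioned below typically looks roughly like

     (parallel [(set (pc) (if_then_else (ne (reg:SI n) (const_int 1))
                                        (label_ref ...) (pc)))
                (set (reg:SI n) (plus (reg:SI n) (const_int -1)))])

   where (reg:SI n) is the count register this function returns; the exact
   shape is target-dependent and is recognized by doloop_condition_get.  */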
289 static rtx
290 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
292 #ifdef HAVE_doloop_end
293 rtx reg, condition, insn, first_insn_not_to_check;
295 if (!JUMP_P (tail))
296 return NULL_RTX;
298 /* TODO: Free SMS's dependence on doloop_condition_get. */
299 condition = doloop_condition_get (tail);
300 if (! condition)
301 return NULL_RTX;
303 if (REG_P (XEXP (condition, 0)))
304 reg = XEXP (condition, 0);
305 else if (GET_CODE (XEXP (condition, 0)) == PLUS
306 && REG_P (XEXP (XEXP (condition, 0), 0)))
307 reg = XEXP (XEXP (condition, 0), 0);
308 else
309 gcc_unreachable ();
311 /* Check that the COUNT_REG has no other occurrences in the loop
312 until the decrement. We assume the control part consists of
313 either a single (parallel) branch-on-count or a (non-parallel)
314 branch immediately preceded by a single (decrement) insn. */
315 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
316 : prev_nondebug_insn (tail));
318 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
319 if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
321 if (dump_file)
323 fprintf (dump_file, "SMS count_reg found ");
324 print_rtl_single (dump_file, reg);
325 fprintf (dump_file, " outside control in insn:\n");
326 print_rtl_single (dump_file, insn);
329 return NULL_RTX;
332 return reg;
333 #else
334 return NULL_RTX;
335 #endif
338 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
339 that the number of iterations is a compile-time constant. If so,
340 return the rtx that sets COUNT_REG to a constant, and set COUNT to
341 this constant. Otherwise return 0. */
342 static rtx
343 const_iteration_count (rtx count_reg, basic_block pre_header,
344 HOST_WIDEST_INT * count)
346 rtx insn;
347 rtx head, tail;
349 if (! pre_header)
350 return NULL_RTX;
352 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
354 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
355 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
356 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
358 rtx pat = single_set (insn);
360 if (CONST_INT_P (SET_SRC (pat)))
362 *count = INTVAL (SET_SRC (pat));
363 return insn;
366 return NULL_RTX;
369 return NULL_RTX;
372 /* A very simple resource-based lower bound on the initiation interval.
373 ??? Improve the accuracy of this bound by considering the
374 utilization of various units. */
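/* Added example (assumed): with 12 non-debug DDG nodes on a target that
   can issue 4 insns per cycle, the bound computed below is 12 / 4 = 3.  */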
375 static int
376 res_MII (ddg_ptr g)
378 if (targetm.sched.sms_res_mii)
379 return targetm.sched.sms_res_mii (g);
381 return ((g->num_nodes - g->num_debug) / issue_rate);
385 /* Points to the array that contains the sched data for each node. */
386 static node_sched_params_ptr node_sched_params;
388 /* Allocate sched_params for each node and initialize it. Assumes that
389 the aux field of each node contains the asap bound (computed earlier),
390 and copies it into the sched_params field. */
391 static void
392 set_node_sched_params (ddg_ptr g)
394 int i;
396 /* Allocate for each node in the DDG a place to hold the "sched_data". */
397 /* Initialize ASAP/ALAP/HEIGHT to zero. */
398 node_sched_params = (node_sched_params_ptr)
399 xcalloc (g->num_nodes,
400 sizeof (struct node_sched_params));
402 /* Set the pointer of the general data of the node to point to the
403 appropriate sched_params structure. */
404 for (i = 0; i < g->num_nodes; i++)
406 /* Watch out for aliasing problems? */
407 node_sched_params[i].asap = g->nodes[i].aux.count;
408 g->nodes[i].aux.info = &node_sched_params[i];
412 static void
413 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
415 int i;
417 if (! file)
418 return;
419 for (i = 0; i < num_nodes; i++)
421 node_sched_params_ptr nsp = &node_sched_params[i];
422 rtx reg_move = nsp->first_reg_move;
423 int j;
425 fprintf (file, "Node = %d; INSN = %d\n", i,
426 (INSN_UID (g->nodes[i].insn)));
427 fprintf (file, " asap = %d:\n", nsp->asap);
428 fprintf (file, " time = %d:\n", nsp->time);
429 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
430 for (j = 0; j < nsp->nreg_moves; j++)
432 fprintf (file, " reg_move = ");
433 print_rtl_single (file, reg_move);
434 reg_move = PREV_INSN (reg_move);
440 Breaking intra-loop register anti-dependences:
441 Each intra-loop register anti-dependence implies a cross-iteration true
442 dependence of distance 1. Therefore, we can remove such false dependencies
443 and figure out if the partial schedule broke them by checking if (for a
444 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
445 if so generate a register move.  The number of such moves is equal to:
446 nreg_moves = (SCHED_TIME (use) - SCHED_TIME (def)) / ii + 1
447 - (0 if the partial schedule broke the dependence,
448 1 if it did not). */
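/* Added worked sketch (assumed, not part of the original source): the
   per-edge count used in generate_reg_moves () below, ignoring the
   same-row column adjustment.  E.g. with ii = 4, a def at cycle 2 and a
   use at cycle 11 on a distance-0 edge need (11 - 2) / 4 = 2 copies.  */
static int ATTRIBUTE_UNUSED
reg_moves_for_edge_sketch (int def_time, int use_time, int distance, int ii)
{
  int n = (use_time - def_time + (distance == 1 ? ii : 0)) / ii;
  return MAX (n, 0);
}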
450 static struct undo_replace_buff_elem *
451 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
453 ddg_ptr g = ps->g;
454 int ii = ps->ii;
455 int i;
456 struct undo_replace_buff_elem *reg_move_replaces = NULL;
458 for (i = 0; i < g->num_nodes; i++)
460 ddg_node_ptr u = &g->nodes[i];
461 ddg_edge_ptr e;
462 int nreg_moves = 0, i_reg_move;
463 sbitmap *uses_of_defs;
464 rtx last_reg_move;
465 rtx prev_reg, old_reg;
467 /* Compute the number of reg_moves needed for u, by looking at life
468 ranges started at u (excluding self-loops). */
469 for (e = u->out; e; e = e->next_out)
470 if (e->type == TRUE_DEP && e->dest != e->src)
472 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
474 if (e->distance == 1)
475 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
477 /* If dest precedes src in the schedule of the kernel, then dest
478 will read before src writes and we can save one reg_copy. */
479 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
480 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
481 nreg_moves4e--;
483 nreg_moves = MAX (nreg_moves, nreg_moves4e);
486 if (nreg_moves == 0)
487 continue;
489 /* Every use of the register defined by node may require a different
490 copy of this register, depending on the time the use is scheduled.
491 Set a bitmap vector, telling which nodes use each copy of this
492 register. */
493 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
494 sbitmap_vector_zero (uses_of_defs, nreg_moves);
495 for (e = u->out; e; e = e->next_out)
496 if (e->type == TRUE_DEP && e->dest != e->src)
498 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
500 if (e->distance == 1)
501 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
503 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
504 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
505 dest_copy--;
507 if (dest_copy)
508 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
511 /* Now generate the reg_moves, attaching relevant uses to them. */
512 SCHED_NREG_MOVES (u) = nreg_moves;
513 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
514 /* Insert the reg-moves right before the notes which precede
515 the insn they relate to. */
516 last_reg_move = u->first_note;
518 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
520 unsigned int i_use = 0;
521 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
522 rtx reg_move = gen_move_insn (new_reg, prev_reg);
523 sbitmap_iterator sbi;
525 add_insn_before (reg_move, last_reg_move, NULL);
526 last_reg_move = reg_move;
528 if (!SCHED_FIRST_REG_MOVE (u))
529 SCHED_FIRST_REG_MOVE (u) = reg_move;
531 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
533 struct undo_replace_buff_elem *rep;
535 rep = (struct undo_replace_buff_elem *)
536 xcalloc (1, sizeof (struct undo_replace_buff_elem));
537 rep->insn = g->nodes[i_use].insn;
538 rep->orig_reg = old_reg;
539 rep->new_reg = new_reg;
541 if (! reg_move_replaces)
542 reg_move_replaces = rep;
543 else
545 rep->next = reg_move_replaces;
546 reg_move_replaces = rep;
549 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
550 if (rescan)
551 df_insn_rescan (g->nodes[i_use].insn);
554 prev_reg = new_reg;
556 sbitmap_vector_free (uses_of_defs);
558 return reg_move_replaces;
561 /* Free memory allocated for the undo buffer. */
562 static void
563 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
566 while (reg_move_replaces)
568 struct undo_replace_buff_elem *rep = reg_move_replaces;
570 reg_move_replaces = reg_move_replaces->next;
571 free (rep);
575 /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
576 SCHED_ROW and SCHED_STAGE.  Instructions scheduled on cycle AMOUNT
577 will move to cycle zero. */
578 static void
579 reset_sched_times (partial_schedule_ptr ps, int amount)
581 int row;
582 int ii = ps->ii;
583 ps_insn_ptr crr_insn;
585 for (row = 0; row < ii; row++)
586 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
588 ddg_node_ptr u = crr_insn->node;
589 int normalized_time = SCHED_TIME (u) - amount;
590 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
591 int sc_until_cycle_zero, stage;
593 if (dump_file)
595 /* Print the scheduling times after the rotation. */
596 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
597 "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
598 INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
599 normalized_time);
600 if (JUMP_P (crr_insn->node->insn))
601 fprintf (dump_file, " (branch)");
602 fprintf (dump_file, "\n");
605 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
606 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
607 SCHED_TIME (u) = normalized_time;
608 SCHED_ROW (u) = SMODULO (normalized_time, ii);
610 /* The calculation of stage count is done by adding the number
611 of stages before cycle zero and after cycle zero. */
612 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
614 if (SCHED_TIME (u) < 0)
616 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
617 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
619 else
621 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
622 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
627 /* Set SCHED_COLUMN of each node according to its position in PS. */
628 static void
629 set_columns_for_ps (partial_schedule_ptr ps)
631 int row;
633 for (row = 0; row < ps->ii; row++)
635 ps_insn_ptr cur_insn = ps->rows[row];
636 int column = 0;
638 for (; cur_insn; cur_insn = cur_insn->next_in_row)
639 SCHED_COLUMN (cur_insn->node) = column++;
643 /* Permute the insns according to their order in PS, from row 0 to
644 row ii-1, and position them right before LAST. This schedules
645 the insns of the loop kernel. */
646 static void
647 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
649 int ii = ps->ii;
650 int row;
651 ps_insn_ptr ps_ij;
653 for (row = 0; row < ii ; row++)
654 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
655 if (PREV_INSN (last) != ps_ij->node->insn)
656 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
657 PREV_INSN (last));
660 static void
661 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
662 int to_stage, int for_prolog, rtx count_reg)
664 int row;
665 ps_insn_ptr ps_ij;
667 for (row = 0; row < ps->ii; row++)
668 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
670 ddg_node_ptr u_node = ps_ij->node;
671 int j, i_reg_moves;
672 rtx reg_move = NULL_RTX;
674 /* Do not duplicate any insn which refers to count_reg as it
675 belongs to the control part.
676 The closing branch is scheduled as well and thus should
677 be ignored.
678 TODO: This should be done by analyzing the control part of
679 the loop. */
680 if (reg_mentioned_p (count_reg, u_node->insn)
681 || JUMP_P (ps_ij->node->insn))
682 continue;
684 if (for_prolog)
686 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
687 number of reg_moves starting with the second occurrence of
688 u_node, which is generated if its SCHED_STAGE <= to_stage. */
689 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
690 i_reg_moves = MAX (i_reg_moves, 0);
691 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
693 /* The reg_moves start from the *first* reg_move backwards. */
694 if (i_reg_moves)
696 reg_move = SCHED_FIRST_REG_MOVE (u_node);
697 for (j = 1; j < i_reg_moves; j++)
698 reg_move = PREV_INSN (reg_move);
701 else /* It's for the epilog. */
703 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
704 starting to decrease one stage after u_node no longer occurs;
705 that is, generate all reg_moves until
706 SCHED_STAGE (u_node) == from_stage - 1. */
707 i_reg_moves = SCHED_NREG_MOVES (u_node)
708 - (from_stage - SCHED_STAGE (u_node) - 1);
709 i_reg_moves = MAX (i_reg_moves, 0);
710 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
712 /* The reg_moves start from the *last* reg_move forwards. */
713 if (i_reg_moves)
715 reg_move = SCHED_FIRST_REG_MOVE (u_node);
716 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
717 reg_move = PREV_INSN (reg_move);
721 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
722 emit_insn (copy_rtx (PATTERN (reg_move)));
723 if (SCHED_STAGE (u_node) >= from_stage
724 && SCHED_STAGE (u_node) <= to_stage)
725 duplicate_insn_chain (u_node->first_note, u_node->insn);
730 /* Generate the instructions (including reg_moves) for prolog & epilog. */
731 static void
732 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
733 rtx count_reg, rtx count_init)
735 int i;
736 int last_stage = PS_STAGE_COUNT (ps) - 1;
737 edge e;
739 /* Generate the prolog, inserting its insns on the loop-entry edge. */
740 start_sequence ();
742 if (!count_init)
744 /* Generate instructions at the beginning of the prolog to
745 adjust the loop count by STAGE_COUNT. If loop count is constant
746 (count_init), this constant is adjusted by STAGE_COUNT in
747 generate_prolog_epilog function. */
748 rtx sub_reg = NULL_RTX;
750 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
751 count_reg, GEN_INT (last_stage),
752 count_reg, 1, OPTAB_DIRECT);
753 gcc_assert (REG_P (sub_reg));
754 if (REGNO (sub_reg) != REGNO (count_reg))
755 emit_move_insn (count_reg, sub_reg);
758 for (i = 0; i < last_stage; i++)
759 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
761 /* Put the prolog on the entry edge. */
762 e = loop_preheader_edge (loop);
763 split_edge_and_insert (e, get_insns ());
765 end_sequence ();
767 /* Generate the epilog, inserting its insns on the loop-exit edge. */
768 start_sequence ();
770 for (i = 0; i < last_stage; i++)
771 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
773 /* Put the epilogue on the exit edge. */
774 gcc_assert (single_exit (loop));
775 e = single_exit (loop);
776 split_edge_and_insert (e, get_insns ());
777 end_sequence ();
780 /* Return true if all the BBs of the loop are empty except the
781 loop header. */
782 static bool
783 loop_single_full_bb_p (struct loop *loop)
785 unsigned i;
786 basic_block *bbs = get_loop_body (loop);
788 for (i = 0; i < loop->num_nodes ; i++)
790 rtx head, tail;
791 bool empty_bb = true;
793 if (bbs[i] == loop->header)
794 continue;
796 /* Make sure that basic blocks other than the header
797 have only notes, labels or jumps. */
798 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
799 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
801 if (NOTE_P (head) || LABEL_P (head)
802 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
803 continue;
804 empty_bb = false;
805 break;
808 if (! empty_bb)
810 free (bbs);
811 return false;
814 free (bbs);
815 return true;
818 /* A simple loop from SMS point of view; it is a loop that is composed of
819 either a single basic block or two BBs - a header and a latch. */
820 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
821 && (EDGE_COUNT (loop->latch->preds) == 1) \
822 && (EDGE_COUNT (loop->latch->succs) == 1))
824 /* Return true if the loop is in its canonical form and false if not.
825 i.e. it is SIMPLE_SMS_LOOP_P, has one preheader block, and a single exit. */
826 static bool
827 loop_canon_p (struct loop *loop)
830 if (loop->inner || !loop_outer (loop))
832 if (dump_file)
833 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
834 return false;
837 if (!single_exit (loop))
839 if (dump_file)
841 rtx insn = BB_END (loop->header);
843 fprintf (dump_file, "SMS loop many exits ");
844 fprintf (dump_file, " %s %d (file, line)\n",
845 insn_file (insn), insn_line (insn));
847 return false;
850 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
852 if (dump_file)
854 rtx insn = BB_END (loop->header);
856 fprintf (dump_file, "SMS loop many BBs. ");
857 fprintf (dump_file, " %s %d (file, line)\n",
858 insn_file (insn), insn_line (insn));
860 return false;
863 return true;
866 /* If there are more than one entry for the loop,
867 make it one by splitting the first entry edge and
868 redirecting the others to the new BB. */
869 static void
870 canon_loop (struct loop *loop)
872 edge e;
873 edge_iterator i;
875 /* Avoid annoying special cases of edges going to exit
876 block. */
877 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
878 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
879 split_edge (e);
881 if (loop->latch == loop->header
882 || EDGE_COUNT (loop->latch->succs) > 1)
884 FOR_EACH_EDGE (e, i, loop->header->preds)
885 if (e->src == loop->latch)
886 break;
887 split_edge (e);
891 /* Setup infos. */
892 static void
893 setup_sched_infos (void)
895 memcpy (&sms_common_sched_info, &haifa_common_sched_info,
896 sizeof (sms_common_sched_info));
897 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
898 common_sched_info = &sms_common_sched_info;
900 sched_deps_info = &sms_sched_deps_info;
901 current_sched_info = &sms_sched_info;
904 /* Probability in % that the sms-ed loop rolls enough so that optimized
905 version may be entered. Just a guess. */
906 #define PROB_SMS_ENOUGH_ITERATIONS 80
908 /* Used to calculate the upper bound of ii. */
909 #define MAXII_FACTOR 2
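/* Added note (assumed): with these defaults, a loop with mii = 6 and a
   maximal ASAP of 10 gets maxii = MAX (10, MAXII_FACTOR * 6) = 12; see
   the maxii computation in sms_schedule below.  */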
911 /* Main entry point, perform SMS scheduling on the loops of the function
912 that consist of single basic blocks. */
913 static void
914 sms_schedule (void)
916 rtx insn;
917 ddg_ptr *g_arr, g;
918 int * node_order;
919 int maxii, max_asap;
920 loop_iterator li;
921 partial_schedule_ptr ps;
922 basic_block bb = NULL;
923 struct loop *loop;
924 basic_block condition_bb = NULL;
925 edge latch_edge;
926 gcov_type trip_count = 0;
928 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
929 | LOOPS_HAVE_RECORDED_EXITS);
930 if (number_of_loops () <= 1)
932 loop_optimizer_finalize ();
933 return; /* There are no loops to schedule. */
936 /* Initialize issue_rate. */
937 if (targetm.sched.issue_rate)
939 int temp = reload_completed;
941 reload_completed = 1;
942 issue_rate = targetm.sched.issue_rate ();
943 reload_completed = temp;
945 else
946 issue_rate = 1;
948 /* Initialize the scheduler. */
949 setup_sched_infos ();
950 haifa_sched_init ();
952 /* Allocate memory to hold the DDG array one entry for each loop.
953 We use loop->num as index into this array. */
954 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
956 if (dump_file)
958 fprintf (dump_file, "\n\nSMS analysis phase\n");
959 fprintf (dump_file, "===================\n\n");
962 /* Build DDGs for all the relevant loops and hold them in G_ARR
963 indexed by the loop index. */
964 FOR_EACH_LOOP (li, loop, 0)
966 rtx head, tail;
967 rtx count_reg;
969 /* For debugging. */
970 if (dbg_cnt (sms_sched_loop) == false)
972 if (dump_file)
973 fprintf (dump_file, "SMS reached max limit... \n");
975 break;
978 if (dump_file)
980 rtx insn = BB_END (loop->header);
982 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
983 loop->num, insn_file (insn), insn_line (insn));
987 if (! loop_canon_p (loop))
988 continue;
990 if (! loop_single_full_bb_p (loop))
992 if (dump_file)
993 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
994 continue;
997 bb = loop->header;
999 get_ebb_head_tail (bb, bb, &head, &tail);
1000 latch_edge = loop_latch_edge (loop);
1001 gcc_assert (single_exit (loop));
1002 if (single_exit (loop)->count)
1003 trip_count = latch_edge->count / single_exit (loop)->count;
1005 /* Perform SMS only on loops whose average trip count is above the threshold. */
1007 if ( latch_edge->count
1008 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1010 if (dump_file)
1012 fprintf (dump_file, " %s %d (file, line)\n",
1013 insn_file (tail), insn_line (tail));
1014 fprintf (dump_file, "SMS single-bb-loop\n");
1015 if (profile_info && flag_branch_probabilities)
1017 fprintf (dump_file, "SMS loop-count ");
1018 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1019 (HOST_WIDEST_INT) bb->count);
1020 fprintf (dump_file, "\n");
1021 fprintf (dump_file, "SMS trip-count ");
1022 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1023 (HOST_WIDEST_INT) trip_count);
1024 fprintf (dump_file, "\n");
1025 fprintf (dump_file, "SMS profile-sum-max ");
1026 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1027 (HOST_WIDEST_INT) profile_info->sum_max);
1028 fprintf (dump_file, "\n");
1031 continue;
1034 /* Make sure this is a doloop. */
1035 if ( !(count_reg = doloop_register_get (head, tail)))
1037 if (dump_file)
1038 fprintf (dump_file, "SMS doloop_register_get failed\n");
1039 continue;
1042 /* Don't handle BBs with calls or barriers or auto-increment insns
1043 (to avoid creating invalid reg-moves for the auto-increment insns),
1044 or !single_set with the exception of instructions that include
1045 count_reg---these instructions are part of the control part
1046 that the do-loop pattern recognizes.
1047 ??? Should handle auto-increment insns.
1048 ??? Should handle insns defining subregs. */
1049 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1051 rtx set;
1053 if (CALL_P (insn)
1054 || BARRIER_P (insn)
1055 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1056 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1057 && !reg_mentioned_p (count_reg, insn))
1058 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1059 || (INSN_P (insn) && (set = single_set (insn))
1060 && GET_CODE (SET_DEST (set)) == SUBREG))
1061 break;
1064 if (insn != NEXT_INSN (tail))
1066 if (dump_file)
1068 if (CALL_P (insn))
1069 fprintf (dump_file, "SMS loop-with-call\n");
1070 else if (BARRIER_P (insn))
1071 fprintf (dump_file, "SMS loop-with-barrier\n");
1072 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1073 fprintf (dump_file, "SMS reg inc\n");
1074 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1075 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1076 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1077 else
1078 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1079 print_rtl_single (dump_file, insn);
1082 continue;
1085 /* Always schedule the closing branch with the rest of the
1086 instructions. The branch is rotated to be in row ii-1 at the
1087 end of the scheduling procedure to make sure it's the last
1088 instruction in the iteration. */
1089 if (! (g = create_ddg (bb, 1)))
1091 if (dump_file)
1092 fprintf (dump_file, "SMS create_ddg failed\n");
1093 continue;
1096 g_arr[loop->num] = g;
1097 if (dump_file)
1098 fprintf (dump_file, "...OK\n");
1101 if (dump_file)
1103 fprintf (dump_file, "\nSMS transformation phase\n");
1104 fprintf (dump_file, "=========================\n\n");
1107 /* We don't want to perform SMS on new loops - created by versioning. */
1108 FOR_EACH_LOOP (li, loop, 0)
1110 rtx head, tail;
1111 rtx count_reg, count_init;
1112 int mii, rec_mii;
1113 unsigned stage_count = 0;
1114 HOST_WIDEST_INT loop_count = 0;
1116 if (! (g = g_arr[loop->num]))
1117 continue;
1119 if (dump_file)
1121 rtx insn = BB_END (loop->header);
1123 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1124 loop->num, insn_file (insn), insn_line (insn));
1126 print_ddg (dump_file, g);
1129 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1131 latch_edge = loop_latch_edge (loop);
1132 gcc_assert (single_exit (loop));
1133 if (single_exit (loop)->count)
1134 trip_count = latch_edge->count / single_exit (loop)->count;
1136 if (dump_file)
1138 fprintf (dump_file, " %s %d (file, line)\n",
1139 insn_file (tail), insn_line (tail));
1140 fprintf (dump_file, "SMS single-bb-loop\n");
1141 if (profile_info && flag_branch_probabilities)
1143 fprintf (dump_file, "SMS loop-count ");
1144 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1145 (HOST_WIDEST_INT) bb->count);
1146 fprintf (dump_file, "\n");
1147 fprintf (dump_file, "SMS profile-sum-max ");
1148 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1149 (HOST_WIDEST_INT) profile_info->sum_max);
1150 fprintf (dump_file, "\n");
1152 fprintf (dump_file, "SMS doloop\n");
1153 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1154 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1155 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1159 /* If the loop has a doloop register, it gets special
1160 handling. */
1161 count_init = NULL_RTX;
1162 if ((count_reg = doloop_register_get (head, tail)))
1164 basic_block pre_header;
1166 pre_header = loop_preheader_edge (loop)->src;
1167 count_init = const_iteration_count (count_reg, pre_header,
1168 &loop_count);
1170 gcc_assert (count_reg);
1172 if (dump_file && count_init)
1174 fprintf (dump_file, "SMS const-doloop ");
1175 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1176 loop_count);
1177 fprintf (dump_file, "\n");
1180 node_order = XNEWVEC (int, g->num_nodes);
1182 mii = 1; /* Need to pass some estimate of mii. */
1183 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1184 mii = MAX (res_MII (g), rec_mii);
1185 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1187 if (dump_file)
1188 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1189 rec_mii, mii, maxii);
1191 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1192 ASAP. */
1193 set_node_sched_params (g);
1195 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1197 if (ps)
1199 stage_count = calculate_stage_count (ps);
1200 gcc_assert(stage_count >= 1);
1201 PS_STAGE_COUNT(ps) = stage_count;
1204 /* The default value of PARAM_SMS_MIN_SC is 2, as a stage count of
1205 1 means that there is no interleaving between iterations, so
1206 we let the scheduling passes do the job in this case. */
1207 if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
1208 || (count_init && (loop_count <= stage_count))
1209 || (flag_branch_probabilities && (trip_count <= stage_count)))
1211 if (dump_file)
1213 fprintf (dump_file, "SMS failed... \n");
1214 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1215 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1216 fprintf (dump_file, ", trip-count=");
1217 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1218 fprintf (dump_file, ")\n");
1221 else
1223 struct undo_replace_buff_elem *reg_move_replaces;
1224 int amount = SCHED_TIME (g->closing_branch) + 1;
1226 /* Set the stage boundaries. The closing_branch was scheduled
1227 and should appear in the last (ii-1) row. */
1228 reset_sched_times (ps, amount);
1229 rotate_partial_schedule (ps, amount);
1230 set_columns_for_ps (ps);
1232 canon_loop (loop);
1234 if (dump_file)
1236 fprintf (dump_file,
1237 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1238 stage_count);
1239 print_partial_schedule (ps, dump_file);
1242 /* In case the BCT count is not known, do loop-versioning. */
1243 if (count_reg && ! count_init)
1245 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1246 GEN_INT(stage_count));
1247 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1248 * REG_BR_PROB_BASE) / 100;
1250 loop_version (loop, comp_rtx, &condition_bb,
1251 prob, prob, REG_BR_PROB_BASE - prob,
1252 true);
1255 /* Set new iteration count of loop kernel. */
1256 if (count_reg && count_init)
1257 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1258 - stage_count + 1);
1260 /* Now apply the scheduled kernel to the RTL of the loop. */
1261 permute_partial_schedule (ps, g->closing_branch->first_note);
1263 /* Mark this loop as software pipelined so the later
1264 scheduling passes don't touch it. */
1265 if (! flag_resched_modulo_sched)
1266 g->bb->flags |= BB_DISABLE_SCHEDULE;
1267 /* The life-info is not valid any more. */
1268 df_set_bb_dirty (g->bb);
1270 reg_move_replaces = generate_reg_moves (ps, true);
1271 if (dump_file)
1272 print_node_sched_params (dump_file, g->num_nodes, g);
1273 /* Generate prolog and epilog. */
1274 generate_prolog_epilog (ps, loop, count_reg, count_init);
1276 free_undo_replace_buff (reg_move_replaces);
1279 free_partial_schedule (ps);
1280 free (node_sched_params);
1281 free (node_order);
1282 free_ddg (g);
1285 free (g_arr);
1287 /* Release scheduler data, needed until now because of DFA. */
1288 haifa_sched_finish ();
1289 loop_optimizer_finalize ();
1292 /* The SMS scheduling algorithm itself
1293 -----------------------------------
1294 Input: 'O' an ordered list of insns of a loop.
1295 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1297 'Q' is the empty Set
1298 'PS' is the partial schedule; it holds the currently scheduled nodes with
1299 their cycle/slot.
1300 'PSP' previously scheduled predecessors.
1301 'PSS' previously scheduled successors.
1302 't(u)' the cycle where u is scheduled.
1303 'l(u)' is the latency of u.
1304 'd(v,u)' is the dependence distance from v to u.
1305 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1306 the node ordering phase.
1307 'check_hardware_resources_conflicts(u, PS, c)'
1308 run a trace around cycle/slot through DFA model
1309 to check resource conflicts involving instruction u
1310 at cycle c given the partial schedule PS.
1311 'add_to_partial_schedule_at_time(u, PS, c)'
1312 Add the node/instruction u to the partial schedule
1313 PS at time c.
1314 'calculate_register_pressure(PS)'
1315 Given a schedule of instructions, calculate the register
1316 pressure it implies. One implementation could be the
1317 maximum number of overlapping live ranges.
1318 'maxRP' The maximum allowed register pressure; it is usually derived from
1319 the number of registers available in the hardware.
1321 1. II = MII.
1322 2. PS = empty list
1323 3. for each node u in O in pre-computed order
1324 4. if (PSP(u) != Q && PSS(u) == Q) then
1325 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1326 6. start = Early_start; end = Early_start + II - 1; step = 1
1327 11. else if (PSP(u) == Q && PSS(u) != Q) then
1328 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1329 13. start = Late_start; end = Late_start - II + 1; step = -1
1330 14. else if (PSP(u) != Q && PSS(u) != Q) then
1331 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1332 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1333 17. start = Early_start;
1334 18. end = min(Early_start + II - 1 , Late_start);
1335 19. step = 1
1336 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1337 21. start = ASAP(u); end = start + II - 1; step = 1
1338 22. endif
1340 23. success = false
1341 24. for (c = start ; c != end ; c += step)
1342 25. if check_hardware_resources_conflicts(u, PS, c) then
1343 26. add_to_partial_schedule_at_time(u, PS, c)
1344 27. success = true
1345 28. break
1346 29. endif
1347 30. endfor
1348 31. if (success == false) then
1349 32. II = II + 1
1350 33. if (II > maxII) then
1351 34. finish - failed to schedule
1352 35. endif
1353 36. goto 2.
1354 37. endif
1355 38. endfor
1356 39. if (calculate_register_pressure(PS) > maxRP) then
1357 40. goto 32.
1358 41. endif
1359 42. compute epilogue & prologue
1360 43. finish - succeeded to schedule
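/* Added worked example (assumed, not part of the original source): step 5
   above for a node u with two already-scheduled predecessors at II = 4,
   v1 (t = 0, latency 2, distance 0) and v2 (t = 5, latency 1, distance 1):
   Early_start(u) = max (0 + 2 - 0, 5 + 1 - 4) = 2, so the window searched
   is cycles 2 .. 2 + II - 1 = 5.  The helper below is an illustrative
   reimplementation of that maximum, not code used by this pass.  */

struct sms_dep_sketch { int t, latency, distance; };

static int ATTRIBUTE_UNUSED
early_start_sketch (const struct sms_dep_sketch *preds, int n, int ii)
{
  int i, early_start = INT_MIN;
  for (i = 0; i < n; i++)
    early_start = MAX (early_start,
                       preds[i].t + preds[i].latency
                       - preds[i].distance * ii);
  return early_start;
}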
1363 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1364 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1365 set to 0 to save compile time. */
1366 #define DFA_HISTORY SMS_DFA_HISTORY
1368 /* A threshold for the number of repeated unsuccessful attempts to insert
1369 an empty row, before we flush the partial schedule and start over. */
1370 #define MAX_SPLIT_NUM 10
1371 /* Given the partial schedule PS, this function calculates and returns the
1372 cycles in which we can schedule the node with the given index I.
1373 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1374 noticed that there are several cases in which we fail to SMS the loop
1375 because the sched window of a node is empty due to tight data-deps. In
1376 such cases we want to unschedule some of the predecessors/successors
1377 until we get a non-empty scheduling window.  It returns -1 if the
1378 scheduling window is empty and zero otherwise. */
1380 static int
1381 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1382 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1384 int start, step, end;
1385 ddg_edge_ptr e;
1386 int u = nodes_order [i];
1387 ddg_node_ptr u_node = &ps->g->nodes[u];
1388 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1389 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1390 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1391 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1392 int psp_not_empty;
1393 int pss_not_empty;
1395 /* 1. compute sched window for u (start, end, step). */
1396 sbitmap_zero (psp);
1397 sbitmap_zero (pss);
1398 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1399 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1401 if (psp_not_empty && !pss_not_empty)
1403 int early_start = INT_MIN;
1405 end = INT_MAX;
1406 for (e = u_node->in; e != 0; e = e->next_in)
1408 ddg_node_ptr v_node = e->src;
1410 if (dump_file)
1412 fprintf (dump_file, "\nProcessing edge: ");
1413 print_ddg_edge (dump_file, e);
1414 fprintf (dump_file,
1415 "\nScheduling %d (%d) in psp_not_empty,"
1416 " checking p %d (%d): ", u_node->cuid,
1417 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1418 (v_node->insn));
1421 if (TEST_BIT (sched_nodes, v_node->cuid))
1423 int p_st = SCHED_TIME (v_node);
1425 early_start =
1426 MAX (early_start, p_st + e->latency - (e->distance * ii));
1428 if (dump_file)
1429 fprintf (dump_file,
1430 "pred st = %d; early_start = %d; latency: %d",
1431 p_st, early_start, e->latency);
1433 if (e->data_type == MEM_DEP)
1434 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1436 else if (dump_file)
1437 fprintf (dump_file, "the node is not scheduled\n");
1439 start = early_start;
1440 end = MIN (end, early_start + ii);
1441 /* Schedule the node close to its predecessors. */
1442 step = 1;
1444 if (dump_file)
1445 fprintf (dump_file,
1446 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1447 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1450 else if (!psp_not_empty && pss_not_empty)
1452 int late_start = INT_MAX;
1454 end = INT_MIN;
1455 for (e = u_node->out; e != 0; e = e->next_out)
1457 ddg_node_ptr v_node = e->dest;
1459 if (dump_file)
1461 fprintf (dump_file, "\nProcessing edge:");
1462 print_ddg_edge (dump_file, e);
1463 fprintf (dump_file,
1464 "\nScheduling %d (%d) in pss_not_empty,"
1465 " checking s %d (%d): ", u_node->cuid,
1466 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1467 (v_node->insn));
1470 if (TEST_BIT (sched_nodes, v_node->cuid))
1472 int s_st = SCHED_TIME (v_node);
1474 late_start = MIN (late_start,
1475 s_st - e->latency + (e->distance * ii));
1477 if (dump_file)
1478 fprintf (dump_file,
1479 "succ st = %d; late_start = %d; latency = %d",
1480 s_st, late_start, e->latency);
1482 if (e->data_type == MEM_DEP)
1483 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1484 if (dump_file)
1485 fprintf (dump_file, "end = %d\n", end);
1488 else if (dump_file)
1489 fprintf (dump_file, "the node is not scheduled\n");
1492 start = late_start;
1493 end = MAX (end, late_start - ii);
1494 /* Schedule the node close to its successors. */
1495 step = -1;
1497 if (dump_file)
1498 fprintf (dump_file,
1499 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1500 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1504 else if (psp_not_empty && pss_not_empty)
1506 int early_start = INT_MIN;
1507 int late_start = INT_MAX;
1508 int count_preds = 0;
1509 int count_succs = 0;
1511 start = INT_MIN;
1512 end = INT_MAX;
1513 for (e = u_node->in; e != 0; e = e->next_in)
1515 ddg_node_ptr v_node = e->src;
1517 if (dump_file)
1519 fprintf (dump_file, "\nProcessing edge:");
1520 print_ddg_edge (dump_file, e);
1521 fprintf (dump_file,
1522 "\nScheduling %d (%d) in psp_pss_not_empty,"
1523 " checking p %d (%d): ", u_node->cuid, INSN_UID
1524 (u_node->insn), v_node->cuid, INSN_UID
1525 (v_node->insn));
1528 if (TEST_BIT (sched_nodes, v_node->cuid))
1530 int p_st = SCHED_TIME (v_node);
1532 early_start = MAX (early_start,
1533 p_st + e->latency
1534 - (e->distance * ii));
1536 if (dump_file)
1537 fprintf (dump_file,
1538 "pred st = %d; early_start = %d; latency = %d",
1539 p_st, early_start, e->latency);
1541 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1542 count_preds++;
1544 if (e->data_type == MEM_DEP)
1545 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1547 else if (dump_file)
1548 fprintf (dump_file, "the node is not scheduled\n");
1551 for (e = u_node->out; e != 0; e = e->next_out)
1553 ddg_node_ptr v_node = e->dest;
1555 if (dump_file)
1557 fprintf (dump_file, "\nProcessing edge:");
1558 print_ddg_edge (dump_file, e);
1559 fprintf (dump_file,
1560 "\nScheduling %d (%d) in psp_pss_not_empty,"
1561 " checking s %d (%d): ", u_node->cuid, INSN_UID
1562 (u_node->insn), v_node->cuid, INSN_UID
1563 (v_node->insn));
1566 if (TEST_BIT (sched_nodes, v_node->cuid))
1568 int s_st = SCHED_TIME (v_node);
1570 late_start = MIN (late_start,
1571 s_st - e->latency
1572 + (e->distance * ii));
1574 if (dump_file)
1575 fprintf (dump_file,
1576 "succ st = %d; late_start = %d; latency = %d",
1577 s_st, late_start, e->latency);
1579 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1580 count_succs++;
1582 if (e->data_type == MEM_DEP)
1583 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1585 else if (dump_file)
1586 fprintf (dump_file, "the node is not scheduled\n");
1589 start = MAX (start, early_start);
1590 end = MIN (end, MIN (early_start + ii, late_start + 1));
1591 step = 1;
1592 /* If there are more successors than predecessors, schedule the
1593 node close to its successors. */
1594 if (count_succs >= count_preds)
1596 int old_start = start;
1598 start = end - 1;
1599 end = old_start - 1;
1600 step = -1;
1603 else /* psp is empty && pss is empty. */
1605 start = SCHED_ASAP (u_node);
1606 end = start + ii;
1607 step = 1;
1610 *start_p = start;
1611 *step_p = step;
1612 *end_p = end;
1613 sbitmap_free (psp);
1614 sbitmap_free (pss);
1616 if ((start >= end && step == 1) || (start <= end && step == -1))
1618 if (dump_file)
1619 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1620 start, end, step);
1621 return -1;
1624 return 0;
1627 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1628 node currently being scheduled.  At the end of the calculation
1629 MUST_PRECEDE/MUST_FOLLOW contain all predecessors/successors of
1630 U_NODE which are (1) already scheduled in the first/last row of
1631 U_NODE's scheduling window, (2) whose dependence inequality with U
1632 becomes an equality when U is scheduled in this same row, and (3)
1633 whose dependence latency is zero.
1635 The first and last rows are calculated using the following parameters:
1636 START/END rows - The cycles that begin/end the traversal of the window
1637 when searching for an empty cycle to schedule U_NODE.
1638 STEP - The direction in which we traverse the window.
1639 II - The initiation interval. */
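/* Added worked example (assumed): with start = 2, end = 6, step = 1 and
   ii = 4, the window covers cycles 2 .. 5.  A scheduled predecessor v on
   a distance-1 edge with SCHED_TIME (v) = 6 satisfies
   6 - 1 * 4 == 2 == first_cycle_in_window, so v lands in MUST_PRECEDE.  */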
1641 static void
1642 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1643 int step, int ii, sbitmap sched_nodes,
1644 sbitmap must_precede, sbitmap must_follow)
1646 ddg_edge_ptr e;
1647 int first_cycle_in_window, last_cycle_in_window;
1649 gcc_assert (must_precede && must_follow);
1651 /* Consider the following scheduling window:
1652 {first_cycle_in_window, first_cycle_in_window+1, ...,
1653 last_cycle_in_window}. If step is 1 then the following will be
1654 the order we traverse the window: {start=first_cycle_in_window,
1655 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1656 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1657 end=first_cycle_in_window-1} if step is -1. */
1658 first_cycle_in_window = (step == 1) ? start : end - step;
1659 last_cycle_in_window = (step == 1) ? end - step : start;
1661 sbitmap_zero (must_precede);
1662 sbitmap_zero (must_follow);
1664 if (dump_file)
1665 fprintf (dump_file, "\nmust_precede: ");
1667 /* Instead of checking if:
1668 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1669 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1670 first_cycle_in_window)
1671 && e->latency == 0
1672 we use the fact that latency is non-negative:
1673 SCHED_TIME (e->src) - (e->distance * ii) <=
1674 SCHED_TIME (e->src) + e->latency - (e->distance * ii) <=
1675 first_cycle_in_window
1676 and check only if
1677 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1678 for (e = u_node->in; e != 0; e = e->next_in)
1679 if (TEST_BIT (sched_nodes, e->src->cuid)
1680 && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1681 first_cycle_in_window))
1683 if (dump_file)
1684 fprintf (dump_file, "%d ", e->src->cuid);
1686 SET_BIT (must_precede, e->src->cuid);
1689 if (dump_file)
1690 fprintf (dump_file, "\nmust_follow: ");
1692 /* Instead of checking if:
1693 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1694 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1695 last_cycle_in_window)
1696 && e->latency == 0
1697 we use the fact that latency is non-negative:
1698 SCHED_TIME (e->dest) + (e->distance * ii) >=
1699 SCHED_TIME (e->dest) - e->latency + (e->distance * ii) >=
1700 last_cycle_in_window
1701 and check only if
1702 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1703 for (e = u_node->out; e != 0; e = e->next_out)
1704 if (TEST_BIT (sched_nodes, e->dest->cuid)
1705 && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1706 last_cycle_in_window))
1708 if (dump_file)
1709 fprintf (dump_file, "%d ", e->dest->cuid);
1711 SET_BIT (must_follow, e->dest->cuid);
1714 if (dump_file)
1715 fprintf (dump_file, "\n");
1718 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1719 parameters to decide if that's possible:
1720 PS - The partial schedule.
1721 U - The serial number of U_NODE.
1722 NUM_SPLITS - The number of row splits made so far.
1723 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1724 the first row of the scheduling window)
1725 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1726 last row of the scheduling window) */
1728 static bool
1729 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1730 int u, int cycle, sbitmap sched_nodes,
1731 int *num_splits, sbitmap must_precede,
1732 sbitmap must_follow)
1734 ps_insn_ptr psi;
1735 bool success = 0;
1737 verify_partial_schedule (ps, sched_nodes);
1738 psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1739 must_precede, must_follow);
1740 if (psi)
1742 SCHED_TIME (u_node) = cycle;
1743 SET_BIT (sched_nodes, u);
1744 success = 1;
1745 *num_splits = 0;
1746 if (dump_file)
1747 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1751 return success;
1754 /* This function implements the scheduling algorithm for SMS according to the
1755 above algorithm. */
1756 static partial_schedule_ptr
1757 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1759 int ii = mii;
1760 int i, c, success, num_splits = 0;
1761 int flush_and_start_over = true;
1762 int num_nodes = g->num_nodes;
1763 int start, end, step; /* Place together into one struct? */
1764 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1765 sbitmap must_precede = sbitmap_alloc (num_nodes);
1766 sbitmap must_follow = sbitmap_alloc (num_nodes);
1767 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1769 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1771 sbitmap_ones (tobe_scheduled);
1772 sbitmap_zero (sched_nodes);
1774 while (flush_and_start_over && (ii < maxii))
1777 if (dump_file)
1778 fprintf (dump_file, "Starting with ii=%d\n", ii);
1779 flush_and_start_over = false;
1780 sbitmap_zero (sched_nodes);
1782 for (i = 0; i < num_nodes; i++)
1784 int u = nodes_order[i];
1785 ddg_node_ptr u_node = &ps->g->nodes[u];
1786 rtx insn = u_node->insn;
1788 if (!NONDEBUG_INSN_P (insn))
1790 RESET_BIT (tobe_scheduled, u);
1791 continue;
1794 if (TEST_BIT (sched_nodes, u))
1795 continue;
1797 /* Try to get a non-empty scheduling window. */
1798 success = 0;
1799 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1800 &step, &end) == 0)
1802 if (dump_file)
1803 fprintf (dump_file, "\nTrying to schedule node %d \
1804 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1805 (g->nodes[u].insn)), start, end, step);
1807 gcc_assert ((step > 0 && start < end)
1808 || (step < 0 && start > end));
1810 calculate_must_precede_follow (u_node, start, end, step, ii,
1811 sched_nodes, must_precede,
1812 must_follow);
1814 for (c = start; c != end; c += step)
1816 sbitmap tmp_precede = NULL;
1817 sbitmap tmp_follow = NULL;
1819 if (c == start)
1821 if (step == 1)
1822 tmp_precede = must_precede;
1823 else /* step == -1. */
1824 tmp_follow = must_follow;
1826 if (c == end - step)
1828 if (step == 1)
1829 tmp_follow = must_follow;
1830 else /* step == -1. */
1831 tmp_precede = must_precede;
1834 success =
1835 try_scheduling_node_in_cycle (ps, u_node, u, c,
1836 sched_nodes,
1837 &num_splits, tmp_precede,
1838 tmp_follow);
1839 if (success)
1840 break;
1843 verify_partial_schedule (ps, sched_nodes);
1845 if (!success)
1847 int split_row;
1849 if (ii++ == maxii)
1850 break;
1852 if (num_splits >= MAX_SPLIT_NUM)
1854 num_splits = 0;
1855 flush_and_start_over = true;
1856 verify_partial_schedule (ps, sched_nodes);
1857 reset_partial_schedule (ps, ii);
1858 verify_partial_schedule (ps, sched_nodes);
1859 break;
1862 num_splits++;
1863 /* The scheduling window is exclusive of 'end'
1864 whereas compute_split_row () expects an inclusive,
1865 ordered range. */
1866 if (step == 1)
1867 split_row = compute_split_row (sched_nodes, start, end - 1,
1868 ps->ii, u_node);
1869 else
1870 split_row = compute_split_row (sched_nodes, end + 1, start,
1871 ps->ii, u_node);
1873 ps_insert_empty_row (ps, split_row, sched_nodes);
1874 i--; /* Go back and retry node i. */
1876 if (dump_file)
1877 fprintf (dump_file, "num_splits=%d\n", num_splits);
1880 /* ??? If (success), check register pressure estimates. */
1881 } /* Continue with next node. */
1882 } /* While flush_and_start_over. */
1883 if (ii >= maxii)
1885 free_partial_schedule (ps);
1886 ps = NULL;
1888 else
1889 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1891 sbitmap_free (sched_nodes);
1892 sbitmap_free (must_precede);
1893 sbitmap_free (must_follow);
1894 sbitmap_free (tobe_scheduled);
1896 return ps;
1899 /* This function inserts a new empty row into PS at the position
1900 given by SPLIT_ROW, keeping all already scheduled instructions
1901 intact and updating their SCHED_TIME and cycle accordingly. */
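/* For example (illustrative numbers only): with ii = 3 and split_row = 1,
   an insn at normalized cycle 5 sits in row 5 % 3 = 2 >= 1, so it moves to
   cycle 5 + 5/3 + 1 = 7, which is row 7 % 4 = 3 of the new ii = 4 schedule;
   an insn at cycle 3 (row 0 < 1) moves to cycle 3 + 3/3 = 4. */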
1902 static void
1903 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1904 sbitmap sched_nodes)
1906 ps_insn_ptr crr_insn;
1907 ps_insn_ptr *rows_new;
1908 int ii = ps->ii;
1909 int new_ii = ii + 1;
1910 int row;
1912 verify_partial_schedule (ps, sched_nodes);
1914 /* We normalize sched_time and rotate ps to have only non-negative sched
1915 times, for simplicity of updating cycles after inserting the new row. */
1916 split_row -= ps->min_cycle;
1917 split_row = SMODULO (split_row, ii);
1918 if (dump_file)
1919 fprintf (dump_file, "split_row=%d\n", split_row);
1921 reset_sched_times (ps, PS_MIN_CYCLE (ps));
1922 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1924 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1925 for (row = 0; row < split_row; row++)
1927 rows_new[row] = ps->rows[row];
1928 ps->rows[row] = NULL;
1929 for (crr_insn = rows_new[row];
1930 crr_insn; crr_insn = crr_insn->next_in_row)
1932 ddg_node_ptr u = crr_insn->node;
1933 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1935 SCHED_TIME (u) = new_time;
1936 crr_insn->cycle = new_time;
1937 SCHED_ROW (u) = new_time % new_ii;
1938 SCHED_STAGE (u) = new_time / new_ii;
1943 rows_new[split_row] = NULL;
1945 for (row = split_row; row < ii; row++)
1947 rows_new[row + 1] = ps->rows[row];
1948 ps->rows[row] = NULL;
1949 for (crr_insn = rows_new[row + 1];
1950 crr_insn; crr_insn = crr_insn->next_in_row)
1952 ddg_node_ptr u = crr_insn->node;
1953 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1955 SCHED_TIME (u) = new_time;
1956 crr_insn->cycle = new_time;
1957 SCHED_ROW (u) = new_time % new_ii;
1958 SCHED_STAGE (u) = new_time / new_ii;
1962 /* Updating ps. */
1963 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1964 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1965 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1966 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1967 free (ps->rows);
1968 ps->rows = rows_new;
1969 ps->ii = new_ii;
1970 gcc_assert (ps->min_cycle >= 0);
1972 verify_partial_schedule (ps, sched_nodes);
1974 if (dump_file)
1975 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1976 ps->max_cycle);
1979 /* Given U_NODE, the node that failed to be scheduled, and LOW and UP,
1980 the boundaries of its scheduling window, use SCHED_NODES and II to
1981 compute and return a row in the partial schedule that can be split
1982 so as to separate a critical predecessor from a critical successor,
1983 thereby expanding the window. */
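/* The intuition, as reflected in the code below: if a scheduled
   predecessor pins the lower bound LOW, splitting the row just after it
   (SMODULO (SCHED_TIME (pred) + 1, II)) frees one more candidate cycle
   for U_NODE; symmetrically, if a scheduled successor pins the upper
   bound UP, the row at its cycle is split. Otherwise the middle of the
   window is used. */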
1984 static int
1985 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1986 ddg_node_ptr u_node)
1988 ddg_edge_ptr e;
1989 int lower = INT_MIN, upper = INT_MAX;
1990 ddg_node_ptr crit_pred = NULL;
1991 ddg_node_ptr crit_succ = NULL;
1992 int crit_cycle;
1994 for (e = u_node->in; e != 0; e = e->next_in)
1996 ddg_node_ptr v_node = e->src;
1998 if (TEST_BIT (sched_nodes, v_node->cuid)
1999 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
2000 if (SCHED_TIME (v_node) > lower)
2002 crit_pred = v_node;
2003 lower = SCHED_TIME (v_node);
2007 if (crit_pred != NULL)
2009 crit_cycle = SCHED_TIME (crit_pred) + 1;
2010 return SMODULO (crit_cycle, ii);
2013 for (e = u_node->out; e != 0; e = e->next_out)
2015 ddg_node_ptr v_node = e->dest;
2016 if (TEST_BIT (sched_nodes, v_node->cuid)
2017 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
2018 if (SCHED_TIME (v_node) < upper)
2020 crit_succ = v_node;
2021 upper = SCHED_TIME (v_node);
2025 if (crit_succ != NULL)
2027 crit_cycle = SCHED_TIME (crit_succ);
2028 return SMODULO (crit_cycle, ii);
2031 if (dump_file)
2032 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2034 return SMODULO ((low + up + 1) / 2, ii);
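/* Sanity-check PS against SCHED_NODES: every insn in the rows of PS must
   belong to SCHED_NODES and have a SCHED_TIME within the range
   [ps->min_cycle, ps->max_cycle]. */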
2037 static void
2038 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2040 int row;
2041 ps_insn_ptr crr_insn;
2043 for (row = 0; row < ps->ii; row++)
2044 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2046 ddg_node_ptr u = crr_insn->node;
2048 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
2049 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2050 popcount (sched_nodes) == number of insns in ps. */
2051 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2052 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2057 /* This page implements the algorithm for ordering the nodes of a DDG
2058 for modulo scheduling, activated through the
2059 "int sms_order_nodes (ddg_ptr, int mii, int *result, int *pmax_asap)" API. */
2061 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2062 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2063 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2064 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2065 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2066 #define DEPTH(x) (ASAP ((x)))
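/* MOB is the mobility (scheduling slack) of a node, ALAP minus ASAP; a
   node with zero mobility lies on a critical path. DEPTH is simply the
   node's ASAP time. */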
2068 typedef struct node_order_params * nopa;
2070 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2071 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2072 static nopa calculate_order_params (ddg_ptr, int, int *);
2073 static int find_max_asap (ddg_ptr, sbitmap);
2074 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2075 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2077 enum sms_direction {BOTTOMUP, TOPDOWN};
2079 struct node_order_params
2081 int asap;
2082 int alap;
2083 int height;
2086 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2087 static void
2088 check_nodes_order (int *node_order, int num_nodes)
2090 int i;
2091 sbitmap tmp = sbitmap_alloc (num_nodes);
2093 sbitmap_zero (tmp);
2095 if (dump_file)
2096 fprintf (dump_file, "SMS final nodes order: \n");
2098 for (i = 0; i < num_nodes; i++)
2100 int u = node_order[i];
2102 if (dump_file)
2103 fprintf (dump_file, "%d ", u);
2104 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2106 SET_BIT (tmp, u);
2109 if (dump_file)
2110 fprintf (dump_file, "\n");
2112 sbitmap_free (tmp);
2115 /* Order the nodes of G for scheduling and pass the result in
2116 NODE_ORDER. Also set aux.count of each node to its ASAP.
2117 Store the maximal ASAP in PMAX_ASAP. Return the recMII for the given DDG. */
2118 static int
2119 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2121 int i;
2122 int rec_mii = 0;
2123 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2125 nopa nops = calculate_order_params (g, mii, pmax_asap);
2127 if (dump_file)
2128 print_sccs (dump_file, sccs, g);
2130 order_nodes_of_sccs (sccs, node_order);
2132 if (sccs->num_sccs > 0)
2133 /* First SCC has the largest recurrence_length. */
2134 rec_mii = sccs->sccs[0]->recurrence_length;
2136 /* Save ASAP before destroying node_order_params. */
2137 for (i = 0; i < g->num_nodes; i++)
2139 ddg_node_ptr v = &g->nodes[i];
2140 v->aux.count = ASAP (v);
2143 free (nops);
2144 free_ddg_all_sccs (sccs);
2145 check_nodes_order (node_order, g->num_nodes);
2147 return rec_mii;
2150 static void
2151 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2153 int i, pos = 0;
2154 ddg_ptr g = all_sccs->ddg;
2155 int num_nodes = g->num_nodes;
2156 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2157 sbitmap on_path = sbitmap_alloc (num_nodes);
2158 sbitmap tmp = sbitmap_alloc (num_nodes);
2159 sbitmap ones = sbitmap_alloc (num_nodes);
2161 sbitmap_zero (prev_sccs);
2162 sbitmap_ones (ones);
2164 /* Perform the node ordering starting from the SCC with the highest recMII.
2165 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2166 for (i = 0; i < all_sccs->num_sccs; i++)
2168 ddg_scc_ptr scc = all_sccs->sccs[i];
2170 /* Add nodes on paths from previous SCCs to the current SCC. */
2171 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2172 sbitmap_a_or_b (tmp, scc->nodes, on_path);
2174 /* Add nodes on paths from the current SCC to previous SCCs. */
2175 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2176 sbitmap_a_or_b (tmp, tmp, on_path);
2178 /* Remove nodes of previous SCCs from current extended SCC. */
2179 sbitmap_difference (tmp, tmp, prev_sccs);
2181 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2182 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2185 /* Handle the remaining nodes that do not belong to any scc. Each call
2186 to order_nodes_in_scc handles a single connected component. */
2187 while (pos < g->num_nodes)
2189 sbitmap_difference (tmp, ones, prev_sccs);
2190 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2192 sbitmap_free (prev_sccs);
2193 sbitmap_free (on_path);
2194 sbitmap_free (tmp);
2195 sbitmap_free (ones);
2198 /* MII would be needed only if we considered backarcs (that do not close recursive cycles); it is currently unused. */
2199 static struct node_order_params *
2200 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2202 int u;
2203 int max_asap;
2204 int num_nodes = g->num_nodes;
2205 ddg_edge_ptr e;
2206 /* Allocate a place to hold ordering params for each node in the DDG. */
2207 nopa node_order_params_arr;
2209 /* Initialize ASAP/ALAP/HEIGHT to zero. */
2210 node_order_params_arr = (nopa) xcalloc (num_nodes,
2211 sizeof (struct node_order_params));
2213 /* Set the aux pointer of each node to point to its order_params structure. */
2214 for (u = 0; u < num_nodes; u++)
2215 g->nodes[u].aux.info = &node_order_params_arr[u];
2217 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2218 calculate ASAP, ALAP, mobility, distance, and height for each node
2219 in the dependence (directed acyclic) graph. */
2221 /* We assume that the nodes in the array are in topological order. */
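/* Over zero-distance edges only:
   ASAP (u) = max over preds p of ASAP (p) + latency (p, u), or 0;
   ALAP (u) = min over succs s of ALAP (s) - latency (u, s), or max_asap;
   HEIGHT (u) = max over succs s of HEIGHT (s) + latency (u, s), or 0. */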
2223 max_asap = 0;
2224 for (u = 0; u < num_nodes; u++)
2226 ddg_node_ptr u_node = &g->nodes[u];
2228 ASAP (u_node) = 0;
2229 for (e = u_node->in; e; e = e->next_in)
2230 if (e->distance == 0)
2231 ASAP (u_node) = MAX (ASAP (u_node),
2232 ASAP (e->src) + e->latency);
2233 max_asap = MAX (max_asap, ASAP (u_node));
2236 for (u = num_nodes - 1; u > -1; u--)
2238 ddg_node_ptr u_node = &g->nodes[u];
2240 ALAP (u_node) = max_asap;
2241 HEIGHT (u_node) = 0;
2242 for (e = u_node->out; e; e = e->next_out)
2243 if (e->distance == 0)
2245 ALAP (u_node) = MIN (ALAP (u_node),
2246 ALAP (e->dest) - e->latency);
2247 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2248 HEIGHT (e->dest) + e->latency);
2251 if (dump_file)
2253 fprintf (dump_file, "\nOrder params\n");
2254 for (u = 0; u < num_nodes; u++)
2256 ddg_node_ptr u_node = &g->nodes[u];
2258 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2259 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2263 *pmax_asap = max_asap;
2264 return node_order_params_arr;
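/* Return the node in NODES with the maximal ASAP, or -1 if NODES is
   empty. */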
2267 static int
2268 find_max_asap (ddg_ptr g, sbitmap nodes)
2270 unsigned int u = 0;
2271 int max_asap = -1;
2272 int result = -1;
2273 sbitmap_iterator sbi;
2275 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2277 ddg_node_ptr u_node = &g->nodes[u];
2279 if (max_asap < ASAP (u_node))
2281 max_asap = ASAP (u_node);
2282 result = u;
2285 return result;
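/* Return the node in NODES with the maximal HEIGHT, breaking ties by
   minimal mobility (MOB); return -1 if NODES is empty. */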
2288 static int
2289 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2291 unsigned int u = 0;
2292 int max_hv = -1;
2293 int min_mob = INT_MAX;
2294 int result = -1;
2295 sbitmap_iterator sbi;
2297 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2299 ddg_node_ptr u_node = &g->nodes[u];
2301 if (max_hv < HEIGHT (u_node))
2303 max_hv = HEIGHT (u_node);
2304 min_mob = MOB (u_node);
2305 result = u;
2307 else if ((max_hv == HEIGHT (u_node))
2308 && (min_mob > MOB (u_node)))
2310 min_mob = MOB (u_node);
2311 result = u;
2314 return result;
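/* Return the node in NODES with the maximal DEPTH, breaking ties by
   minimal mobility (MOB); return -1 if NODES is empty. */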
2317 static int
2318 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2320 unsigned int u = 0;
2321 int max_dv = -1;
2322 int min_mob = INT_MAX;
2323 int result = -1;
2324 sbitmap_iterator sbi;
2326 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2328 ddg_node_ptr u_node = &g->nodes[u];
2330 if (max_dv < DEPTH (u_node))
2332 max_dv = DEPTH (u_node);
2333 min_mob = MOB (u_node);
2334 result = u;
2336 else if ((max_dv == DEPTH (u_node))
2337 && (min_mob > MOB (u_node)))
2339 min_mob = MOB (u_node);
2340 result = u;
2343 return result;
2346 /* Places the nodes of SCC into the NODE_ORDER array starting
2347 at position POS, according to the SMS ordering algorithm.
2348 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2349 the NODE_ORDER array, starting from position zero. */
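/* The ordering below alternates directions: in TOPDOWN mode the node with
   maximal height (ties broken by minimal mobility) is appended and its
   yet-unordered successors within SCC join the workset; in BOTTOMUP mode
   the node with maximal depth is appended and its predecessors join the
   workset. When the workset empties, the direction flips and the workset
   is reseeded from the predecessors or successors of the ordered nodes. */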
2350 static int
2351 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2352 int * node_order, int pos)
2354 enum sms_direction dir;
2355 int num_nodes = g->num_nodes;
2356 sbitmap workset = sbitmap_alloc (num_nodes);
2357 sbitmap tmp = sbitmap_alloc (num_nodes);
2358 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2359 sbitmap predecessors = sbitmap_alloc (num_nodes);
2360 sbitmap successors = sbitmap_alloc (num_nodes);
2362 sbitmap_zero (predecessors);
2363 find_predecessors (predecessors, g, nodes_ordered);
2365 sbitmap_zero (successors);
2366 find_successors (successors, g, nodes_ordered);
2368 sbitmap_zero (tmp);
2369 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2371 sbitmap_copy (workset, tmp);
2372 dir = BOTTOMUP;
2374 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2376 sbitmap_copy (workset, tmp);
2377 dir = TOPDOWN;
2379 else
2381 int u;
2383 sbitmap_zero (workset);
2384 if ((u = find_max_asap (g, scc)) >= 0)
2385 SET_BIT (workset, u);
2386 dir = BOTTOMUP;
2389 sbitmap_zero (zero_bitmap);
2390 while (!sbitmap_equal (workset, zero_bitmap))
2392 int v;
2393 ddg_node_ptr v_node;
2394 sbitmap v_node_preds;
2395 sbitmap v_node_succs;
2397 if (dir == TOPDOWN)
2399 while (!sbitmap_equal (workset, zero_bitmap))
2401 v = find_max_hv_min_mob (g, workset);
2402 v_node = &g->nodes[v];
2403 node_order[pos++] = v;
2404 v_node_succs = NODE_SUCCESSORS (v_node);
2405 sbitmap_a_and_b (tmp, v_node_succs, scc);
2407 /* Don't consider the already ordered successors again. */
2408 sbitmap_difference (tmp, tmp, nodes_ordered);
2409 sbitmap_a_or_b (workset, workset, tmp);
2410 RESET_BIT (workset, v);
2411 SET_BIT (nodes_ordered, v);
2413 dir = BOTTOMUP;
2414 sbitmap_zero (predecessors);
2415 find_predecessors (predecessors, g, nodes_ordered);
2416 sbitmap_a_and_b (workset, predecessors, scc);
2418 else
2420 while (!sbitmap_equal (workset, zero_bitmap))
2422 v = find_max_dv_min_mob (g, workset);
2423 v_node = &g->nodes[v];
2424 node_order[pos++] = v;
2425 v_node_preds = NODE_PREDECESSORS (v_node);
2426 sbitmap_a_and_b (tmp, v_node_preds, scc);
2428 /* Don't consider the already ordered predecessors again. */
2429 sbitmap_difference (tmp, tmp, nodes_ordered);
2430 sbitmap_a_or_b (workset, workset, tmp);
2431 RESET_BIT (workset, v);
2432 SET_BIT (nodes_ordered, v);
2434 dir = TOPDOWN;
2435 sbitmap_zero (successors);
2436 find_successors (successors, g, nodes_ordered);
2437 sbitmap_a_and_b (workset, successors, scc);
2440 sbitmap_free (tmp);
2441 sbitmap_free (workset);
2442 sbitmap_free (zero_bitmap);
2443 sbitmap_free (predecessors);
2444 sbitmap_free (successors);
2445 return pos;
2449 /* This page contains functions for manipulating partial-schedules during
2450 modulo scheduling. */
2452 /* Create a partial schedule and allocate memory to hold II rows. */
2454 static partial_schedule_ptr
2455 create_partial_schedule (int ii, ddg_ptr g, int history)
2457 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2458 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2459 ps->ii = ii;
2460 ps->history = history;
2461 ps->min_cycle = INT_MAX;
2462 ps->max_cycle = INT_MIN;
2463 ps->g = g;
2465 return ps;
2468 /* Free the PS_INSNs in the rows array of the given partial schedule.
2469 ??? Consider caching the PS_INSNs. */
2470 static void
2471 free_ps_insns (partial_schedule_ptr ps)
2473 int i;
2475 for (i = 0; i < ps->ii; i++)
2477 while (ps->rows[i])
2479 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2481 free (ps->rows[i]);
2482 ps->rows[i] = ps_insn;
2484 ps->rows[i] = NULL;
2488 /* Free all the memory allocated to the partial schedule. */
2490 static void
2491 free_partial_schedule (partial_schedule_ptr ps)
2493 if (!ps)
2494 return;
2495 free_ps_insns (ps);
2496 free (ps->rows);
2497 free (ps);
2500 /* Clear the rows array with its PS_INSNs, and create a new one with
2501 NEW_II rows. */
2503 static void
2504 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2506 if (!ps)
2507 return;
2508 free_ps_insns (ps);
2509 if (new_ii == ps->ii)
2510 return;
2511 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2512 * sizeof (ps_insn_ptr));
2513 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2514 ps->ii = new_ii;
2515 ps->min_cycle = INT_MAX;
2516 ps->max_cycle = INT_MIN;
2519 /* Print the partial schedule as an array of II rows; for each row
2520 print the UIDs of the insns in it. */
2521 void
2522 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2524 int i;
2526 for (i = 0; i < ps->ii; i++)
2528 ps_insn_ptr ps_i = ps->rows[i];
2530 fprintf (dump, "\n[ROW %d ]: ", i);
2531 while (ps_i)
2533 fprintf (dump, "%d, ",
2534 INSN_UID (ps_i->node->insn));
2535 ps_i = ps_i->next_in_row;
2541 /* Create a PS_INSN object and initialize it with the given parameters. */
2541 static ps_insn_ptr
2542 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2544 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2546 ps_i->node = node;
2547 ps_i->next_in_row = NULL;
2548 ps_i->prev_in_row = NULL;
2549 ps_i->row_rest_count = rest_count;
2550 ps_i->cycle = cycle;
2552 return ps_i;
2556 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2557 node is not found in the partial schedule, else returns true. */
2558 static bool
2559 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2561 int row;
2563 if (!ps || !ps_i)
2564 return false;
2566 row = SMODULO (ps_i->cycle, ps->ii);
2567 if (! ps_i->prev_in_row)
2569 if (ps_i != ps->rows[row])
2570 return false;
2572 ps->rows[row] = ps_i->next_in_row;
2573 if (ps->rows[row])
2574 ps->rows[row]->prev_in_row = NULL;
2576 else
2578 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2579 if (ps_i->next_in_row)
2580 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2582 free (ps_i);
2583 return true;
2586 /* Unlike what the literature describes for modulo scheduling (which
2587 focuses on VLIW machines), the order of the instructions inside a
2588 cycle is important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE
2589 we know where the current instruction should go relative to the
2590 already scheduled instructions in the given cycle. Go over these
2591 instructions and find the first possible column to put it in. */
2592 static bool
2593 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2594 sbitmap must_precede, sbitmap must_follow)
2596 ps_insn_ptr next_ps_i;
2597 ps_insn_ptr first_must_follow = NULL;
2598 ps_insn_ptr last_must_precede = NULL;
2599 ps_insn_ptr last_in_row = NULL;
2600 int row;
2602 if (! ps_i)
2603 return false;
2605 row = SMODULO (ps_i->cycle, ps->ii);
2607 /* Find the first must_follow and the last must_precede
2608 and insert the node immediately after the last must_precede,
2609 making sure that there is no must_follow after it. */
2610 for (next_ps_i = ps->rows[row];
2611 next_ps_i;
2612 next_ps_i = next_ps_i->next_in_row)
2614 if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
2615 && ! first_must_follow)
2616 first_must_follow = next_ps_i;
2617 if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
2619 /* If we have already met a node that must follow, then
2620 there is no possible column. */
2621 if (first_must_follow)
2622 return false;
2623 else
2624 last_must_precede = next_ps_i;
2626 /* The closing branch must be the last in the row. */
2627 if (must_precede
2628 && TEST_BIT (must_precede, next_ps_i->node->cuid)
2629 && JUMP_P (next_ps_i->node->insn))
2630 return false;
2632 last_in_row = next_ps_i;
2635 /* The closing branch is scheduled as well. Make sure there is no
2636 dependent instruction after it as the branch should be the last
2637 instruction in the row. */
2638 if (JUMP_P (ps_i->node->insn))
2640 if (first_must_follow)
2641 return false;
2642 if (last_in_row)
2644 /* Make the branch the last in the row. New instructions
2645 will be inserted at the beginning of the row or after the
2646 last must_precede instruction thus the branch is guaranteed
2647 to remain the last instruction in the row. */
2648 last_in_row->next_in_row = ps_i;
2649 ps_i->prev_in_row = last_in_row;
2650 ps_i->next_in_row = NULL;
2652 else
2653 ps->rows[row] = ps_i;
2654 return true;
2657 /* Now insert the node after LAST_MUST_PRECEDE, or at the head of the row if there is none. */
2659 if (! last_must_precede)
2661 ps_i->next_in_row = ps->rows[row];
2662 ps_i->prev_in_row = NULL;
2663 if (ps_i->next_in_row)
2664 ps_i->next_in_row->prev_in_row = ps_i;
2665 ps->rows[row] = ps_i;
2667 else
2669 ps_i->next_in_row = last_must_precede->next_in_row;
2670 last_must_precede->next_in_row = ps_i;
2671 ps_i->prev_in_row = last_must_precede;
2672 if (ps_i->next_in_row)
2673 ps_i->next_in_row->prev_in_row = ps_i;
2676 return true;
2679 /* Advances the PS_INSN one column in its current row; returns false
2680 on failure and true on success. Bit N is set in MUST_FOLLOW if
2681 the node with cuid N must come after the node pointed to by
2682 PS_I when scheduled in the same cycle. */
2683 static int
2684 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2685 sbitmap must_follow)
2687 ps_insn_ptr prev, next;
2688 int row;
2689 ddg_node_ptr next_node;
2691 if (!ps || !ps_i)
2692 return false;
2694 row = SMODULO (ps_i->cycle, ps->ii);
2696 if (! ps_i->next_in_row)
2697 return false;
2699 next_node = ps_i->next_in_row->node;
2701 /* Check if next_in_row is dependent on ps_i, both having same sched
2702 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2703 if (must_follow && TEST_BIT (must_follow, next_node->cuid))
2704 return false;
2706 /* Advance PS_I over its next_in_row in the doubly linked list. */
2707 prev = ps_i->prev_in_row;
2708 next = ps_i->next_in_row;
2710 if (ps_i == ps->rows[row])
2711 ps->rows[row] = next;
2713 ps_i->next_in_row = next->next_in_row;
2715 if (next->next_in_row)
2716 next->next_in_row->prev_in_row = ps_i;
2718 next->next_in_row = ps_i;
2719 ps_i->prev_in_row = next;
2721 next->prev_in_row = prev;
2722 if (prev)
2723 prev->next_in_row = next;
2725 return true;
2728 /* Inserts a DDG_NODE into the given partial schedule at the given cycle.
2729 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2730 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
2731 before/after (respectively) the node pointed to by PS_I when scheduled
2732 in the same cycle. */
2733 static ps_insn_ptr
2734 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2735 sbitmap must_precede, sbitmap must_follow)
2737 ps_insn_ptr ps_i;
2738 int rest_count = 1;
2739 int row = SMODULO (cycle, ps->ii);
2741 if (ps->rows[row]
2742 && ps->rows[row]->row_rest_count >= issue_rate)
2743 return NULL;
2745 if (ps->rows[row])
2746 rest_count += ps->rows[row]->row_rest_count;
2748 ps_i = create_ps_insn (node, rest_count, cycle);
2750 /* Finds and inserts PS_I according to MUST_FOLLOW and
2751 MUST_PRECEDE. */
2752 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2754 free (ps_i);
2755 return NULL;
2758 return ps_i;
2761 /* Advance time one cycle. Assumes DFA is being used. */
2762 static void
2763 advance_one_cycle (void)
2765 if (targetm.sched.dfa_pre_cycle_insn)
2766 state_transition (curr_state,
2767 targetm.sched.dfa_pre_cycle_insn ());
2769 state_transition (curr_state, NULL);
2771 if (targetm.sched.dfa_post_cycle_insn)
2772 state_transition (curr_state,
2773 targetm.sched.dfa_post_cycle_insn ());
2778 /* Checks if PS has resource conflicts according to DFA, starting from
2779 FROM cycle to TO cycle; returns true if there are conflicts and false
2780 if there are no conflicts. Assumes DFA is being used. */
2781 static int
2782 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2784 int cycle;
2786 state_reset (curr_state);
2788 for (cycle = from; cycle <= to; cycle++)
2790 ps_insn_ptr crr_insn;
2791 /* Holds the remaining issue slots in the current row. */
2792 int can_issue_more = issue_rate;
2794 /* Walk through the DFA for the current row. */
2795 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2796 crr_insn;
2797 crr_insn = crr_insn->next_in_row)
2799 rtx insn = crr_insn->node->insn;
2801 if (!NONDEBUG_INSN_P (insn))
2802 continue;
2804 /* Check if there is room for the current insn. */
2805 if (!can_issue_more || state_dead_lock_p (curr_state))
2806 return true;
2808 /* Update the DFA state and return with failure if the DFA found
2809 resource conflicts. */
2810 if (state_transition (curr_state, insn) >= 0)
2811 return true;
2813 if (targetm.sched.variable_issue)
2814 can_issue_more =
2815 targetm.sched.variable_issue (sched_dump, sched_verbose,
2816 insn, can_issue_more);
2817 /* A naked CLOBBER or USE generates no instruction, so don't
2818 let them consume issue slots. */
2819 else if (GET_CODE (PATTERN (insn)) != USE
2820 && GET_CODE (PATTERN (insn)) != CLOBBER)
2821 can_issue_more--;
2824 /* Advance the DFA to the next cycle. */
2825 advance_one_cycle ();
2827 return false;
2830 /* Checks if the given node causes resource conflicts when added to PS at
2831 cycle C. If not, the node is added to PS and returned; otherwise zero
2832 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2833 cuid N must come before/after (respectively) the node pointed to by
2834 PS_I when scheduled in the same cycle. */
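/* Note that resource conflicts are checked for cycle C alone and, when
   the schedule keeps DFA history (ps->history > 0), for the window
   [C - history, C + history] around it as well. */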
2835 ps_insn_ptr
2836 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2837 int c, sbitmap must_precede,
2838 sbitmap must_follow)
2840 int has_conflicts = 0;
2841 ps_insn_ptr ps_i;
2843 /* First add the node to the PS, if this succeeds check for
2844 conflicts, trying different issue slots in the same row. */
2845 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2846 return NULL; /* Failed to insert the node at the given cycle. */
2848 has_conflicts = ps_has_conflicts (ps, c, c)
2849 || (ps->history > 0
2850 && ps_has_conflicts (ps,
2851 c - ps->history,
2852 c + ps->history));
2854 /* Try different issue slots to find one that the given node can be
2855 scheduled in without conflicts. */
2856 while (has_conflicts)
2858 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2859 break;
2860 has_conflicts = ps_has_conflicts (ps, c, c)
2861 || (ps->history > 0
2862 && ps_has_conflicts (ps,
2863 c - ps->history,
2864 c + ps->history));
2867 if (has_conflicts)
2869 remove_node_from_ps (ps, ps_i);
2870 return NULL;
2873 ps->min_cycle = MIN (ps->min_cycle, c);
2874 ps->max_cycle = MAX (ps->max_cycle, c);
2875 return ps_i;
2878 /* Calculate the stage count of the partial schedule PS. The calculation
2879 takes into account the rotation to bring the closing branch to row
2880 ii-1. */
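/* After subtracting the rotation amount the closing branch ends up at
   cycle -1, i.e. row ii - 1 of the rotated schedule; the stages below
   cycle zero and from cycle zero upwards are then counted separately
   and summed. */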
2881 int
2882 calculate_stage_count (partial_schedule_ptr ps)
2884 int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
2885 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
2886 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
2887 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
2889 /* The calculation of the stage count is done by adding the number of
2890 stages before cycle zero and after cycle zero. */
2891 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
2893 return stage_count;
2896 /* Rotate the rows of PS such that insns scheduled at time
2897 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2898 void
2899 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2901 int i, row, backward_rotates;
2902 int last_row = ps->ii - 1;
2904 if (start_cycle == 0)
2905 return;
2907 backward_rotates = SMODULO (start_cycle, ps->ii);
2909 /* Revisit later and optimize this into a single loop. */
2910 for (i = 0; i < backward_rotates; i++)
2912 ps_insn_ptr first_row = ps->rows[0];
2914 for (row = 0; row < last_row; row++)
2915 ps->rows[row] = ps->rows[row+1];
2917 ps->rows[last_row] = first_row;
2920 ps->max_cycle -= start_cycle;
2921 ps->min_cycle -= start_cycle;
2924 #endif /* INSN_SCHEDULING */
2926 static bool
2927 gate_handle_sms (void)
2929 return (optimize > 0 && flag_modulo_sched);
2933 /* Run the instruction scheduler. */
2934 /* Perform SMS modulo scheduling. */
2935 static unsigned int
2936 rest_of_handle_sms (void)
2938 #ifdef INSN_SCHEDULING
2939 basic_block bb;
2941 /* Collect loop information to be used in SMS. */
2942 cfg_layout_initialize (0);
2943 sms_schedule ();
2945 /* Update the life information, because we add pseudos. */
2946 max_regno = max_reg_num ();
2948 /* Finalize layout changes. */
2949 FOR_EACH_BB (bb)
2950 if (bb->next_bb != EXIT_BLOCK_PTR)
2951 bb->aux = bb->next_bb;
2952 free_dominance_info (CDI_DOMINATORS);
2953 cfg_layout_finalize ();
2954 #endif /* INSN_SCHEDULING */
2955 return 0;
2958 struct rtl_opt_pass pass_sms =
2961 RTL_PASS,
2962 "sms", /* name */
2963 gate_handle_sms, /* gate */
2964 rest_of_handle_sms, /* execute */
2965 NULL, /* sub */
2966 NULL, /* next */
2967 0, /* static_pass_number */
2968 TV_SMS, /* tv_id */
2969 0, /* properties_required */
2970 0, /* properties_provided */
2971 0, /* properties_destroyed */
2972 TODO_dump_func, /* todo_flags_start */
2973 TODO_df_finish
2974 | TODO_verify_flow
2975 | TODO_verify_rtl_sharing
2976 | TODO_dump_func
2977 | TODO_ggc_collect /* todo_flags_finish */