1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
51 #ifdef INSN_SCHEDULING
53 /* This file contains the implementation of the Swing Modulo Scheduler,
54 described in the following references:
55 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero, and J. Eckhardt.
56 Lifetime-sensitive modulo scheduling in a production environment.
57 IEEE Trans. on Comps., 50(3), March 2001
58 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62 The basic structure is:
63 1. Build a data-dependence graph (DDG) for each loop.
64 2. Use the DDG to order the insns of a loop (not in topological order
65 necessarily, but rather) trying to place each insn after all its
66 predecessors _or_ after all its successors.
67 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68 4. Use the ordering to perform list-scheduling of the loop:
69 1. Set II = MII. We will try to schedule the loop within II cycles.
70 2. Try to schedule the insns one by one according to the ordering.
71 For each insn compute an interval of cycles by considering already-
72 scheduled preds and succs (and associated latencies); try to place
73 the insn in the cycles of this window checking for potential
74 resource conflicts (using the DFA interface).
75 Note: this is different from the cycle-scheduling of schedule_insns;
76 here the insns are not scheduled monotonically top-down (nor bottom-
77 up).
78 3. If failed in scheduling all insns - bump II++ and try again, unless
79 II reaches an upper bound MaxII, in which case report failure.
80 5. If we succeeded in scheduling the loop within II cycles, we now
81 generate prolog and epilog, decrease the counter of the loop, and
82 perform modulo variable expansion for live ranges that span more than
83 II cycles (i.e. use register copies to prevent a def from overwriting
84 itself before reaching the use).
86 SMS works with countable loops (1) whose control part can be easily
87 decoupled from the rest of the loop and (2) whose loop count can
88 be easily adjusted. This is because we peel a constant number of
89 iterations into a prologue and epilogue for which we want to avoid
90 emitting the control part, and a kernel which is to iterate that
91 constant number of iterations less than the original loop. So the
92 control part should be a set of insns clearly identified and having
93 its own iv, not otherwise used in the loop (at least for now), which
94 initializes a register before the loop to the number of iterations.
95 Currently SMS relies on the do-loop pattern to recognize such loops,
96 where (1) the control part comprises all insns defining and/or
97 using a certain 'count' register and (2) the loop count can be
98 adjusted by modifying this register prior to the loop.
99 TODO: Rely on cfgloop analysis instead. */
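/* Illustrative example (not part of the original file): a hypothetical
   countable loop of the kind SMS targets, assuming the target defines a
   doloop_end pattern so the exit test becomes a branch on a dedicated
   count register:

     void scale (int *a, int *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = a[i] * b[i];
     }

   The branch-on-count insns form the control part that is kept out of the
   prolog and epilog; the remaining insns form the body that is modulo
   scheduled into the kernel.  */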
101 /* This page defines partial-schedule structures and functions for
102 modulo scheduling. */
104 typedef struct partial_schedule *partial_schedule_ptr;
105 typedef struct ps_insn *ps_insn_ptr;
107 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
108 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
110 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
111 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
113 /* Perform signed modulo, always returning a non-negative value. */
114 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
116 /* The number of different iterations the nodes in ps span, assuming
117 the stage boundaries are placed efficiently. */
118 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
119 + 1 + (ps)->ii - 1) / (ps)->ii)
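/* Worked example (illustrative numbers, not from the original file): with
   ii = 4 and insns scheduled in absolute cycles -3 .. 6, PS_MIN_CYCLE = -3
   and PS_MAX_CYCLE = 6, so PS_STAGE_COUNT = (6 - (-3) + 1 + 4 - 1) / 4
   = 13 / 4 = 3 stages.  Likewise SMODULO (-3, 4) = 1, whereas the plain C
   remainder -3 % 4 would be -3.  */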
121 /* A single instruction in the partial schedule. */
122 struct ps_insn
124 /* The corresponding DDG_NODE. */
125 ddg_node_ptr node;
127 /* The (absolute) cycle in which the PS instruction is scheduled.
128 Same as SCHED_TIME (node). */
129 int cycle;
131 /* The next/prev PS_INSN in the same row. */
132 ps_insn_ptr next_in_row,
133 prev_in_row;
135 /* The number of nodes in the same row that come after this node. */
136 int row_rest_count;
139 /* Holds the partial schedule as an array of II rows. Each entry of the
140 array points to a linked list of PS_INSNs, which represents the
141 instructions that are scheduled for that row. */
142 struct partial_schedule
144 int ii; /* Number of rows in the partial schedule. */
145 int history; /* Threshold for conflict checking using DFA. */
147 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
148 ps_insn_ptr *rows;
150 /* The earliest absolute cycle of an insn in the partial schedule. */
151 int min_cycle;
153 /* The latest absolute cycle of an insn in the partial schedule. */
154 int max_cycle;
156 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
159 /* We use this to record all the register replacements we do in
160 the kernel so we can undo SMS if it is not profitable. */
161 struct undo_replace_buff_elem
163 rtx insn;
164 rtx orig_reg;
165 rtx new_reg;
166 struct undo_replace_buff_elem *next;
171 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
172 static void free_partial_schedule (partial_schedule_ptr);
173 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
174 void print_partial_schedule (partial_schedule_ptr, FILE *);
175 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
176 ddg_node_ptr node, int cycle,
177 sbitmap must_precede,
178 sbitmap must_follow);
179 static void rotate_partial_schedule (partial_schedule_ptr, int);
180 void set_row_column_for_ps (partial_schedule_ptr);
181 static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
184 /* This page defines constants and structures for the modulo scheduling
185 driver. */
187 /* As in haifa-sched.c: */
188 /* issue_rate is the number of insns that can be scheduled in the same
189 machine cycle. It can be defined in the config/mach/mach.h file,
190 otherwise we set it to 1. */
192 static int issue_rate;
194 static int sms_order_nodes (ddg_ptr, int, int * result);
195 static void set_node_sched_params (ddg_ptr);
196 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
197 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
198 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *loop,
199 rtx, rtx);
200 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
201 int from_stage, int to_stage,
202 int is_prolog, rtx count_reg);
204 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
205 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
206 #define SCHED_FIRST_REG_MOVE(x) \
207 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
208 #define SCHED_NREG_MOVES(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
210 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
211 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
212 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
214 /* The scheduling parameters held for each node. */
215 typedef struct node_sched_params
217 int asap; /* A lower-bound on the absolute scheduling cycle. */
218 int time; /* The absolute scheduling cycle (time >= asap). */
220 /* The following field (first_reg_move) is a pointer to the first
221 register-move instruction added to handle the modulo-variable-expansion
222 of the register defined by this node. This register-move copies the
223 original register defined by the node. */
224 rtx first_reg_move;
226 /* The number of register-move instructions added, immediately preceding
227 first_reg_move. */
228 int nreg_moves;
230 int row; /* Holds time % ii. */
231 int stage; /* Holds time / ii. */
233 /* The column of a node inside the ps. If nodes u, v are on the same row,
234 u will precede v if column (u) < column (v). */
235 int column;
236 } *node_sched_params_ptr;
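/* Worked example (illustrative numbers): with ii = 3, a node whose time is
   7 after normalization gets row = 7 % 3 = 1 and stage = 7 / 3 = 2, i.e.
   it is issued in the second cycle of the kernel and belongs to the third
   stage.  column merely numbers the insns that share a row, left to
   right.  */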
239 /* The following three functions are copied from the current scheduler
240 code in order to use sched_analyze() for computing the dependencies.
241 They are used when initializing the sched_info structure. */
242 static const char *
243 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
245 static char tmp[80];
247 sprintf (tmp, "i%4d", INSN_UID (insn));
248 return tmp;
251 static void
252 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
253 regset cond_exec ATTRIBUTE_UNUSED,
254 regset used ATTRIBUTE_UNUSED,
255 regset set ATTRIBUTE_UNUSED)
259 static struct sched_info sms_sched_info =
261 NULL,
262 NULL,
263 NULL,
264 NULL,
265 NULL,
266 sms_print_insn,
267 NULL,
268 compute_jump_reg_dependencies,
269 NULL, NULL,
270 NULL, NULL,
271 0, 0, 0,
273 NULL, NULL, NULL, NULL, NULL,
278 /* Given HEAD and TAIL which are the first and last insns in a loop;
279 return the register which controls the loop. Return zero if it has
280 more than one occurrence in the loop besides the control part or the
281 do-loop pattern is not of the form we expect. */
282 static rtx
283 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
285 #ifdef HAVE_doloop_end
286 rtx reg, condition, insn;
287 bool found = false;
289 if (!JUMP_P (tail))
290 return NULL_RTX;
292 /* TODO: Free SMS's dependence on doloop_condition_get. */
293 condition = doloop_condition_get (tail);
294 if (! condition)
295 return NULL_RTX;
297 if (REG_P (XEXP (condition, 0)))
298 reg = XEXP (condition, 0);
299 else if (GET_CODE (XEXP (condition, 0)) == PLUS
300 && REG_P (XEXP (XEXP (condition, 0), 0)))
301 reg = XEXP (XEXP (condition, 0), 0);
302 else
303 gcc_unreachable ();
305 /* Check that the COUNT_REG has no other occurrences in the loop
306 until the decrement. We assume the control part consists of
307 either a single (parallel) branch-on-count or a (non-parallel)
308 branch immediately preceded by a single (decrement) insn. */
309 for (insn = head; insn != PREV_INSN (tail); insn = NEXT_INSN (insn))
310 if ((found = reg_mentioned_p (reg, insn)) == true)
311 break;
312 if (found)
314 if (dump_file)
315 fprintf (dump_file, "SMS count_reg found outside control\n");
317 return NULL_RTX;
319 /* One last check in case the do-loop pattern is parallel. */
320 if (GET_CODE (PATTERN (tail)) == PARALLEL)
321 if (reg_mentioned_p (reg, PREV_INSN (tail)))
323 if (dump_file)
324 fprintf (dump_file, "SMS count_reg found outside control\n");
326 return NULL_RTX;
328 return reg;
329 #else
330 return NULL_RTX;
331 #endif
334 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
335 that the number of iterations is a compile-time constant. If so,
336 return the rtx that sets COUNT_REG to a constant, and set COUNT to
337 this constant. Otherwise return 0. */
338 static rtx
339 const_iteration_count (rtx count_reg, basic_block pre_header,
340 HOST_WIDEST_INT * count)
342 rtx insn;
343 rtx head, tail;
345 if (! pre_header)
346 return NULL_RTX;
348 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
350 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
351 if (INSN_P (insn) && single_set (insn) &&
352 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
354 rtx pat = single_set (insn);
356 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
358 *count = INTVAL (SET_SRC (pat));
359 return insn;
362 return NULL_RTX;
365 return NULL_RTX;
368 /* A very simple resource-based lower bound on the initiation interval.
369 ??? Improve the accuracy of this bound by considering the
370 utilization of various units. */
371 static int
372 res_MII (ddg_ptr g)
374 return (g->num_nodes / issue_rate);
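/* Worked example (illustrative numbers): a loop body of 10 insns on a
   machine with issue_rate = 4 gives res_MII = 10 / 4 = 2 by truncating
   division; rounding up instead would give (10 + 4 - 1) / 4 = 3, which is
   part of what the ??? note above is about.  */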
378 /* Points to the array that contains the sched data for each node. */
379 static node_sched_params_ptr node_sched_params;
381 /* Allocate sched_params for each node and initialize it. Assumes that
382 the aux field of each node contains the asap bound (computed earlier),
383 and copies it into the sched_params field. */
384 static void
385 set_node_sched_params (ddg_ptr g)
387 int i;
389 /* Allocate for each node in the DDG a place to hold the "sched_data". */
390 /* Initialize ASAP/ALAP/HEIGHT to zero. */
391 node_sched_params = (node_sched_params_ptr)
392 xcalloc (g->num_nodes,
393 sizeof (struct node_sched_params));
395 /* Set the pointer of the general data of the node to point to the
396 appropriate sched_params structure. */
397 for (i = 0; i < g->num_nodes; i++)
399 /* Watch out for aliasing problems? */
400 node_sched_params[i].asap = g->nodes[i].aux.count;
401 g->nodes[i].aux.info = &node_sched_params[i];
405 static void
406 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
408 int i;
410 if (! file)
411 return;
412 for (i = 0; i < num_nodes; i++)
414 node_sched_params_ptr nsp = &node_sched_params[i];
415 rtx reg_move = nsp->first_reg_move;
416 int j;
418 fprintf (file, "Node = %d; INSN = %d\n", i,
419 (INSN_UID (g->nodes[i].insn)));
420 fprintf (file, " asap = %d:\n", nsp->asap);
421 fprintf (file, " time = %d:\n", nsp->time);
422 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
423 for (j = 0; j < nsp->nreg_moves; j++)
425 fprintf (file, " reg_move = ");
426 print_rtl_single (file, reg_move);
427 reg_move = PREV_INSN (reg_move);
433 Breaking intra-loop register anti-dependences:
434 Each intra-loop register anti-dependence implies a cross-iteration true
435 dependence of distance 1. Therefore, we can remove such false dependencies
436 and figure out if the partial schedule broke them by checking if (for a
437 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
438 if so generate a register move. The number of such moves is equal to:
439                SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
440   nreg_moves = ----------------------------------- + 1 - {   dependence.
441                                ii                         { 1 if not.
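/* Worked example (illustrative numbers): with ii = 4, SCHED_TIME (def) = 2
   and SCHED_TIME (use) = 9, the formula above yields
   (9 - 2) / 4 + 1 - 0 = 2 register moves if the anti-dependence was
   broken, and (9 - 2) / 4 + 1 - 1 = 1 if it was not.  */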
443 static struct undo_replace_buff_elem *
444 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
446 ddg_ptr g = ps->g;
447 int ii = ps->ii;
448 int i;
449 struct undo_replace_buff_elem *reg_move_replaces = NULL;
451 for (i = 0; i < g->num_nodes; i++)
453 ddg_node_ptr u = &g->nodes[i];
454 ddg_edge_ptr e;
455 int nreg_moves = 0, i_reg_move;
456 sbitmap *uses_of_defs;
457 rtx last_reg_move;
458 rtx prev_reg, old_reg;
460 /* Compute the number of reg_moves needed for u, by looking at life
461 ranges started at u (excluding self-loops). */
462 for (e = u->out; e; e = e->next_out)
463 if (e->type == TRUE_DEP && e->dest != e->src)
465 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
467 if (e->distance == 1)
468 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
470 /* If dest precedes src in the schedule of the kernel, then dest
471 will read before src writes and we can save one reg_copy. */
472 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
473 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
474 nreg_moves4e--;
476 nreg_moves = MAX (nreg_moves, nreg_moves4e);
479 if (nreg_moves == 0)
480 continue;
482 /* Every use of the register defined by node may require a different
483 copy of this register, depending on the time the use is scheduled.
484 Set a bitmap vector, telling which nodes use each copy of this
485 register. */
486 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
487 sbitmap_vector_zero (uses_of_defs, nreg_moves);
488 for (e = u->out; e; e = e->next_out)
489 if (e->type == TRUE_DEP && e->dest != e->src)
491 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
493 if (e->distance == 1)
494 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
496 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
497 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
498 dest_copy--;
500 if (dest_copy)
501 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
504 /* Now generate the reg_moves, attaching relevant uses to them. */
505 SCHED_NREG_MOVES (u) = nreg_moves;
506 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
507 last_reg_move = u->insn;
509 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
511 unsigned int i_use = 0;
512 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
513 rtx reg_move = gen_move_insn (new_reg, prev_reg);
514 sbitmap_iterator sbi;
516 add_insn_before (reg_move, last_reg_move, NULL);
517 last_reg_move = reg_move;
519 if (!SCHED_FIRST_REG_MOVE (u))
520 SCHED_FIRST_REG_MOVE (u) = reg_move;
522 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
524 struct undo_replace_buff_elem *rep;
526 rep = (struct undo_replace_buff_elem *)
527 xcalloc (1, sizeof (struct undo_replace_buff_elem));
528 rep->insn = g->nodes[i_use].insn;
529 rep->orig_reg = old_reg;
530 rep->new_reg = new_reg;
532 if (! reg_move_replaces)
533 reg_move_replaces = rep;
534 else
536 rep->next = reg_move_replaces;
537 reg_move_replaces = rep;
540 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
541 if (rescan)
542 df_insn_rescan (g->nodes[i_use].insn);
545 prev_reg = new_reg;
547 sbitmap_vector_free (uses_of_defs);
549 return reg_move_replaces;
552 /* Free memory allocated for the undo buffer. */
553 static void
554 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
557 while (reg_move_replaces)
559 struct undo_replace_buff_elem *rep = reg_move_replaces;
561 reg_move_replaces = reg_move_replaces->next;
562 free (rep);
566 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
567 of SCHED_ROW and SCHED_STAGE. */
568 static void
569 normalize_sched_times (partial_schedule_ptr ps)
571 int i;
572 ddg_ptr g = ps->g;
573 int amount = PS_MIN_CYCLE (ps);
574 int ii = ps->ii;
576 /* Don't include the closing branch assuming that it is the last node. */
577 for (i = 0; i < g->num_nodes - 1; i++)
579 ddg_node_ptr u = &g->nodes[i];
580 int normalized_time = SCHED_TIME (u) - amount;
582 gcc_assert (normalized_time >= 0);
584 SCHED_TIME (u) = normalized_time;
585 SCHED_ROW (u) = normalized_time % ii;
586 SCHED_STAGE (u) = normalized_time / ii;
590 /* Set SCHED_COLUMN of each node according to its position in PS. */
591 static void
592 set_columns_for_ps (partial_schedule_ptr ps)
594 int row;
596 for (row = 0; row < ps->ii; row++)
598 ps_insn_ptr cur_insn = ps->rows[row];
599 int column = 0;
601 for (; cur_insn; cur_insn = cur_insn->next_in_row)
602 SCHED_COLUMN (cur_insn->node) = column++;
606 /* Permute the insns according to their order in PS, from row 0 to
607 row ii-1, and position them right before LAST. This schedules
608 the insns of the loop kernel. */
609 static void
610 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
612 int ii = ps->ii;
613 int row;
614 ps_insn_ptr ps_ij;
616 for (row = 0; row < ii ; row++)
617 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
618 if (PREV_INSN (last) != ps_ij->node->insn)
619 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
620 PREV_INSN (last));
623 static void
624 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
625 int to_stage, int for_prolog, rtx count_reg)
627 int row;
628 ps_insn_ptr ps_ij;
630 for (row = 0; row < ps->ii; row++)
631 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
633 ddg_node_ptr u_node = ps_ij->node;
634 int j, i_reg_moves;
635 rtx reg_move = NULL_RTX;
637 /* Do not duplicate any insn which refers to count_reg as it
638 belongs to the control part.
639 TODO: This should be done by analyzing the control part of
640 the loop. */
641 if (reg_mentioned_p (count_reg, u_node->insn))
642 continue;
644 if (for_prolog)
646 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
647 number of reg_moves starting with the second occurrence of
648 u_node, which is generated if its SCHED_STAGE <= to_stage. */
649 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
650 i_reg_moves = MAX (i_reg_moves, 0);
651 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
653 /* The reg_moves start from the *first* reg_move backwards. */
654 if (i_reg_moves)
656 reg_move = SCHED_FIRST_REG_MOVE (u_node);
657 for (j = 1; j < i_reg_moves; j++)
658 reg_move = PREV_INSN (reg_move);
661 else /* It's for the epilog. */
663 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
664 starting to decrease one stage after u_node no longer occurs;
665 that is, generate all reg_moves until
666 SCHED_STAGE (u_node) == from_stage - 1. */
667 i_reg_moves = SCHED_NREG_MOVES (u_node)
668 - (from_stage - SCHED_STAGE (u_node) - 1);
669 i_reg_moves = MAX (i_reg_moves, 0);
670 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
672 /* The reg_moves start from the *last* reg_move forwards. */
673 if (i_reg_moves)
675 reg_move = SCHED_FIRST_REG_MOVE (u_node);
676 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
677 reg_move = PREV_INSN (reg_move);
681 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
682 emit_insn (copy_rtx (PATTERN (reg_move)));
683 if (SCHED_STAGE (u_node) >= from_stage
684 && SCHED_STAGE (u_node) <= to_stage)
685 duplicate_insn_chain (u_node->first_note, u_node->insn);
690 /* Generate the instructions (including reg_moves) for prolog & epilog. */
691 static void
692 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
693 rtx count_reg, rtx count_init)
695 int i;
696 int last_stage = PS_STAGE_COUNT (ps) - 1;
697 edge e;
699 /* Generate the prolog, inserting its insns on the loop-entry edge. */
700 start_sequence ();
702 if (!count_init)
704 /* Generate instructions at the beginning of the prolog to
705 adjust the loop count by STAGE_COUNT. If loop count is constant
706 (count_init), this constant is adjusted by STAGE_COUNT in
707 generate_prolog_epilog function. */
708 rtx sub_reg = NULL_RTX;
710 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
711 count_reg, GEN_INT (last_stage),
712 count_reg, 1, OPTAB_DIRECT);
713 gcc_assert (REG_P (sub_reg));
714 if (REGNO (sub_reg) != REGNO (count_reg))
715 emit_move_insn (count_reg, sub_reg);
718 for (i = 0; i < last_stage; i++)
719 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
721 /* Put the prolog on the entry edge. */
722 e = loop_preheader_edge (loop);
723 split_edge_and_insert (e, get_insns ());
725 end_sequence ();
727 /* Generate the epilog, inserting its insns on the loop-exit edge. */
728 start_sequence ();
730 for (i = 0; i < last_stage; i++)
731 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
733 /* Put the epilogue on the exit edge. */
734 gcc_assert (single_exit (loop));
735 e = single_exit (loop);
736 split_edge_and_insert (e, get_insns ());
737 end_sequence ();
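/* Worked example (illustrative numbers): with PS_STAGE_COUNT = 3 we get
   last_stage = 2.  The prolog calls above are
   duplicate_insns_of_cycles (ps, 0, 0, 1, count_reg) and
   duplicate_insns_of_cycles (ps, 0, 1, 1, count_reg), so stage-0 insns are
   emitted twice and stage-1 insns once before the kernel; the epilog calls
   are duplicate_insns_of_cycles (ps, 1, 2, 0, count_reg) and
   duplicate_insns_of_cycles (ps, 2, 2, 0, count_reg), so stage-2 insns are
   emitted twice and stage-1 insns once after it.  */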
740 /* Return true if all the BBs of the loop are empty except the
741 loop header. */
742 static bool
743 loop_single_full_bb_p (struct loop *loop)
745 unsigned i;
746 basic_block *bbs = get_loop_body (loop);
748 for (i = 0; i < loop->num_nodes ; i++)
750 rtx head, tail;
751 bool empty_bb = true;
753 if (bbs[i] == loop->header)
754 continue;
756 /* Make sure that basic blocks other than the header
757 have only notes, labels or jumps. */
758 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
759 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
761 if (NOTE_P (head) || LABEL_P (head)
762 || (INSN_P (head) && JUMP_P (head)))
763 continue;
764 empty_bb = false;
765 break;
768 if (! empty_bb)
770 free (bbs);
771 return false;
774 free (bbs);
775 return true;
778 /* A simple loop from SMS point of view; it is a loop that is composed of
779 either a single basic block or two BBs - a header and a latch. */
780 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
781 && (EDGE_COUNT (loop->latch->preds) == 1) \
782 && (EDGE_COUNT (loop->latch->succs) == 1))
784 /* Return true if the loop is in its canonical form and false if not.
785 i.e. it is SIMPLE_SMS_LOOP_P and has one preheader block and a single exit. */
786 static bool
787 loop_canon_p (struct loop *loop)
790 if (loop->inner || !loop_outer (loop))
791 return false;
793 if (!single_exit (loop))
795 if (dump_file)
797 rtx insn = BB_END (loop->header);
799 fprintf (dump_file, "SMS loop many exits ");
800 fprintf (dump_file, " %s %d (file, line)\n",
801 insn_file (insn), insn_line (insn));
803 return false;
806 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
808 if (dump_file)
810 rtx insn = BB_END (loop->header);
812 fprintf (dump_file, "SMS loop many BBs. ");
813 fprintf (dump_file, " %s %d (file, line)\n",
814 insn_file (insn), insn_line (insn));
816 return false;
819 return true;
822 /* If there is more than one entry to the loop,
823 make it one by splitting the first entry edge and
824 redirecting the others to the new BB. */
825 static void
826 canon_loop (struct loop *loop)
828 edge e;
829 edge_iterator i;
831 /* Avoid annoying special cases of edges going to exit
832 block. */
833 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
834 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
835 split_edge (e);
837 if (loop->latch == loop->header
838 || EDGE_COUNT (loop->latch->succs) > 1)
840 FOR_EACH_EDGE (e, i, loop->header->preds)
841 if (e->src == loop->latch)
842 break;
843 split_edge (e);
847 /* Probability in % that the sms-ed loop rolls enough so that the optimized
848 version may be entered. Just a guess. */
849 #define PROB_SMS_ENOUGH_ITERATIONS 80
851 /* Used to calculate the upper bound of ii. */
852 #define MAXII_FACTOR 2
854 /* Main entry point, perform SMS scheduling on the loops of the function
855 that consist of single basic blocks. */
856 static void
857 sms_schedule (void)
859 static int passes = 0;
860 rtx insn;
861 ddg_ptr *g_arr, g;
862 int * node_order;
863 int maxii;
864 loop_iterator li;
865 partial_schedule_ptr ps;
866 basic_block bb = NULL;
867 struct loop *loop;
868 basic_block condition_bb = NULL;
869 edge latch_edge;
870 gcov_type trip_count = 0;
872 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
873 | LOOPS_HAVE_RECORDED_EXITS);
874 if (number_of_loops () <= 1)
876 loop_optimizer_finalize ();
877 return; /* There are no loops to schedule. */
880 /* Initialize issue_rate. */
881 if (targetm.sched.issue_rate)
883 int temp = reload_completed;
885 reload_completed = 1;
886 issue_rate = targetm.sched.issue_rate ();
887 reload_completed = temp;
889 else
890 issue_rate = 1;
892 /* Initialize the scheduler. */
893 current_sched_info = &sms_sched_info;
895 /* Init Data Flow analysis, to be used in interloop dep calculation. */
896 df_set_flags (DF_LR_RUN_DCE);
897 df_rd_add_problem ();
898 df_note_add_problem ();
899 df_chain_add_problem (DF_DU_CHAIN);
900 df_analyze ();
901 regstat_compute_calls_crossed ();
902 sched_init ();
904 /* Allocate memory to hold the DDG array one entry for each loop.
905 We use loop->num as index into this array. */
906 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
908 /* Build DDGs for all the relevant loops and hold them in G_ARR
909 indexed by the loop index. */
910 FOR_EACH_LOOP (li, loop, 0)
912 rtx head, tail;
913 rtx count_reg;
915 /* For debugging. */
916 if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
918 if (dump_file)
919 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
921 break;
924 if (! loop_canon_p (loop))
925 continue;
927 if (! loop_single_full_bb_p (loop))
928 continue;
930 bb = loop->header;
932 get_ebb_head_tail (bb, bb, &head, &tail);
933 latch_edge = loop_latch_edge (loop);
934 gcc_assert (single_exit (loop));
935 if (single_exit (loop)->count)
936 trip_count = latch_edge->count / single_exit (loop)->count;
938 /* Perform SMS only on loops whose average count is above the threshold. */
940 if ( latch_edge->count
941 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
943 if (dump_file)
945 fprintf (dump_file, " %s %d (file, line)\n",
946 insn_file (tail), insn_line (tail));
947 fprintf (dump_file, "SMS single-bb-loop\n");
948 if (profile_info && flag_branch_probabilities)
950 fprintf (dump_file, "SMS loop-count ");
951 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
952 (HOST_WIDEST_INT) bb->count);
953 fprintf (dump_file, "\n");
954 fprintf (dump_file, "SMS trip-count ");
955 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
956 (HOST_WIDEST_INT) trip_count);
957 fprintf (dump_file, "\n");
958 fprintf (dump_file, "SMS profile-sum-max ");
959 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
960 (HOST_WIDEST_INT) profile_info->sum_max);
961 fprintf (dump_file, "\n");
964 continue;
967 /* Make sure this is a doloop. */
968 if ( !(count_reg = doloop_register_get (head, tail)))
969 continue;
971 /* Don't handle BBs with calls or barriers, or !single_set insns,
972 or auto-increment insns (to avoid creating invalid reg-moves
973 for the auto-increment insns).
974 ??? Should handle auto-increment insns.
975 ??? Should handle insns defining subregs. */
976 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
978 rtx set;
980 if (CALL_P (insn)
981 || BARRIER_P (insn)
982 || (INSN_P (insn) && !JUMP_P (insn)
983 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
984 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
985 || (INSN_P (insn) && (set = single_set (insn))
986 && GET_CODE (SET_DEST (set)) == SUBREG))
987 break;
990 if (insn != NEXT_INSN (tail))
992 if (dump_file)
994 if (CALL_P (insn))
995 fprintf (dump_file, "SMS loop-with-call\n");
996 else if (BARRIER_P (insn))
997 fprintf (dump_file, "SMS loop-with-barrier\n");
998 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
999 fprintf (dump_file, "SMS reg inc\n");
1000 else if ((INSN_P (insn) && !JUMP_P (insn)
1001 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1002 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1003 else
1004 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1005 print_rtl_single (dump_file, insn);
1008 continue;
1011 if (! (g = create_ddg (bb, 0)))
1013 if (dump_file)
1014 fprintf (dump_file, "SMS doloop\n");
1015 continue;
1018 g_arr[loop->num] = g;
1021 /* We don't want to perform SMS on new loops - created by versioning. */
1022 FOR_EACH_LOOP (li, loop, 0)
1024 rtx head, tail;
1025 rtx count_reg, count_init;
1026 int mii, rec_mii;
1027 unsigned stage_count = 0;
1028 HOST_WIDEST_INT loop_count = 0;
1030 if (! (g = g_arr[loop->num]))
1031 continue;
1033 if (dump_file)
1034 print_ddg (dump_file, g);
1036 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1038 latch_edge = loop_latch_edge (loop);
1039 gcc_assert (single_exit (loop));
1040 if (single_exit (loop)->count)
1041 trip_count = latch_edge->count / single_exit (loop)->count;
1043 if (dump_file)
1045 fprintf (dump_file, " %s %d (file, line)\n",
1046 insn_file (tail), insn_line (tail));
1047 fprintf (dump_file, "SMS single-bb-loop\n");
1048 if (profile_info && flag_branch_probabilities)
1050 fprintf (dump_file, "SMS loop-count ");
1051 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1052 (HOST_WIDEST_INT) bb->count);
1053 fprintf (dump_file, "\n");
1054 fprintf (dump_file, "SMS profile-sum-max ");
1055 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1056 (HOST_WIDEST_INT) profile_info->sum_max);
1057 fprintf (dump_file, "\n");
1059 fprintf (dump_file, "SMS doloop\n");
1060 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1061 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1062 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1066 /* In case the loop has a doloop register, it gets special
1067 handling. */
1068 count_init = NULL_RTX;
1069 if ((count_reg = doloop_register_get (head, tail)))
1071 basic_block pre_header;
1073 pre_header = loop_preheader_edge (loop)->src;
1074 count_init = const_iteration_count (count_reg, pre_header,
1075 &loop_count);
1077 gcc_assert (count_reg);
1079 if (dump_file && count_init)
1081 fprintf (dump_file, "SMS const-doloop ");
1082 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1083 loop_count);
1084 fprintf (dump_file, "\n");
1087 node_order = XNEWVEC (int, g->num_nodes);
1089 mii = 1; /* Need to pass some estimate of mii. */
1090 rec_mii = sms_order_nodes (g, mii, node_order);
1091 mii = MAX (res_MII (g), rec_mii);
1092 maxii = MAXII_FACTOR * mii;
1094 if (dump_file)
1095 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1096 rec_mii, mii, maxii);
1098 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1099 ASAP. */
1100 set_node_sched_params (g);
1102 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1104 if (ps)
1105 stage_count = PS_STAGE_COUNT (ps);
1107 /* Stage count of 1 means that there is no interleaving between
1108 iterations, let the scheduling passes do the job. */
1109 if (stage_count < 1
1110 || (count_init && (loop_count <= stage_count))
1111 || (flag_branch_probabilities && (trip_count <= stage_count)))
1113 if (dump_file)
1115 fprintf (dump_file, "SMS failed... \n");
1116 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1117 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1118 fprintf (dump_file, ", trip-count=");
1119 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1120 fprintf (dump_file, ")\n");
1122 continue;
1124 else
1126 struct undo_replace_buff_elem *reg_move_replaces;
1128 if (dump_file)
1130 fprintf (dump_file,
1131 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1132 stage_count);
1133 print_partial_schedule (ps, dump_file);
1134 fprintf (dump_file,
1135 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1136 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1139 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1140 the closing_branch was scheduled and should appear in the last (ii-1)
1141 row. Otherwise, we are free to schedule the branch, and we let nodes
1142 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1143 row; this should reduce stage_count to minimum.
1144 TODO: Revisit the issue of scheduling the insns of the
1145 control part relative to the branch when the control part
1146 has more than one insn. */
1147 normalize_sched_times (ps);
1148 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1149 set_columns_for_ps (ps);
1151 canon_loop (loop);
1153 /* In case the BCT count is not known, do loop-versioning. */
1154 if (count_reg && ! count_init)
1156 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1157 GEN_INT(stage_count));
1158 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1159 * REG_BR_PROB_BASE) / 100;
1161 loop_version (loop, comp_rtx, &condition_bb,
1162 prob, prob, REG_BR_PROB_BASE - prob,
1163 true);
1166 /* Set new iteration count of loop kernel. */
1167 if (count_reg && count_init)
1168 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1169 - stage_count + 1);
1171 /* Now apply the scheduled kernel to the RTL of the loop. */
1172 permute_partial_schedule (ps, g->closing_branch->first_note);
1174 /* Mark this loop as software pipelined so the later
1175 scheduling passes don't touch it. */
1176 if (! flag_resched_modulo_sched)
1177 g->bb->flags |= BB_DISABLE_SCHEDULE;
1178 /* The life-info is not valid any more. */
1179 df_set_bb_dirty (g->bb);
1181 reg_move_replaces = generate_reg_moves (ps, true);
1182 if (dump_file)
1183 print_node_sched_params (dump_file, g->num_nodes, g);
1184 /* Generate prolog and epilog. */
1185 generate_prolog_epilog (ps, loop, count_reg, count_init);
1187 free_undo_replace_buff (reg_move_replaces);
1190 free_partial_schedule (ps);
1191 free (node_sched_params);
1192 free (node_order);
1193 free_ddg (g);
1196 regstat_free_calls_crossed ();
1197 free (g_arr);
1199 /* Release scheduler data, needed until now because of DFA. */
1200 sched_finish ();
1201 loop_optimizer_finalize ();
1204 /* The SMS scheduling algorithm itself
1205 -----------------------------------
1206 Input: 'O' an ordered list of insns of a loop.
1207 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1209 'Q' is the empty Set
1210 'PS' is the partial schedule; it holds the currently scheduled nodes with
1211 their cycle/slot.
1212 'PSP' previously scheduled predecessors.
1213 'PSS' previously scheduled successors.
1214 't(u)' the cycle where u is scheduled.
1215 'l(u)' is the latency of u.
1216 'd(v,u)' is the dependence distance from v to u.
1217 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1218 the node ordering phase.
1219 'check_hardware_resources_conflicts(u, PS, c)'
1220 run a trace around cycle/slot through DFA model
1221 to check resource conflicts involving instruction u
1222 at cycle c given the partial schedule PS.
1223 'add_to_partial_schedule_at_time(u, PS, c)'
1224 Add the node/instruction u to the partial schedule
1225 PS at time c.
1226 'calculate_register_pressure(PS)'
1227 Given a schedule of instructions, calculate the register
1228 pressure it implies. One implementation could be the
1229 maximum number of overlapping live ranges.
1230 'maxRP' The maximum allowed register pressure; it is usually derived from the number
1231 of registers available in the hardware.
1233 1. II = MII.
1234 2. PS = empty list
1235 3. for each node u in O in pre-computed order
1236 4. if (PSP(u) != Q && PSS(u) == Q) then
1237 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1238 6. start = Early_start; end = Early_start + II - 1; step = 1
1239 11. else if (PSP(u) == Q && PSS(u) != Q) then
1240 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1241 13. start = Late_start; end = Late_start - II + 1; step = -1
1242 14. else if (PSP(u) != Q && PSS(u) != Q) then
1243 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1244 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1245 17. start = Early_start;
1246 18. end = min(Early_start + II - 1 , Late_start);
1247 19. step = 1
1248 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1249 21. start = ASAP(u); end = start + II - 1; step = 1
1250 22. endif
1252 23. success = false
1253 24. for (c = start ; c != end ; c += step)
1254 25. if check_hardware_resources_conflicts(u, PS, c) then
1255 26. add_to_partial_schedule_at_time(u, PS, c)
1256 27. success = true
1257 28. break
1258 29. endif
1259 30. endfor
1260 31. if (success == false) then
1261 32. II = II + 1
1262 33. if (II > maxII) then
1263 34. finish - failed to schedule
1264 35. endif
1265 36. goto 2.
1266 37. endif
1267 38. endfor
1268 39. if (calculate_register_pressure(PS) > maxRP) then
1269 40. goto 32.
1270 41. endif
1271 42. compute epilogue & prologue
1272 43. finish - succeeded to schedule
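/* Worked example for steps 15-19 above (illustrative numbers): let II = 4
   and let u have a scheduled predecessor v with t(v) = 6, latency 2 and
   distance 1, and a scheduled successor w with t(w) = 9, latency 1 and
   distance 0.  Then Early_start(u) = 6 + 2 - 1*4 = 4 and
   Late_start(u) = 9 - 1 + 0*4 = 8, so u is tried in the window
   start = 4 .. end = min (4 + 4 - 1, 8) = 7 with step = 1.  */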
1275 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1276 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1277 set to 0 to save compile time. */
1278 #define DFA_HISTORY SMS_DFA_HISTORY
1280 /* Given the partial schedule PS, this function calculates and returns the
1281 cycles in which we can schedule the node with the given index I.
1282 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1283 noticed that there are several cases in which we fail to SMS the loop
1284 because the sched window of a node is empty due to tight data-deps. In
1285 such cases we want to unschedule some of the predecessors/successors
1286 until we get a non-empty scheduling window. It returns -1 if the
1287 scheduling window is empty and zero otherwise. */
1289 static int
1290 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1291 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1293 int start, step, end;
1294 ddg_edge_ptr e;
1295 int u = nodes_order [i];
1296 ddg_node_ptr u_node = &ps->g->nodes[u];
1297 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1298 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1299 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1300 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1301 int psp_not_empty;
1302 int pss_not_empty;
1304 /* 1. compute sched window for u (start, end, step). */
1305 sbitmap_zero (psp);
1306 sbitmap_zero (pss);
1307 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1308 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1310 if (psp_not_empty && !pss_not_empty)
1312 int early_start = INT_MIN;
1314 end = INT_MAX;
1315 for (e = u_node->in; e != 0; e = e->next_in)
1317 ddg_node_ptr v_node = e->src;
1318 if (TEST_BIT (sched_nodes, v_node->cuid))
1320 int node_st = SCHED_TIME (v_node)
1321 + e->latency - (e->distance * ii);
1323 early_start = MAX (early_start, node_st);
1325 if (e->data_type == MEM_DEP)
1326 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1329 start = early_start;
1330 end = MIN (end, early_start + ii);
1331 step = 1;
1334 else if (!psp_not_empty && pss_not_empty)
1336 int late_start = INT_MAX;
1338 end = INT_MIN;
1339 for (e = u_node->out; e != 0; e = e->next_out)
1341 ddg_node_ptr v_node = e->dest;
1342 if (TEST_BIT (sched_nodes, v_node->cuid))
1344 late_start = MIN (late_start,
1345 SCHED_TIME (v_node) - e->latency
1346 + (e->distance * ii));
1347 if (e->data_type == MEM_DEP)
1348 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1351 start = late_start;
1352 end = MAX (end, late_start - ii);
1353 step = -1;
1356 else if (psp_not_empty && pss_not_empty)
1358 int early_start = INT_MIN;
1359 int late_start = INT_MAX;
1361 start = INT_MIN;
1362 end = INT_MAX;
1363 for (e = u_node->in; e != 0; e = e->next_in)
1365 ddg_node_ptr v_node = e->src;
1367 if (TEST_BIT (sched_nodes, v_node->cuid))
1369 early_start = MAX (early_start,
1370 SCHED_TIME (v_node) + e->latency
1371 - (e->distance * ii));
1372 if (e->data_type == MEM_DEP)
1373 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1376 for (e = u_node->out; e != 0; e = e->next_out)
1378 ddg_node_ptr v_node = e->dest;
1380 if (TEST_BIT (sched_nodes, v_node->cuid))
1382 late_start = MIN (late_start,
1383 SCHED_TIME (v_node) - e->latency
1384 + (e->distance * ii));
1385 if (e->data_type == MEM_DEP)
1386 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1389 start = MAX (start, early_start);
1390 end = MIN (end, MIN (early_start + ii, late_start + 1));
1391 step = 1;
1393 else /* psp is empty && pss is empty. */
1395 start = SCHED_ASAP (u_node);
1396 end = start + ii;
1397 step = 1;
1400 *start_p = start;
1401 *step_p = step;
1402 *end_p = end;
1403 sbitmap_free (psp);
1404 sbitmap_free (pss);
1406 if ((start >= end && step == 1) || (start <= end && step == -1))
1407 return -1;
1408 else
1409 return 0;
1412 /* This function implements the scheduling algorithm for SMS according to the
1413 above algorithm. */
1414 static partial_schedule_ptr
1415 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1417 int ii = mii;
1418 int i, c, success;
1419 int try_again_with_larger_ii = true;
1420 int num_nodes = g->num_nodes;
1421 ddg_edge_ptr e;
1422 int start, end, step; /* Place together into one struct? */
1423 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1424 sbitmap must_precede = sbitmap_alloc (num_nodes);
1425 sbitmap must_follow = sbitmap_alloc (num_nodes);
1426 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1428 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1430 sbitmap_ones (tobe_scheduled);
1431 sbitmap_zero (sched_nodes);
1433 while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
1434 || try_again_with_larger_ii ) && ii < maxii)
1436 int j;
1437 bool unscheduled_nodes = false;
1439 if (dump_file)
1440 fprintf (dump_file, "Starting with ii=%d\n", ii);
1441 if (try_again_with_larger_ii)
1443 try_again_with_larger_ii = false;
1444 sbitmap_zero (sched_nodes);
1447 for (i = 0; i < num_nodes; i++)
1449 int u = nodes_order[i];
1450 ddg_node_ptr u_node = &ps->g->nodes[u];
1451 rtx insn = u_node->insn;
1453 if (!INSN_P (insn))
1455 RESET_BIT (tobe_scheduled, u);
1456 continue;
1459 if (JUMP_P (insn)) /* Closing branch handled later. */
1461 RESET_BIT (tobe_scheduled, u);
1462 continue;
1465 if (TEST_BIT (sched_nodes, u))
1466 continue;
1468 /* Try to get non-empty scheduling window. */
1469 j = i;
1470 while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
1471 && j > 0)
1473 unscheduled_nodes = true;
1474 if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
1475 || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
1477 ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
1478 RESET_BIT (sched_nodes, nodes_order [j - 1]);
1480 j--;
1482 if (j < 0)
1484 /* ??? Try backtracking instead of immediately ii++? */
1485 ii++;
1486 try_again_with_larger_ii = true;
1487 reset_partial_schedule (ps, ii);
1488 break;
1490 /* 2. Try scheduling u in window. */
1491 if (dump_file)
1492 fprintf (dump_file,
1493 "Trying to schedule node %d in (%d .. %d) step %d\n",
1494 u, start, end, step);
1496 /* use must_follow & must_precede bitmaps to determine order
1497 of nodes within the cycle. */
1498 sbitmap_zero (must_precede);
1499 sbitmap_zero (must_follow);
1500 /* TODO: We can add an insn to the must_precede or must_follow
1501 bitmaps only if it has a tight dependence to U and they are
1502 both scheduled in the same row. The current check is less
1503 conservative and content with the fact that both U and the
1504 insn are scheduled in the same row. */
1505 for (e = u_node->in; e != 0; e = e->next_in)
1506 if (TEST_BIT (sched_nodes, e->src->cuid)
1507 && (SMODULO (SCHED_TIME (e->src), ii) == SMODULO (start, ii)))
1508 SET_BIT (must_precede, e->src->cuid);
1510 for (e = u_node->out; e != 0; e = e->next_out)
1511 if (TEST_BIT (sched_nodes, e->dest->cuid)
1512 && (SMODULO (SCHED_TIME (e->dest), ii) ==
1513 SMODULO ((end - step), ii)))
1514 SET_BIT (must_follow, e->dest->cuid);
1516 success = 0;
1517 if ((step > 0 && start < end) || (step < 0 && start > end))
1518 for (c = start; c != end; c += step)
1520 ps_insn_ptr psi;
1522 psi = ps_add_node_check_conflicts (ps, u_node, c,
1523 must_precede,
1524 must_follow);
1526 if (psi)
1528 SCHED_TIME (u_node) = c;
1529 SET_BIT (sched_nodes, u);
1530 success = 1;
1531 if (dump_file)
1532 fprintf (dump_file, "Schedule in %d\n", c);
1533 break;
1536 if (!success)
1538 /* ??? Try backtracking instead of immediately ii++? */
1539 ii++;
1540 try_again_with_larger_ii = true;
1541 reset_partial_schedule (ps, ii);
1542 break;
1544 if (unscheduled_nodes)
1545 break;
1547 /* ??? If (success), check register pressure estimates. */
1548 } /* Continue with next node. */
1549 } /* While try_again_with_larger_ii. */
1551 sbitmap_free (sched_nodes);
1552 sbitmap_free (must_precede);
1553 sbitmap_free (must_follow);
1554 sbitmap_free (tobe_scheduled);
1556 if (ii >= maxii)
1558 free_partial_schedule (ps);
1559 ps = NULL;
1561 return ps;
1565 /* This page implements the algorithm for ordering the nodes of a DDG
1566 for modulo scheduling, activated through the
1567 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1569 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1570 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1571 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1572 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1573 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1574 #define DEPTH(x) (ASAP ((x)))
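/* Worked example (illustrative numbers): a node with ASAP = 2 and ALAP = 5
   has MOB = 5 - 2 = 3, i.e. three cycles of slack in its placement, while
   a node with MOB = 0 lies on a critical path.  HEIGHT is the longest
   latency-weighted path from the node to a leaf of the acyclic
   (distance-0) DDG, and DEPTH here is simply the node's ASAP value.  */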
1576 typedef struct node_order_params * nopa;
1578 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1579 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1580 static nopa calculate_order_params (ddg_ptr, int mii);
1581 static int find_max_asap (ddg_ptr, sbitmap);
1582 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1583 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1585 enum sms_direction {BOTTOMUP, TOPDOWN};
1587 struct node_order_params
1589 int asap;
1590 int alap;
1591 int height;
1594 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1595 static void
1596 check_nodes_order (int *node_order, int num_nodes)
1598 int i;
1599 sbitmap tmp = sbitmap_alloc (num_nodes);
1601 sbitmap_zero (tmp);
1603 for (i = 0; i < num_nodes; i++)
1605 int u = node_order[i];
1607 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1609 SET_BIT (tmp, u);
1612 sbitmap_free (tmp);
1615 /* Order the nodes of G for scheduling and pass the result in
1616 NODE_ORDER. Also set aux.count of each node to ASAP.
1617 Return the recMII for the given DDG. */
1618 static int
1619 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1621 int i;
1622 int rec_mii = 0;
1623 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1625 nopa nops = calculate_order_params (g, mii);
1627 if (dump_file)
1628 print_sccs (dump_file, sccs, g);
1630 order_nodes_of_sccs (sccs, node_order);
1632 if (sccs->num_sccs > 0)
1633 /* First SCC has the largest recurrence_length. */
1634 rec_mii = sccs->sccs[0]->recurrence_length;
1636 /* Save ASAP before destroying node_order_params. */
1637 for (i = 0; i < g->num_nodes; i++)
1639 ddg_node_ptr v = &g->nodes[i];
1640 v->aux.count = ASAP (v);
1643 free (nops);
1644 free_ddg_all_sccs (sccs);
1645 check_nodes_order (node_order, g->num_nodes);
1647 return rec_mii;
1650 static void
1651 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1653 int i, pos = 0;
1654 ddg_ptr g = all_sccs->ddg;
1655 int num_nodes = g->num_nodes;
1656 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1657 sbitmap on_path = sbitmap_alloc (num_nodes);
1658 sbitmap tmp = sbitmap_alloc (num_nodes);
1659 sbitmap ones = sbitmap_alloc (num_nodes);
1661 sbitmap_zero (prev_sccs);
1662 sbitmap_ones (ones);
1664 /* Perform the node ordering starting from the SCC with the highest recMII.
1665 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1666 for (i = 0; i < all_sccs->num_sccs; i++)
1668 ddg_scc_ptr scc = all_sccs->sccs[i];
1670 /* Add nodes on paths from previous SCCs to the current SCC. */
1671 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1672 sbitmap_a_or_b (tmp, scc->nodes, on_path);
1674 /* Add nodes on paths from the current SCC to previous SCCs. */
1675 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1676 sbitmap_a_or_b (tmp, tmp, on_path);
1678 /* Remove nodes of previous SCCs from current extended SCC. */
1679 sbitmap_difference (tmp, tmp, prev_sccs);
1681 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1682 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1685 /* Handle the remaining nodes that do not belong to any scc. Each call
1686 to order_nodes_in_scc handles a single connected component. */
1687 while (pos < g->num_nodes)
1689 sbitmap_difference (tmp, ones, prev_sccs);
1690 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1692 sbitmap_free (prev_sccs);
1693 sbitmap_free (on_path);
1694 sbitmap_free (tmp);
1695 sbitmap_free (ones);
1698 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
1699 static struct node_order_params *
1700 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1702 int u;
1703 int max_asap;
1704 int num_nodes = g->num_nodes;
1705 ddg_edge_ptr e;
1706 /* Allocate a place to hold ordering params for each node in the DDG. */
1707 nopa node_order_params_arr;
1709 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1710 node_order_params_arr = (nopa) xcalloc (num_nodes,
1711 sizeof (struct node_order_params));
1713 /* Set the aux pointer of each node to point to its order_params structure. */
1714 for (u = 0; u < num_nodes; u++)
1715 g->nodes[u].aux.info = &node_order_params_arr[u];
1717 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1718 calculate ASAP, ALAP, mobility, distance, and height for each node
1719 in the dependence (directed acyclic) graph. */
1721 /* We assume that the nodes in the array are in topological order. */
1723 max_asap = 0;
1724 for (u = 0; u < num_nodes; u++)
1726 ddg_node_ptr u_node = &g->nodes[u];
1728 ASAP (u_node) = 0;
1729 for (e = u_node->in; e; e = e->next_in)
1730 if (e->distance == 0)
1731 ASAP (u_node) = MAX (ASAP (u_node),
1732 ASAP (e->src) + e->latency);
1733 max_asap = MAX (max_asap, ASAP (u_node));
1736 for (u = num_nodes - 1; u > -1; u--)
1738 ddg_node_ptr u_node = &g->nodes[u];
1740 ALAP (u_node) = max_asap;
1741 HEIGHT (u_node) = 0;
1742 for (e = u_node->out; e; e = e->next_out)
1743 if (e->distance == 0)
1745 ALAP (u_node) = MIN (ALAP (u_node),
1746 ALAP (e->dest) - e->latency);
1747 HEIGHT (u_node) = MAX (HEIGHT (u_node),
1748 HEIGHT (e->dest) + e->latency);
1752 return node_order_params_arr;
1755 static int
1756 find_max_asap (ddg_ptr g, sbitmap nodes)
1758 unsigned int u = 0;
1759 int max_asap = -1;
1760 int result = -1;
1761 sbitmap_iterator sbi;
1763 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1765 ddg_node_ptr u_node = &g->nodes[u];
1767 if (max_asap < ASAP (u_node))
1769 max_asap = ASAP (u_node);
1770 result = u;
1773 return result;
1776 static int
1777 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1779 unsigned int u = 0;
1780 int max_hv = -1;
1781 int min_mob = INT_MAX;
1782 int result = -1;
1783 sbitmap_iterator sbi;
1785 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1787 ddg_node_ptr u_node = &g->nodes[u];
1789 if (max_hv < HEIGHT (u_node))
1791 max_hv = HEIGHT (u_node);
1792 min_mob = MOB (u_node);
1793 result = u;
1795 else if ((max_hv == HEIGHT (u_node))
1796 && (min_mob > MOB (u_node)))
1798 min_mob = MOB (u_node);
1799 result = u;
1802 return result;
1805 static int
1806 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1808 unsigned int u = 0;
1809 int max_dv = -1;
1810 int min_mob = INT_MAX;
1811 int result = -1;
1812 sbitmap_iterator sbi;
1814 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1816 ddg_node_ptr u_node = &g->nodes[u];
1818 if (max_dv < DEPTH (u_node))
1820 max_dv = DEPTH (u_node);
1821 min_mob = MOB (u_node);
1822 result = u;
1824 else if ((max_dv == DEPTH (u_node))
1825 && (min_mob > MOB (u_node)))
1827 min_mob = MOB (u_node);
1828 result = u;
1831 return result;
1834 /* Places the nodes of SCC into the NODE_ORDER array starting
1835 at position POS, according to the SMS ordering algorithm.
1836 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1837 the NODE_ORDER array, starting from position zero. */
1838 static int
1839 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
1840 int * node_order, int pos)
1842 enum sms_direction dir;
1843 int num_nodes = g->num_nodes;
1844 sbitmap workset = sbitmap_alloc (num_nodes);
1845 sbitmap tmp = sbitmap_alloc (num_nodes);
1846 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
1847 sbitmap predecessors = sbitmap_alloc (num_nodes);
1848 sbitmap successors = sbitmap_alloc (num_nodes);
1850 sbitmap_zero (predecessors);
1851 find_predecessors (predecessors, g, nodes_ordered);
1853 sbitmap_zero (successors);
1854 find_successors (successors, g, nodes_ordered);
1856 sbitmap_zero (tmp);
1857 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
1859 sbitmap_copy (workset, tmp);
1860 dir = BOTTOMUP;
1862 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
1864 sbitmap_copy (workset, tmp);
1865 dir = TOPDOWN;
1867 else
1869 int u;
1871 sbitmap_zero (workset);
1872 if ((u = find_max_asap (g, scc)) >= 0)
1873 SET_BIT (workset, u);
1874 dir = BOTTOMUP;
1877 sbitmap_zero (zero_bitmap);
1878 while (!sbitmap_equal (workset, zero_bitmap))
1880 int v;
1881 ddg_node_ptr v_node;
1882 sbitmap v_node_preds;
1883 sbitmap v_node_succs;
1885 if (dir == TOPDOWN)
1887 while (!sbitmap_equal (workset, zero_bitmap))
1889 v = find_max_hv_min_mob (g, workset);
1890 v_node = &g->nodes[v];
1891 node_order[pos++] = v;
1892 v_node_succs = NODE_SUCCESSORS (v_node);
1893 sbitmap_a_and_b (tmp, v_node_succs, scc);
1895 /* Don't consider the already ordered successors again. */
1896 sbitmap_difference (tmp, tmp, nodes_ordered);
1897 sbitmap_a_or_b (workset, workset, tmp);
1898 RESET_BIT (workset, v);
1899 SET_BIT (nodes_ordered, v);
1901 dir = BOTTOMUP;
1902 sbitmap_zero (predecessors);
1903 find_predecessors (predecessors, g, nodes_ordered);
1904 sbitmap_a_and_b (workset, predecessors, scc);
1906 else
1908 while (!sbitmap_equal (workset, zero_bitmap))
1910 v = find_max_dv_min_mob (g, workset);
1911 v_node = &g->nodes[v];
1912 node_order[pos++] = v;
1913 v_node_preds = NODE_PREDECESSORS (v_node);
1914 sbitmap_a_and_b (tmp, v_node_preds, scc);
1916 /* Don't consider the already ordered predecessors again. */
1917 sbitmap_difference (tmp, tmp, nodes_ordered);
1918 sbitmap_a_or_b (workset, workset, tmp);
1919 RESET_BIT (workset, v);
1920 SET_BIT (nodes_ordered, v);
1922 dir = TOPDOWN;
1923 sbitmap_zero (successors);
1924 find_successors (successors, g, nodes_ordered);
1925 sbitmap_a_and_b (workset, successors, scc);
1928 sbitmap_free (tmp);
1929 sbitmap_free (workset);
1930 sbitmap_free (zero_bitmap);
1931 sbitmap_free (predecessors);
1932 sbitmap_free (successors);
1933 return pos;
1937 /* This page contains functions for manipulating partial-schedules during
1938 modulo scheduling. */
1940 /* Create a partial schedule and allocate memory to hold II rows. */
1942 static partial_schedule_ptr
1943 create_partial_schedule (int ii, ddg_ptr g, int history)
1945 partial_schedule_ptr ps = XNEW (struct partial_schedule);
1946 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1947 ps->ii = ii;
1948 ps->history = history;
1949 ps->min_cycle = INT_MAX;
1950 ps->max_cycle = INT_MIN;
1951 ps->g = g;
1953 return ps;
1956 /* Free the PS_INSNs in the rows array of the given partial schedule.
1957 ??? Consider caching the PS_INSNs. */
1958 static void
1959 free_ps_insns (partial_schedule_ptr ps)
1961 int i;
1963 for (i = 0; i < ps->ii; i++)
1965 while (ps->rows[i])
1967 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
1969 free (ps->rows[i]);
1970 ps->rows[i] = ps_insn;
1972 ps->rows[i] = NULL;
1976 /* Free all the memory allocated to the partial schedule. */
1978 static void
1979 free_partial_schedule (partial_schedule_ptr ps)
1981 if (!ps)
1982 return;
1983 free_ps_insns (ps);
1984 free (ps->rows);
1985 free (ps);
1988 /* Clear the rows array along with its PS_INSNs, and create a new one
1989 with NEW_II rows. */
1991 static void
1992 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
1994 if (!ps)
1995 return;
1996 free_ps_insns (ps);
1997 if (new_ii == ps->ii)
1998 return;
1999 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2000 * sizeof (ps_insn_ptr));
2001 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2002 ps->ii = new_ii;
2003 ps->min_cycle = INT_MAX;
2004 ps->max_cycle = INT_MIN;
2007 /* Prints the partial schedule as an array of II rows; for each row
2008 print the ids of the insns in it. */
2009 void
2010 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2012 int i;
2014 for (i = 0; i < ps->ii; i++)
2016 ps_insn_ptr ps_i = ps->rows[i];
2018 fprintf (dump, "\n[CYCLE %d ]: ", i);
2019 while (ps_i)
2021 fprintf (dump, "%d, ",
2022 INSN_UID (ps_i->node->insn));
2023 ps_i = ps_i->next_in_row;
2028 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2029 static ps_insn_ptr
2030 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2032 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2034 ps_i->node = node;
2035 ps_i->next_in_row = NULL;
2036 ps_i->prev_in_row = NULL;
2037 ps_i->row_rest_count = rest_count;
2038 ps_i->cycle = cycle;
2040 return ps_i;
2044 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2045 node is not found in the partial schedule, else returns true. */
2046 static bool
2047 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2049 int row;
2051 if (!ps || !ps_i)
2052 return false;
2054 row = SMODULO (ps_i->cycle, ps->ii);
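/* Unlink PS_I from the doubly linked list of its row, updating the
   row head if PS_I is the first insn in it.  */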
2055 if (! ps_i->prev_in_row)
2057 if (ps_i != ps->rows[row])
2058 return false;
2060 ps->rows[row] = ps_i->next_in_row;
2061 if (ps->rows[row])
2062 ps->rows[row]->prev_in_row = NULL;
2064 else
2066 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2067 if (ps_i->next_in_row)
2068 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2070 free (ps_i);
2071 return true;
2074 /* Unlike what the literature describes for modulo scheduling (which
2075 focuses on VLIW machines), the order of the instructions inside a cycle
2076 is important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2077 where the current instruction should go relative to the already
2078 scheduled instructions in the given cycle. Go over these
2079 instructions and find the first possible column to put it in. */
2080 static bool
2081 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2082 sbitmap must_precede, sbitmap must_follow)
2084 ps_insn_ptr next_ps_i;
2085 ps_insn_ptr first_must_follow = NULL;
2086 ps_insn_ptr last_must_precede = NULL;
2087 int row;
2089 if (! ps_i)
2090 return false;
2092 row = SMODULO (ps_i->cycle, ps->ii);
2094 /* Find the first must-follow and the last must-precede insns, and
2095 insert the node immediately after the last must-precede, checking
2096 that no must-follow insn appears before that point. */
2097 for (next_ps_i = ps->rows[row];
2098 next_ps_i;
2099 next_ps_i = next_ps_i->next_in_row)
2101 if (TEST_BIT (must_follow, next_ps_i->node->cuid)
2102 && ! first_must_follow)
2103 first_must_follow = next_ps_i;
2104 if (TEST_BIT (must_precede, next_ps_i->node->cuid))
2106 /* If we have already met a node that must follow, then
2107 there is no possible column. */
2108 if (first_must_follow)
2109 return false;
2110 else
2111 last_must_precede = next_ps_i;
2115 /* Now insert the node after LAST_MUST_PRECEDE, or at the start of the row if there is none. */
2117 if (! last_must_precede)
2119 ps_i->next_in_row = ps->rows[row];
2120 ps_i->prev_in_row = NULL;
2121 if (ps_i->next_in_row)
2122 ps_i->next_in_row->prev_in_row = ps_i;
2123 ps->rows[row] = ps_i;
2125 else
2127 ps_i->next_in_row = last_must_precede->next_in_row;
2128 last_must_precede->next_in_row = ps_i;
2129 ps_i->prev_in_row = last_must_precede;
2130 if (ps_i->next_in_row)
2131 ps_i->next_in_row->prev_in_row = ps_i;
2134 return true;
2137 /* Advances the PS_INSN one column in its current row; returns false
2138 on failure and true on success. Bit N is set in MUST_FOLLOW if
2139 the node with cuid N must come after the node pointed to by
2140 PS_I when scheduled in the same cycle. */
2141 static int
2142 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2143 sbitmap must_follow)
2145 ps_insn_ptr prev, next;
2146 int row;
2147 ddg_node_ptr next_node;
2149 if (!ps || !ps_i)
2150 return false;
2152 row = SMODULO (ps_i->cycle, ps->ii);
2154 if (! ps_i->next_in_row)
2155 return false;
2157 next_node = ps_i->next_in_row->node;
2159 /* Check if next_in_row is dependent on ps_i, both having the same sched
2160 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2161 if (TEST_BIT (must_follow, next_node->cuid))
2162 return false;
2164 /* Advance PS_I over its next_in_row in the doubly linked list. */
2165 prev = ps_i->prev_in_row;
2166 next = ps_i->next_in_row;
2168 if (ps_i == ps->rows[row])
2169 ps->rows[row] = next;
2171 ps_i->next_in_row = next->next_in_row;
2173 if (next->next_in_row)
2174 next->next_in_row->prev_in_row = ps_i;
2176 next->next_in_row = ps_i;
2177 ps_i->prev_in_row = next;
2179 next->prev_in_row = prev;
2180 if (prev)
2181 prev->next_in_row = next;
2183 return true;
2186 /* Inserts a DDG_NODE into the given partial schedule at the given cycle.
2187 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2188 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
2189 before/after (respectively) the node pointed to by PS_I when scheduled
2190 in the same cycle. */
2191 static ps_insn_ptr
2192 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2193 sbitmap must_precede, sbitmap must_follow)
2195 ps_insn_ptr ps_i;
2196 int rest_count = 1;
2197 int row = SMODULO (cycle, ps->ii);
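/* row_rest_count of the insn at the head of the row tracks how many
   insns already occupy that row; do not exceed the issue rate.  */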
2199 if (ps->rows[row]
2200 && ps->rows[row]->row_rest_count >= issue_rate)
2201 return NULL;
2203 if (ps->rows[row])
2204 rest_count += ps->rows[row]->row_rest_count;
2206 ps_i = create_ps_insn (node, rest_count, cycle);
2208 /* Finds and inserts PS_I according to MUST_FOLLOW and
2209 MUST_PRECEDE. */
2210 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2212 free (ps_i);
2213 return NULL;
2216 return ps_i;
2219 /* Advance time one cycle. Assumes DFA is being used. */
2220 static void
2221 advance_one_cycle (void)
2223 if (targetm.sched.dfa_pre_cycle_insn)
2224 state_transition (curr_state,
2225 targetm.sched.dfa_pre_cycle_insn ());
2227 state_transition (curr_state, NULL);
2229 if (targetm.sched.dfa_post_cycle_insn)
2230 state_transition (curr_state,
2231 targetm.sched.dfa_post_cycle_insn ());
2236 /* Checks if PS has resource conflicts according to the DFA, from cycle
2237 FROM to cycle TO; returns true if there are conflicts and false
2238 if there are no conflicts. Assumes the DFA is being used. */
2239 static int
2240 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2242 int cycle;
2244 state_reset (curr_state);
2246 for (cycle = from; cycle <= to; cycle++)
2248 ps_insn_ptr crr_insn;
2249 /* Holds the remaining issue slots in the current row. */
2250 int can_issue_more = issue_rate;
2252 /* Walk through the DFA for the current row. */
2253 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2254 crr_insn;
2255 crr_insn = crr_insn->next_in_row)
2257 rtx insn = crr_insn->node->insn;
2259 if (!INSN_P (insn))
2260 continue;
2262 /* Check if there is room for the current insn. */
2263 if (!can_issue_more || state_dead_lock_p (curr_state))
2264 return true;
2266 /* Update the DFA state and return with failure if the DFA found
2267 resource conflicts. */
2268 if (state_transition (curr_state, insn) >= 0)
2269 return true;
2271 if (targetm.sched.variable_issue)
2272 can_issue_more =
2273 targetm.sched.variable_issue (sched_dump, sched_verbose,
2274 insn, can_issue_more);
2275 /* A naked CLOBBER or USE generates no instruction, so don't
2276 let them consume issue slots. */
2277 else if (GET_CODE (PATTERN (insn)) != USE
2278 && GET_CODE (PATTERN (insn)) != CLOBBER)
2279 can_issue_more--;
2282 /* Advance the DFA to the next cycle. */
2283 advance_one_cycle ();
2285 return false;
2288 /* Checks if the given node causes resource conflicts when added to PS at
2289 cycle C. If not, the node is added to PS and returned; otherwise zero
2290 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2291 cuid N must come before/after (respectively) the node pointed to by
2292 PS_I when scheduled in the same cycle. */
2293 ps_insn_ptr
2294 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2295 int c, sbitmap must_precede,
2296 sbitmap must_follow)
2298 int has_conflicts = 0;
2299 ps_insn_ptr ps_i;
2301 /* First add the node to the PS, if this succeeds check for
2302 conflicts, trying different issue slots in the same row. */
2303 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2304 return NULL; /* Failed to insert the node at the given cycle. */
2306 has_conflicts = ps_has_conflicts (ps, c, c)
2307 || (ps->history > 0
2308 && ps_has_conflicts (ps,
2309 c - ps->history,
2310 c + ps->history));
2312 /* Try different issue slots to find one that the given node can be
2313 scheduled in without conflicts. */
2314 while (has_conflicts)
2316 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2317 break;
2318 has_conflicts = ps_has_conflicts (ps, c, c)
2319 || (ps->history > 0
2320 && ps_has_conflicts (ps,
2321 c - ps->history,
2322 c + ps->history));
2325 if (has_conflicts)
2327 remove_node_from_ps (ps, ps_i);
2328 return NULL;
2331 ps->min_cycle = MIN (ps->min_cycle, c);
2332 ps->max_cycle = MAX (ps->max_cycle, c);
2333 return ps_i;
2336 /* Rotate the rows of PS such that insns scheduled at time
2337 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2338 void
2339 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2341 int i, row, backward_rotates;
2342 int last_row = ps->ii - 1;
2344 if (start_cycle == 0)
2345 return;
2347 backward_rotates = SMODULO (start_cycle, ps->ii);
2349 /* Revisit later and optimize this into a single loop. */
2350 for (i = 0; i < backward_rotates; i++)
2352 ps_insn_ptr first_row = ps->rows[0];
2354 for (row = 0; row < last_row; row++)
2355 ps->rows[row] = ps->rows[row+1];
2357 ps->rows[last_row] = first_row;
2360 ps->max_cycle -= start_cycle;
2361 ps->min_cycle -= start_cycle;
2364 /* Remove the node N from the partial schedule PS. Because we restart the
2365 DFA each time we want to check for resource conflicts, this is equivalent
2366 to unscheduling the node N. */
2367 static bool
2368 ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2370 ps_insn_ptr ps_i;
2371 int row = SMODULO (SCHED_TIME (n), ps->ii);
2373 if (row < 0 || row > ps->ii)
2374 return false;
2376 for (ps_i = ps->rows[row];
2377 ps_i && ps_i->node != n;
2378 ps_i = ps_i->next_in_row);
2379 if (!ps_i)
2380 return false;
2382 return remove_node_from_ps (ps, ps_i);
2384 #endif /* INSN_SCHEDULING */
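/* Gate for the SMS pass: run only when optimizing and -fmodulo-sched
   is enabled.  */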
2386 static bool
2387 gate_handle_sms (void)
2389 return (optimize > 0 && flag_modulo_sched);
2393 /* Run instruction scheduler. */
2394 /* Perform SMS modulo scheduling. */
2395 static unsigned int
2396 rest_of_handle_sms (void)
2398 #ifdef INSN_SCHEDULING
2399 basic_block bb;
2401 /* Collect loop information to be used in SMS. */
2402 cfg_layout_initialize (0);
2403 sms_schedule ();
2405 /* Update the life information, because we add pseudos. */
2406 max_regno = max_reg_num ();
2408 /* Finalize layout changes. */
2409 FOR_EACH_BB (bb)
2410 if (bb->next_bb != EXIT_BLOCK_PTR)
2411 bb->aux = bb->next_bb;
2412 free_dominance_info (CDI_DOMINATORS);
2413 cfg_layout_finalize ();
2414 #endif /* INSN_SCHEDULING */
2415 return 0;
2418 struct tree_opt_pass pass_sms =
2420 "sms", /* name */
2421 gate_handle_sms, /* gate */
2422 rest_of_handle_sms, /* execute */
2423 NULL, /* sub */
2424 NULL, /* next */
2425 0, /* static_pass_number */
2426 TV_SMS, /* tv_id */
2427 0, /* properties_required */
2428 0, /* properties_provided */
2429 0, /* properties_destroyed */
2430 TODO_dump_func, /* todo_flags_start */
2431 TODO_df_finish |
2432 TODO_dump_func |
2433 TODO_ggc_collect, /* todo_flags_finish */
2434 'm' /* letter */