/* gcc/modulo-sched.c
   (from official-gcc/constexpr.git, blob 73c4adc84b030e40c30a87e0fff69c9eb92bd5cc;
   a stray ChangeLog fragment referencing cgraphunit.c appeared here in the
   extracted copy).  */
/* Swing Modulo Scheduling implementation.
   Copyright (C) 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "sched-int.h"
#include "target.h"
#include "cfglayout.h"
#include "cfgloop.h"
#include "cfghooks.h"
#include "expr.h"
#include "params.h"
#include "gcov-io.h"
#include "ddg.h"
#include "timevar.h"
#include "tree-pass.h"
#ifdef INSN_SCHEDULING

/* This file contains the implementation of the Swing Modulo Scheduler,
   described in the following references:
   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
       Lifetime--sensitive modulo scheduling in a production environment.
       IEEE Trans. on Comps., 50(3), March 2001
   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
       PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).

   The basic structure is:
   1. Build a data-dependence graph (DDG) for each loop.
   2. Use the DDG to order the insns of a loop (not in topological order
      necessarily, but rather) trying to place each insn after all its
      predecessors _or_ after all its successors.
   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
   4. Use the ordering to perform list-scheduling of the loop:
      1. Set II = MII.  We will try to schedule the loop within II cycles.
      2. Try to schedule the insns one by one according to the ordering.
	 For each insn compute an interval of cycles by considering already-
	 scheduled preds and succs (and associated latencies); try to place
	 the insn in the cycles of this window checking for potential
	 resource conflicts (using the DFA interface).
	 Note: this is different from the cycle-scheduling of schedule_insns;
	 here the insns are not scheduled monotonically top-down (nor bottom-
	 up).
      3. If failed in scheduling all insns - bump II++ and try again, unless
	 II reaches an upper bound MaxII, in which case report failure.
   5. If we succeeded in scheduling the loop within II cycles, we now
      generate prolog and epilog, decrease the counter of the loop, and
      perform modulo variable expansion for live ranges that span more than
      II cycles (i.e. use register copies to prevent a def from overwriting
      itself before reaching the use).  */
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   Note: both arguments are evaluated more than once; pass only
   side-effect-free expressions.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  This is the cycle span
   of the schedule divided by II, rounded up.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
108 /* A single instruction in the partial schedule. */
109 struct ps_insn
111 /* The corresponding DDG_NODE. */
112 ddg_node_ptr node;
114 /* The (absolute) cycle in which the PS instruction is scheduled.
115 Same as SCHED_TIME (node). */
116 int cycle;
118 /* The next/prev PS_INSN in the same row. */
119 ps_insn_ptr next_in_row,
120 prev_in_row;
122 /* The number of nodes in the same row that come after this node. */
123 int row_rest_count;
126 /* Holds the partial schedule as an array of II rows. Each entry of the
127 array points to a linked list of PS_INSNs, which represents the
128 instructions that are scheduled for that row. */
129 struct partial_schedule
131 int ii; /* Number of rows in the partial schedule. */
132 int history; /* Threshold for conflict checking using DFA. */
134 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
135 ps_insn_ptr *rows;
137 /* The earliest absolute cycle of an insn in the partial schedule. */
138 int min_cycle;
140 /* The latest absolute cycle of an insn in the partial schedule. */
141 int max_cycle;
143 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
146 /* We use this to record all the register replacements we do in
147 the kernel so we can undo SMS if it is not profitable. */
148 struct undo_replace_buff_elem
150 rtx insn;
151 rtx orig_reg;
152 rtx new_reg;
153 struct undo_replace_buff_elem *next;
158 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
159 static void free_partial_schedule (partial_schedule_ptr);
160 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
161 void print_partial_schedule (partial_schedule_ptr, FILE *);
162 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
163 ddg_node_ptr node, int cycle,
164 sbitmap must_precede,
165 sbitmap must_follow);
166 static void rotate_partial_schedule (partial_schedule_ptr, int);
167 void set_row_column_for_ps (partial_schedule_ptr);
168 static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
171 /* This page defines constants and structures for the modulo scheduling
172 driver. */
174 /* As in haifa-sched.c: */
175 /* issue_rate is the number of insns that can be scheduled in the same
176 machine cycle. It can be defined in the config/mach/mach.h file,
177 otherwise we set it to 1. */
179 static int issue_rate;
181 static int sms_order_nodes (ddg_ptr, int, int * result);
182 static void set_node_sched_params (ddg_ptr);
183 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
184 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
185 static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
186 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
187 int from_stage, int to_stage,
188 int is_prolog);
190 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
191 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
192 #define SCHED_FIRST_REG_MOVE(x) \
193 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
194 #define SCHED_NREG_MOVES(x) \
195 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
196 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
197 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
198 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
200 /* The scheduling parameters held for each node. */
201 typedef struct node_sched_params
203 int asap; /* A lower-bound on the absolute scheduling cycle. */
204 int time; /* The absolute scheduling cycle (time >= asap). */
206 /* The following field (first_reg_move) is a pointer to the first
207 register-move instruction added to handle the modulo-variable-expansion
208 of the register defined by this node. This register-move copies the
209 original register defined by the node. */
210 rtx first_reg_move;
212 /* The number of register-move instructions added, immediately preceding
213 first_reg_move. */
214 int nreg_moves;
216 int row; /* Holds time % ii. */
217 int stage; /* Holds time / ii. */
219 /* The column of a node inside the ps. If nodes u, v are on the same row,
220 u will precede v if column (u) < column (v). */
221 int column;
222 } *node_sched_params_ptr;
225 /* The following three functions are copied from the current scheduler
226 code in order to use sched_analyze() for computing the dependencies.
227 They are used when initializing the sched_info structure. */
228 static const char *
229 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
231 static char tmp[80];
233 sprintf (tmp, "i%4d", INSN_UID (insn));
234 return tmp;
237 static void
238 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
239 regset cond_exec ATTRIBUTE_UNUSED,
240 regset used ATTRIBUTE_UNUSED,
241 regset set ATTRIBUTE_UNUSED)
245 static struct sched_info sms_sched_info =
247 NULL,
248 NULL,
249 NULL,
250 NULL,
251 NULL,
252 sms_print_insn,
253 NULL,
254 compute_jump_reg_dependencies,
255 NULL, NULL,
256 NULL, NULL,
257 0, 0, 0,
259 NULL, NULL, NULL, NULL, NULL,
264 /* Return the register decremented and tested in INSN,
265 or zero if it is not a decrement-and-branch insn. */
267 static rtx
268 doloop_register_get (rtx insn ATTRIBUTE_UNUSED)
270 #ifdef HAVE_doloop_end
271 rtx pattern, reg, condition;
273 if (! JUMP_P (insn))
274 return NULL_RTX;
276 pattern = PATTERN (insn);
277 condition = doloop_condition_get (pattern);
278 if (! condition)
279 return NULL_RTX;
281 if (REG_P (XEXP (condition, 0)))
282 reg = XEXP (condition, 0);
283 else if (GET_CODE (XEXP (condition, 0)) == PLUS
284 && REG_P (XEXP (XEXP (condition, 0), 0)))
285 reg = XEXP (XEXP (condition, 0), 0);
286 else
287 gcc_unreachable ();
289 return reg;
290 #else
291 return NULL_RTX;
292 #endif
295 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
296 that the number of iterations is a compile-time constant. If so,
297 return the rtx that sets COUNT_REG to a constant, and set COUNT to
298 this constant. Otherwise return 0. */
299 static rtx
300 const_iteration_count (rtx count_reg, basic_block pre_header,
301 HOST_WIDEST_INT * count)
303 rtx insn;
304 rtx head, tail;
306 if (! pre_header)
307 return NULL_RTX;
309 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
311 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
312 if (INSN_P (insn) && single_set (insn) &&
313 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
315 rtx pat = single_set (insn);
317 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
319 *count = INTVAL (SET_SRC (pat));
320 return insn;
323 return NULL_RTX;
326 return NULL_RTX;
329 /* A very simple resource-based lower bound on the initiation interval.
330 ??? Improve the accuracy of this bound by considering the
331 utilization of various units. */
332 static int
333 res_MII (ddg_ptr g)
335 return (g->num_nodes / issue_rate);
339 /* Points to the array that contains the sched data for each node. */
340 static node_sched_params_ptr node_sched_params;
342 /* Allocate sched_params for each node and initialize it. Assumes that
343 the aux field of each node contain the asap bound (computed earlier),
344 and copies it into the sched_params field. */
345 static void
346 set_node_sched_params (ddg_ptr g)
348 int i;
350 /* Allocate for each node in the DDG a place to hold the "sched_data". */
351 /* Initialize ASAP/ALAP/HIGHT to zero. */
352 node_sched_params = (node_sched_params_ptr)
353 xcalloc (g->num_nodes,
354 sizeof (struct node_sched_params));
356 /* Set the pointer of the general data of the node to point to the
357 appropriate sched_params structure. */
358 for (i = 0; i < g->num_nodes; i++)
360 /* Watch out for aliasing problems? */
361 node_sched_params[i].asap = g->nodes[i].aux.count;
362 g->nodes[i].aux.info = &node_sched_params[i];
366 static void
367 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
369 int i;
371 if (! file)
372 return;
373 for (i = 0; i < num_nodes; i++)
375 node_sched_params_ptr nsp = &node_sched_params[i];
376 rtx reg_move = nsp->first_reg_move;
377 int j;
379 fprintf (file, "Node = %d; INSN = %d\n", i,
380 (INSN_UID (g->nodes[i].insn)));
381 fprintf (file, " asap = %d:\n", nsp->asap);
382 fprintf (file, " time = %d:\n", nsp->time);
383 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
384 for (j = 0; j < nsp->nreg_moves; j++)
386 fprintf (file, " reg_move = ");
387 print_rtl_single (file, reg_move);
388 reg_move = PREV_INSN (reg_move);
394 Breaking intra-loop register anti-dependences:
395 Each intra-loop register anti-dependence implies a cross-iteration true
396 dependence of distance 1. Therefore, we can remove such false dependencies
397 and figure out if the partial schedule broke them by checking if (for a
398 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
399 if so generate a register move. The number of such moves is equal to:
400 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
401 nreg_moves = ----------------------------------- + 1 - { dependence.
402 ii { 1 if not.
404 static struct undo_replace_buff_elem *
405 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
407 ddg_ptr g = ps->g;
408 int ii = ps->ii;
409 int i;
410 struct undo_replace_buff_elem *reg_move_replaces = NULL;
412 for (i = 0; i < g->num_nodes; i++)
414 ddg_node_ptr u = &g->nodes[i];
415 ddg_edge_ptr e;
416 int nreg_moves = 0, i_reg_move;
417 sbitmap *uses_of_defs;
418 rtx last_reg_move;
419 rtx prev_reg, old_reg;
421 /* Compute the number of reg_moves needed for u, by looking at life
422 ranges started at u (excluding self-loops). */
423 for (e = u->out; e; e = e->next_out)
424 if (e->type == TRUE_DEP && e->dest != e->src)
426 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
428 if (e->distance == 1)
429 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
431 /* If dest precedes src in the schedule of the kernel, then dest
432 will read before src writes and we can save one reg_copy. */
433 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
434 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
435 nreg_moves4e--;
437 nreg_moves = MAX (nreg_moves, nreg_moves4e);
440 if (nreg_moves == 0)
441 continue;
443 /* Every use of the register defined by node may require a different
444 copy of this register, depending on the time the use is scheduled.
445 Set a bitmap vector, telling which nodes use each copy of this
446 register. */
447 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
448 sbitmap_vector_zero (uses_of_defs, nreg_moves);
449 for (e = u->out; e; e = e->next_out)
450 if (e->type == TRUE_DEP && e->dest != e->src)
452 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
454 if (e->distance == 1)
455 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
457 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
458 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
459 dest_copy--;
461 if (dest_copy)
462 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
465 /* Now generate the reg_moves, attaching relevant uses to them. */
466 SCHED_NREG_MOVES (u) = nreg_moves;
467 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
468 last_reg_move = u->insn;
470 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
472 unsigned int i_use = 0;
473 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
474 rtx reg_move = gen_move_insn (new_reg, prev_reg);
475 sbitmap_iterator sbi;
477 add_insn_before (reg_move, last_reg_move, NULL);
478 last_reg_move = reg_move;
480 if (!SCHED_FIRST_REG_MOVE (u))
481 SCHED_FIRST_REG_MOVE (u) = reg_move;
483 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
485 struct undo_replace_buff_elem *rep;
487 rep = (struct undo_replace_buff_elem *)
488 xcalloc (1, sizeof (struct undo_replace_buff_elem));
489 rep->insn = g->nodes[i_use].insn;
490 rep->orig_reg = old_reg;
491 rep->new_reg = new_reg;
493 if (! reg_move_replaces)
494 reg_move_replaces = rep;
495 else
497 rep->next = reg_move_replaces;
498 reg_move_replaces = rep;
501 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
502 if (rescan)
503 df_insn_rescan (g->nodes[i_use].insn);
506 prev_reg = new_reg;
508 sbitmap_vector_free (uses_of_defs);
510 return reg_move_replaces;
513 /* Free memory allocated for the undo buffer. */
514 static void
515 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
518 while (reg_move_replaces)
520 struct undo_replace_buff_elem *rep = reg_move_replaces;
522 reg_move_replaces = reg_move_replaces->next;
523 free (rep);
527 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
528 of SCHED_ROW and SCHED_STAGE. */
529 static void
530 normalize_sched_times (partial_schedule_ptr ps)
532 int i;
533 ddg_ptr g = ps->g;
534 int amount = PS_MIN_CYCLE (ps);
535 int ii = ps->ii;
537 /* Don't include the closing branch assuming that it is the last node. */
538 for (i = 0; i < g->num_nodes - 1; i++)
540 ddg_node_ptr u = &g->nodes[i];
541 int normalized_time = SCHED_TIME (u) - amount;
543 gcc_assert (normalized_time >= 0);
545 SCHED_TIME (u) = normalized_time;
546 SCHED_ROW (u) = normalized_time % ii;
547 SCHED_STAGE (u) = normalized_time / ii;
551 /* Set SCHED_COLUMN of each node according to its position in PS. */
552 static void
553 set_columns_for_ps (partial_schedule_ptr ps)
555 int row;
557 for (row = 0; row < ps->ii; row++)
559 ps_insn_ptr cur_insn = ps->rows[row];
560 int column = 0;
562 for (; cur_insn; cur_insn = cur_insn->next_in_row)
563 SCHED_COLUMN (cur_insn->node) = column++;
567 /* Permute the insns according to their order in PS, from row 0 to
568 row ii-1, and position them right before LAST. This schedules
569 the insns of the loop kernel. */
570 static void
571 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
573 int ii = ps->ii;
574 int row;
575 ps_insn_ptr ps_ij;
577 for (row = 0; row < ii ; row++)
578 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
579 if (PREV_INSN (last) != ps_ij->node->insn)
580 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
581 PREV_INSN (last));
584 static void
585 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
586 int to_stage, int for_prolog)
588 int row;
589 ps_insn_ptr ps_ij;
591 for (row = 0; row < ps->ii; row++)
592 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
594 ddg_node_ptr u_node = ps_ij->node;
595 int j, i_reg_moves;
596 rtx reg_move = NULL_RTX;
598 if (for_prolog)
600 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
601 number of reg_moves starting with the second occurrence of
602 u_node, which is generated if its SCHED_STAGE <= to_stage. */
603 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
604 i_reg_moves = MAX (i_reg_moves, 0);
605 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
607 /* The reg_moves start from the *first* reg_move backwards. */
608 if (i_reg_moves)
610 reg_move = SCHED_FIRST_REG_MOVE (u_node);
611 for (j = 1; j < i_reg_moves; j++)
612 reg_move = PREV_INSN (reg_move);
615 else /* It's for the epilog. */
617 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
618 starting to decrease one stage after u_node no longer occurs;
619 that is, generate all reg_moves until
620 SCHED_STAGE (u_node) == from_stage - 1. */
621 i_reg_moves = SCHED_NREG_MOVES (u_node)
622 - (from_stage - SCHED_STAGE (u_node) - 1);
623 i_reg_moves = MAX (i_reg_moves, 0);
624 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
626 /* The reg_moves start from the *last* reg_move forwards. */
627 if (i_reg_moves)
629 reg_move = SCHED_FIRST_REG_MOVE (u_node);
630 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
631 reg_move = PREV_INSN (reg_move);
635 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
636 emit_insn (copy_rtx (PATTERN (reg_move)));
637 if (SCHED_STAGE (u_node) >= from_stage
638 && SCHED_STAGE (u_node) <= to_stage)
639 duplicate_insn_chain (u_node->first_note, u_node->insn);
644 /* Generate the instructions (including reg_moves) for prolog & epilog. */
645 static void
646 generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
648 int i;
649 int last_stage = PS_STAGE_COUNT (ps) - 1;
650 edge e;
652 /* Generate the prolog, inserting its insns on the loop-entry edge. */
653 start_sequence ();
655 if (count_reg)
656 /* Generate a subtract instruction at the beginning of the prolog to
657 adjust the loop count by STAGE_COUNT. */
658 emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));
660 for (i = 0; i < last_stage; i++)
661 duplicate_insns_of_cycles (ps, 0, i, 1);
663 /* Put the prolog on the entry edge. */
664 e = loop_preheader_edge (loop);
665 split_edge_and_insert (e, get_insns ());
667 end_sequence ();
669 /* Generate the epilog, inserting its insns on the loop-exit edge. */
670 start_sequence ();
672 for (i = 0; i < last_stage; i++)
673 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
675 /* Put the epilogue on the exit edge. */
676 gcc_assert (single_exit (loop));
677 e = single_exit (loop);
678 split_edge_and_insert (e, get_insns ());
679 end_sequence ();
682 /* Return true if all the BBs of the loop are empty except the
683 loop header. */
684 static bool
685 loop_single_full_bb_p (struct loop *loop)
687 unsigned i;
688 basic_block *bbs = get_loop_body (loop);
690 for (i = 0; i < loop->num_nodes ; i++)
692 rtx head, tail;
693 bool empty_bb = true;
695 if (bbs[i] == loop->header)
696 continue;
698 /* Make sure that basic blocks other than the header
699 have only notes labels or jumps. */
700 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
701 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
703 if (NOTE_P (head) || LABEL_P (head)
704 || (INSN_P (head) && JUMP_P (head)))
705 continue;
706 empty_bb = false;
707 break;
710 if (! empty_bb)
712 free (bbs);
713 return false;
716 free (bbs);
717 return true;
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )		      \
				  && (EDGE_COUNT (loop->latch->preds) == 1) \
				  && (EDGE_COUNT (loop->latch->succs) == 1))
726 /* Return true if the loop is in its canonical form and false if not.
727 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
728 static bool
729 loop_canon_p (struct loop *loop)
732 if (loop->inner || !loop_outer (loop))
733 return false;
735 if (!single_exit (loop))
737 if (dump_file)
739 rtx insn = BB_END (loop->header);
741 fprintf (dump_file, "SMS loop many exits ");
742 fprintf (dump_file, " %s %d (file, line)\n",
743 insn_file (insn), insn_line (insn));
745 return false;
748 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
750 if (dump_file)
752 rtx insn = BB_END (loop->header);
754 fprintf (dump_file, "SMS loop many BBs. ");
755 fprintf (dump_file, " %s %d (file, line)\n",
756 insn_file (insn), insn_line (insn));
758 return false;
761 return true;
764 /* If there are more than one entry for the loop,
765 make it one by splitting the first entry edge and
766 redirecting the others to the new BB. */
767 static void
768 canon_loop (struct loop *loop)
770 edge e;
771 edge_iterator i;
773 /* Avoid annoying special cases of edges going to exit
774 block. */
775 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
776 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
777 split_edge (e);
779 if (loop->latch == loop->header
780 || EDGE_COUNT (loop->latch->succs) > 1)
782 FOR_EACH_EDGE (e, i, loop->header->preds)
783 if (e->src == loop->latch)
784 break;
785 split_edge (e);
/* Probability in % that the sms-ed loop rolls enough so that optimized
   version may be entered.  Just a guess.  */
#define PROB_SMS_ENOUGH_ITERATIONS 80

/* Used to calculate the upper bound of ii (maxii = MAXII_FACTOR * mii).  */
#define MAXII_FACTOR 2
796 /* Main entry point, perform SMS scheduling on the loops of the function
797 that consist of single basic blocks. */
798 static void
799 sms_schedule (void)
801 static int passes = 0;
802 rtx insn;
803 ddg_ptr *g_arr, g;
804 int * node_order;
805 int maxii;
806 loop_iterator li;
807 partial_schedule_ptr ps;
808 basic_block bb = NULL;
809 struct loop *loop;
810 basic_block condition_bb = NULL;
811 edge latch_edge;
812 gcov_type trip_count = 0;
814 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
815 | LOOPS_HAVE_RECORDED_EXITS);
816 if (number_of_loops () <= 1)
818 loop_optimizer_finalize ();
819 return; /* There are no loops to schedule. */
822 /* Initialize issue_rate. */
823 if (targetm.sched.issue_rate)
825 int temp = reload_completed;
827 reload_completed = 1;
828 issue_rate = targetm.sched.issue_rate ();
829 reload_completed = temp;
831 else
832 issue_rate = 1;
834 /* Initialize the scheduler. */
835 current_sched_info = &sms_sched_info;
837 /* Init Data Flow analysis, to be used in interloop dep calculation. */
838 df_set_flags (DF_LR_RUN_DCE);
839 df_rd_add_problem ();
840 df_note_add_problem ();
841 df_chain_add_problem (DF_DU_CHAIN);
842 df_analyze ();
843 regstat_compute_calls_crossed ();
844 sched_init ();
846 /* Allocate memory to hold the DDG array one entry for each loop.
847 We use loop->num as index into this array. */
848 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
850 /* Build DDGs for all the relevant loops and hold them in G_ARR
851 indexed by the loop index. */
852 FOR_EACH_LOOP (li, loop, 0)
854 rtx head, tail;
855 rtx count_reg;
857 /* For debugging. */
858 if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
860 if (dump_file)
861 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
863 break;
866 if (! loop_canon_p (loop))
867 continue;
869 if (! loop_single_full_bb_p (loop))
870 continue;
872 bb = loop->header;
874 get_ebb_head_tail (bb, bb, &head, &tail);
875 latch_edge = loop_latch_edge (loop);
876 gcc_assert (single_exit (loop));
877 if (single_exit (loop)->count)
878 trip_count = latch_edge->count / single_exit (loop)->count;
880 /* Perfrom SMS only on loops that their average count is above threshold. */
882 if ( latch_edge->count
883 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
885 if (dump_file)
887 fprintf (dump_file, " %s %d (file, line)\n",
888 insn_file (tail), insn_line (tail));
889 fprintf (dump_file, "SMS single-bb-loop\n");
890 if (profile_info && flag_branch_probabilities)
892 fprintf (dump_file, "SMS loop-count ");
893 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
894 (HOST_WIDEST_INT) bb->count);
895 fprintf (dump_file, "\n");
896 fprintf (dump_file, "SMS trip-count ");
897 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
898 (HOST_WIDEST_INT) trip_count);
899 fprintf (dump_file, "\n");
900 fprintf (dump_file, "SMS profile-sum-max ");
901 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
902 (HOST_WIDEST_INT) profile_info->sum_max);
903 fprintf (dump_file, "\n");
906 continue;
909 /* Make sure this is a doloop. */
910 if ( !(count_reg = doloop_register_get (tail)))
911 continue;
913 /* Don't handle BBs with calls or barriers, or !single_set insns,
914 or auto-increment insns (to avoid creating invalid reg-moves
915 for the auto-increment insns).
916 ??? Should handle auto-increment insns. */
917 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
918 if (CALL_P (insn)
919 || BARRIER_P (insn)
920 || (INSN_P (insn) && !JUMP_P (insn)
921 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
922 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0))
923 break;
925 if (insn != NEXT_INSN (tail))
927 if (dump_file)
929 if (CALL_P (insn))
930 fprintf (dump_file, "SMS loop-with-call\n");
931 else if (BARRIER_P (insn))
932 fprintf (dump_file, "SMS loop-with-barrier\n");
933 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
934 fprintf (dump_file, "SMS reg inc\n");
935 else
936 fprintf (dump_file, "SMS loop-with-not-single-set\n");
937 print_rtl_single (dump_file, insn);
940 continue;
943 if (! (g = create_ddg (bb, 0)))
945 if (dump_file)
946 fprintf (dump_file, "SMS doloop\n");
947 continue;
950 g_arr[loop->num] = g;
953 /* We don't want to perform SMS on new loops - created by versioning. */
954 FOR_EACH_LOOP (li, loop, 0)
956 rtx head, tail;
957 rtx count_reg, count_init;
958 int mii, rec_mii;
959 unsigned stage_count = 0;
960 HOST_WIDEST_INT loop_count = 0;
962 if (! (g = g_arr[loop->num]))
963 continue;
965 if (dump_file)
966 print_ddg (dump_file, g);
968 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
970 latch_edge = loop_latch_edge (loop);
971 gcc_assert (single_exit (loop));
972 if (single_exit (loop)->count)
973 trip_count = latch_edge->count / single_exit (loop)->count;
975 if (dump_file)
977 fprintf (dump_file, " %s %d (file, line)\n",
978 insn_file (tail), insn_line (tail));
979 fprintf (dump_file, "SMS single-bb-loop\n");
980 if (profile_info && flag_branch_probabilities)
982 fprintf (dump_file, "SMS loop-count ");
983 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
984 (HOST_WIDEST_INT) bb->count);
985 fprintf (dump_file, "\n");
986 fprintf (dump_file, "SMS profile-sum-max ");
987 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
988 (HOST_WIDEST_INT) profile_info->sum_max);
989 fprintf (dump_file, "\n");
991 fprintf (dump_file, "SMS doloop\n");
992 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
993 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
994 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
998 /* In case of th loop have doloop register it gets special
999 handling. */
1000 count_init = NULL_RTX;
1001 if ((count_reg = doloop_register_get (tail)))
1003 basic_block pre_header;
1005 pre_header = loop_preheader_edge (loop)->src;
1006 count_init = const_iteration_count (count_reg, pre_header,
1007 &loop_count);
1009 gcc_assert (count_reg);
1011 if (dump_file && count_init)
1013 fprintf (dump_file, "SMS const-doloop ");
1014 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1015 loop_count);
1016 fprintf (dump_file, "\n");
1019 node_order = XNEWVEC (int, g->num_nodes);
1021 mii = 1; /* Need to pass some estimate of mii. */
1022 rec_mii = sms_order_nodes (g, mii, node_order);
1023 mii = MAX (res_MII (g), rec_mii);
1024 maxii = MAXII_FACTOR * mii;
1026 if (dump_file)
1027 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1028 rec_mii, mii, maxii);
1030 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1031 ASAP. */
1032 set_node_sched_params (g);
1034 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1036 if (ps)
1037 stage_count = PS_STAGE_COUNT (ps);
1039 /* Stage count of 1 means that there is no interleaving between
1040 iterations, let the scheduling passes do the job. */
1041 if (stage_count < 1
1042 || (count_init && (loop_count <= stage_count))
1043 || (flag_branch_probabilities && (trip_count <= stage_count)))
1045 if (dump_file)
1047 fprintf (dump_file, "SMS failed... \n");
1048 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1049 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1050 fprintf (dump_file, ", trip-count=");
1051 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1052 fprintf (dump_file, ")\n");
1054 continue;
1056 else
1058 struct undo_replace_buff_elem *reg_move_replaces;
1060 if (dump_file)
1062 fprintf (dump_file,
1063 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1064 stage_count);
1065 print_partial_schedule (ps, dump_file);
1066 fprintf (dump_file,
1067 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1068 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1071 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1072 the closing_branch was scheduled and should appear in the last (ii-1)
1073 row. Otherwise, we are free to schedule the branch, and we let nodes
1074 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1075 row; this should reduce stage_count to minimum. */
1076 normalize_sched_times (ps);
1077 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1078 set_columns_for_ps (ps);
1080 canon_loop (loop);
      /* In case the BCT count is not known, perform loop versioning.  */
1083 if (count_reg && ! count_init)
1085 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1086 GEN_INT(stage_count));
1087 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1088 * REG_BR_PROB_BASE) / 100;
1090 loop_version (loop, comp_rtx, &condition_bb,
1091 prob, prob, REG_BR_PROB_BASE - prob,
1092 true);
1095 /* Set new iteration count of loop kernel. */
1096 if (count_reg && count_init)
1097 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1098 - stage_count + 1);
1100 /* Now apply the scheduled kernel to the RTL of the loop. */
1101 permute_partial_schedule (ps, g->closing_branch->first_note);
1103 /* Mark this loop as software pipelined so the later
1104 scheduling passes doesn't touch it. */
1105 if (! flag_resched_modulo_sched)
1106 g->bb->flags |= BB_DISABLE_SCHEDULE;
1107 /* The life-info is not valid any more. */
1108 df_set_bb_dirty (g->bb);
1110 reg_move_replaces = generate_reg_moves (ps, true);
1111 if (dump_file)
1112 print_node_sched_params (dump_file, g->num_nodes, g);
1113 /* Generate prolog and epilog. */
1114 if (count_reg && !count_init)
1115 generate_prolog_epilog (ps, loop, count_reg);
1116 else
1117 generate_prolog_epilog (ps, loop, NULL_RTX);
1119 free_undo_replace_buff (reg_move_replaces);
1122 free_partial_schedule (ps);
1123 free (node_sched_params);
1124 free (node_order);
1125 free_ddg (g);
1128 regstat_free_calls_crossed ();
1129 free (g_arr);
1131 /* Release scheduler data, needed until now because of DFA. */
1132 sched_finish ();
1133 loop_optimizer_finalize ();
1136 /* The SMS scheduling algorithm itself
1137 -----------------------------------
1138 Input: 'O' an ordered list of insns of a loop.
1139 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1141 'Q' is the empty Set
1142 'PS' is the partial schedule; it holds the currently scheduled nodes with
1143 their cycle/slot.
1144 'PSP' previously scheduled predecessors.
1145 'PSS' previously scheduled successors.
1146 't(u)' the cycle where u is scheduled.
1147 'l(u)' is the latency of u.
1148 'd(v,u)' is the dependence distance from v to u.
1149 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1150 the node ordering phase.
1151 'check_hardware_resources_conflicts(u, PS, c)'
1152 run a trace around cycle/slot through DFA model
1153 to check resource conflicts involving instruction u
1154 at cycle c given the partial schedule PS.
1155 'add_to_partial_schedule_at_time(u, PS, c)'
1156 Add the node/instruction u to the partial schedule
1157 PS at time c.
1158 'calculate_register_pressure(PS)'
1159 Given a schedule of instructions, calculate the register
1160 pressure it implies. One implementation could be the
1161 maximum number of overlapping live ranges.
1162 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1163 registers available in the hardware.
1165 1. II = MII.
1166 2. PS = empty list
1167 3. for each node u in O in pre-computed order
1168 4. if (PSP(u) != Q && PSS(u) == Q) then
   5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   6. start = Early_start; end = Early_start + II - 1; step = 1
   11. else if (PSP(u) == Q && PSS(u) != Q) then
   12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   13. start = Late_start; end = Late_start - II + 1; step = -1
   14. else if (PSP(u) != Q && PSS(u) != Q) then
   15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1177 17. start = Early_start;
1178 18. end = min(Early_start + II - 1 , Late_start);
1179 19. step = 1
1180 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1181 21. start = ASAP(u); end = start + II - 1; step = 1
1182 22. endif
1184 23. success = false
1185 24. for (c = start ; c != end ; c += step)
1186 25. if check_hardware_resources_conflicts(u, PS, c) then
1187 26. add_to_partial_schedule_at_time(u, PS, c)
1188 27. success = true
1189 28. break
1190 29. endif
1191 30. endfor
1192 31. if (success == false) then
1193 32. II = II + 1
1194 33. if (II > maxII) then
1195 34. finish - failed to schedule
1196 35. endif
1197 36. goto 2.
1198 37. endif
1199 38. endfor
1200 39. if (calculate_register_pressure(PS) > maxRP) then
1201 40. goto 32.
1202 41. endif
1203 42. compute epilogue & prologue
1204 43. finish - succeeded to schedule
1207 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1208 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1209 set to 0 to save compile time. */
1210 #define DFA_HISTORY SMS_DFA_HISTORY
1212 /* Given the partial schedule PS, this function calculates and returns the
1213 cycles in which we can schedule the node with the given index I.
1214 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1215 noticed that there are several cases in which we fail to SMS the loop
1216 because the sched window of a node is empty due to tight data-deps. In
1217 such cases we want to unschedule some of the predecessors/successors
1218 until we get non-empty scheduling window. It returns -1 if the
1219 scheduling window is empty and zero otherwise. */
static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  PSP/PSS are the
     already-scheduled predecessors/successors of U; the four cases below
     mirror steps 4-21 of the algorithm description above.  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  if (psp_not_empty && !pss_not_empty)
    {
      /* Only predecessors scheduled: schedule U as early as possible,
	 scanning the window upwards from Early_start.  */
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;
	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int node_st = SCHED_TIME (v_node)
			    + e->latency - (e->distance * ii);

	      early_start = MAX (early_start, node_st);

	      /* A memory dependence additionally caps the window so the
		 two accesses stay within one II of each other.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      step = 1;
    }

  else if (!psp_not_empty && pss_not_empty)
    {
      /* Only successors scheduled: schedule U as late as possible,
	 scanning the window downwards from Late_start.  */
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;
	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      late_start = MIN (late_start,
				SCHED_TIME (v_node) - e->latency
				+ (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
	    }
	}
      start = late_start;
      end = MAX (end, late_start - ii);
      step = -1;
    }

  else if (psp_not_empty && pss_not_empty)
    {
      /* Both sides scheduled: the window runs upwards from Early_start
	 but must not pass Late_start (nor span more than II cycles).  */
      int early_start = INT_MIN;
      int late_start = INT_MAX;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      early_start = MAX (early_start,
				 SCHED_TIME (v_node) + e->latency
				 - (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      late_start = MIN (late_start,
				SCHED_TIME (v_node) - e->latency
				+ (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
    }
  else /* psp is empty && pss is empty.  */
    {
      /* No scheduled neighbors: fall back on the precomputed ASAP time
	 and allow a full II of candidate cycles.  */
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* Window is empty when start has already reached/passed end in the
     direction of STEP; the caller then unschedules neighbors.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    return -1;
  else
    return 0;
}
1344 /* This function implements the scheduling algorithm for SMS according to the
1345 above algorithm. */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success;
  int try_again_with_larger_ii = true;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  /* Keep trying until every schedulable node is placed; each failure
     restarts from an empty schedule with II incremented, up to MAXII.  */
  while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
	 || try_again_with_larger_ii ) && ii < maxii)
    {
      int j;
      bool unscheduled_nodes = false;

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      if (try_again_with_larger_ii)
	{
	  /* A fresh attempt with a larger II starts from scratch.  */
	  try_again_with_larger_ii = false;
	  sbitmap_zero (sched_nodes);
	}

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  /* Notes and other non-insns are never scheduled.  */
	  if (!INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  If it is empty,
	     unschedule U's already-placed predecessors/successors,
	     walking backwards through the precomputed order, until a
	     window opens.  */
	  j = i;
	  while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
		 && j > 0)
	    {
	      unscheduled_nodes = true;
	      if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
		  || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
		{
		  ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
		  RESET_BIT (sched_nodes, nodes_order [j - 1]);
		}
	      j--;
	    }
	  /* NOTE(review): J can never be negative here -- the loop above
	     stops at J == 0 -- so this branch looks unreachable and the
	     still-empty-window case falls through to the scheduling
	     attempt below.  Confirm whether J == 0 was intended.  */
	  if (j < 0)
	    {
	      /* ??? Try backtracking instead of immediately ii++?  */
	      ii++;
	      try_again_with_larger_ii = true;
	      reset_partial_schedule (ps, ii);
	      break;
	    }

	  /* 2. Try scheduling u in window.  */
	  if (dump_file)
	    fprintf (dump_file,
		     "Trying to schedule node %d in (%d .. %d) step %d\n",
		     u, start, end, step);

	  /* use must_follow & must_precede bitmaps to determine order
	     of nodes within the cycle.  */
	  sbitmap_zero (must_precede);
	  sbitmap_zero (must_follow);
	  /* TODO: We can add an insn to the must_precede or must_follow
	     bitmaps only if it has tight dependence to U and they
	     both scheduled in the same row.  The current check is less
	     conservative and content with the fact that both U and the
	     insn are scheduled in the same row.  */
	  for (e = u_node->in; e != 0; e = e->next_in)
	    if (TEST_BIT (sched_nodes, e->src->cuid)
		&& (SMODULO (SCHED_TIME (e->src), ii) == SMODULO (start, ii)))
	      SET_BIT (must_precede, e->src->cuid);

	  for (e = u_node->out; e != 0; e = e->next_out)
	    if (TEST_BIT (sched_nodes, e->dest->cuid)
		&& (SMODULO (SCHED_TIME (e->dest), ii) ==
		    SMODULO ((end - step), ii)))
	      SET_BIT (must_follow, e->dest->cuid);

	  /* Scan the window cycle by cycle in the direction of STEP,
	     taking the first conflict-free cycle.  */
	  success = 0;
	  if ((step > 0 && start < end) || (step < 0 && start > end))
	    for (c = start; c != end; c += step)
	      {
		ps_insn_ptr psi;

		psi = ps_add_node_check_conflicts (ps, u_node, c,
						   must_precede,
						   must_follow);

		if (psi)
		  {
		    SCHED_TIME (u_node) = c;
		    SET_BIT (sched_nodes, u);
		    success = 1;
		    if (dump_file)
		      fprintf (dump_file, "Schedule in %d\n", c);
		    break;
		  }
	      }
	  if (!success)
	    {
	      /* ??? Try backtracking instead of immediately ii++?  */
	      ii++;
	      try_again_with_larger_ii = true;
	      reset_partial_schedule (ps, ii);
	      break;
	    }
	  /* Nodes were unscheduled above; restart the ordering pass so
	     they get rescheduled before later nodes.  */
	  if (unscheduled_nodes)
	    break;

	  /* ??? If (success), check register pressure estimates.  */
	} /* Continue with next node.  */
    } /* While try_again_with_larger_ii.  */

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  /* Failure: II grew to MAXII without a complete schedule.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  return ps;
}
1497 /* This page implements the algorithm for ordering the nodes of a DDG
1498 for modulo scheduling, activated through the
1499 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1501 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1502 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1503 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1504 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1505 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1506 #define DEPTH(x) (ASAP ((x)))
1508 typedef struct node_order_params * nopa;
1510 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1511 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1512 static nopa calculate_order_params (ddg_ptr, int mii);
1513 static int find_max_asap (ddg_ptr, sbitmap);
1514 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1515 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1517 enum sms_direction {BOTTOMUP, TOPDOWN};
1519 struct node_order_params
1521 int asap;
1522 int alap;
1523 int height;
1526 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1527 static void
1528 check_nodes_order (int *node_order, int num_nodes)
1530 int i;
1531 sbitmap tmp = sbitmap_alloc (num_nodes);
1533 sbitmap_zero (tmp);
1535 for (i = 0; i < num_nodes; i++)
1537 int u = node_order[i];
1539 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1541 SET_BIT (tmp, u);
1544 sbitmap_free (tmp);
1547 /* Order the nodes of G for scheduling and pass the result in
1548 NODE_ORDER. Also set aux.count of each node to ASAP.
1549 Return the recMII for the given DDG. */
1550 static int
1551 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1553 int i;
1554 int rec_mii = 0;
1555 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1557 nopa nops = calculate_order_params (g, mii);
1559 if (dump_file)
1560 print_sccs (dump_file, sccs, g);
1562 order_nodes_of_sccs (sccs, node_order);
1564 if (sccs->num_sccs > 0)
1565 /* First SCC has the largest recurrence_length. */
1566 rec_mii = sccs->sccs[0]->recurrence_length;
1568 /* Save ASAP before destroying node_order_params. */
1569 for (i = 0; i < g->num_nodes; i++)
1571 ddg_node_ptr v = &g->nodes[i];
1572 v->aux.count = ASAP (v);
1575 free (nops);
1576 free_ddg_all_sccs (sccs);
1577 check_nodes_order (node_order, g->num_nodes);
1579 return rec_mii;
/* Order all nodes of the DDG, one SCC at a time from highest recMII
   downwards, writing the result into NODE_ORDER.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
1630 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
1631 static struct node_order_params *
1632 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1634 int u;
1635 int max_asap;
1636 int num_nodes = g->num_nodes;
1637 ddg_edge_ptr e;
1638 /* Allocate a place to hold ordering params for each node in the DDG. */
1639 nopa node_order_params_arr;
1641 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1642 node_order_params_arr = (nopa) xcalloc (num_nodes,
1643 sizeof (struct node_order_params));
1645 /* Set the aux pointer of each node to point to its order_params structure. */
1646 for (u = 0; u < num_nodes; u++)
1647 g->nodes[u].aux.info = &node_order_params_arr[u];
1649 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1650 calculate ASAP, ALAP, mobility, distance, and height for each node
1651 in the dependence (direct acyclic) graph. */
1653 /* We assume that the nodes in the array are in topological order. */
1655 max_asap = 0;
1656 for (u = 0; u < num_nodes; u++)
1658 ddg_node_ptr u_node = &g->nodes[u];
1660 ASAP (u_node) = 0;
1661 for (e = u_node->in; e; e = e->next_in)
1662 if (e->distance == 0)
1663 ASAP (u_node) = MAX (ASAP (u_node),
1664 ASAP (e->src) + e->latency);
1665 max_asap = MAX (max_asap, ASAP (u_node));
1668 for (u = num_nodes - 1; u > -1; u--)
1670 ddg_node_ptr u_node = &g->nodes[u];
1672 ALAP (u_node) = max_asap;
1673 HEIGHT (u_node) = 0;
1674 for (e = u_node->out; e; e = e->next_out)
1675 if (e->distance == 0)
1677 ALAP (u_node) = MIN (ALAP (u_node),
1678 ALAP (e->dest) - e->latency);
1679 HEIGHT (u_node) = MAX (HEIGHT (u_node),
1680 HEIGHT (e->dest) + e->latency);
1684 return node_order_params_arr;
1687 static int
1688 find_max_asap (ddg_ptr g, sbitmap nodes)
1690 unsigned int u = 0;
1691 int max_asap = -1;
1692 int result = -1;
1693 sbitmap_iterator sbi;
1695 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1697 ddg_node_ptr u_node = &g->nodes[u];
1699 if (max_asap < ASAP (u_node))
1701 max_asap = ASAP (u_node);
1702 result = u;
1705 return result;
1708 static int
1709 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1711 unsigned int u = 0;
1712 int max_hv = -1;
1713 int min_mob = INT_MAX;
1714 int result = -1;
1715 sbitmap_iterator sbi;
1717 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1719 ddg_node_ptr u_node = &g->nodes[u];
1721 if (max_hv < HEIGHT (u_node))
1723 max_hv = HEIGHT (u_node);
1724 min_mob = MOB (u_node);
1725 result = u;
1727 else if ((max_hv == HEIGHT (u_node))
1728 && (min_mob > MOB (u_node)))
1730 min_mob = MOB (u_node);
1731 result = u;
1734 return result;
1737 static int
1738 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1740 unsigned int u = 0;
1741 int max_dv = -1;
1742 int min_mob = INT_MAX;
1743 int result = -1;
1744 sbitmap_iterator sbi;
1746 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1748 ddg_node_ptr u_node = &g->nodes[u];
1750 if (max_dv < DEPTH (u_node))
1752 max_dv = DEPTH (u_node);
1753 min_mob = MOB (u_node);
1754 result = u;
1756 else if ((max_dv == DEPTH (u_node))
1757 && (min_mob > MOB (u_node)))
1759 min_mob = MOB (u_node);
1760 result = u;
1763 return result;
1766 /* Places the nodes of SCC into the NODE_ORDER array starting
1767 at position POS, according to the SMS ordering algorithm.
1768 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1769 the NODE_ORDER array, starting from position zero. */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset and pick the initial sweep direction: start from
     SCC members adjacent to already-ordered nodes if any exist, else
     from the SCC node with maximal ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  /* Alternate TOPDOWN/BOTTOMUP sweeps, draining the workset each time
     and refilling it from the opposite direction, until no unordered
     SCC node remains reachable.  */
  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  /* Emit workset nodes by max height / min mobility, pulling
	     each emitted node's in-SCC successors into the workset.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Switch direction: continue bottom-up from predecessors of
	     everything ordered so far.  */
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  /* Emit workset nodes by max depth / min mobility, pulling
	     each emitted node's in-SCC predecessors into the workset.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Switch direction: continue top-down from successors of
	     everything ordered so far.  */
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
1869 /* This page contains functions for manipulating partial-schedules during
1870 modulo scheduling. */
1872 /* Create a partial schedule and allocate a memory to hold II rows. */
1874 static partial_schedule_ptr
1875 create_partial_schedule (int ii, ddg_ptr g, int history)
1877 partial_schedule_ptr ps = XNEW (struct partial_schedule);
1878 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1879 ps->ii = ii;
1880 ps->history = history;
1881 ps->min_cycle = INT_MAX;
1882 ps->max_cycle = INT_MIN;
1883 ps->g = g;
1885 return ps;
1888 /* Free the PS_INSNs in rows array of the given partial schedule.
1889 ??? Consider caching the PS_INSN's. */
1890 static void
1891 free_ps_insns (partial_schedule_ptr ps)
1893 int i;
1895 for (i = 0; i < ps->ii; i++)
1897 while (ps->rows[i])
1899 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
1901 free (ps->rows[i]);
1902 ps->rows[i] = ps_insn;
1904 ps->rows[i] = NULL;
1908 /* Free all the memory allocated to the partial schedule. */
1910 static void
1911 free_partial_schedule (partial_schedule_ptr ps)
1913 if (!ps)
1914 return;
1915 free_ps_insns (ps);
1916 free (ps->rows);
1917 free (ps);
1920 /* Clear the rows array with its PS_INSNs, and create a new one with
1921 NEW_II rows. */
1923 static void
1924 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
1926 if (!ps)
1927 return;
1928 free_ps_insns (ps);
1929 if (new_ii == ps->ii)
1930 return;
1931 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
1932 * sizeof (ps_insn_ptr));
1933 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
1934 ps->ii = new_ii;
1935 ps->min_cycle = INT_MAX;
1936 ps->max_cycle = INT_MIN;
1939 /* Prints the partial schedule as an ii rows array, for each rows
1940 print the ids of the insns in it. */
1941 void
1942 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
1944 int i;
1946 for (i = 0; i < ps->ii; i++)
1948 ps_insn_ptr ps_i = ps->rows[i];
1950 fprintf (dump, "\n[CYCLE %d ]: ", i);
1951 while (ps_i)
1953 fprintf (dump, "%d, ",
1954 INSN_UID (ps_i->node->insn));
1955 ps_i = ps_i->next_in_row;
1960 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1961 static ps_insn_ptr
1962 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
1964 ps_insn_ptr ps_i = XNEW (struct ps_insn);
1966 ps_i->node = node;
1967 ps_i->next_in_row = NULL;
1968 ps_i->prev_in_row = NULL;
1969 ps_i->row_rest_count = rest_count;
1970 ps_i->cycle = cycle;
1972 return ps_i;
1976 /* Removes the given PS_INSN from the partial schedule. Returns false if the
1977 node is not found in the partial schedule, else returns true. */
1978 static bool
1979 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
1981 int row;
1983 if (!ps || !ps_i)
1984 return false;
1986 row = SMODULO (ps_i->cycle, ps->ii);
1987 if (! ps_i->prev_in_row)
1989 if (ps_i != ps->rows[row])
1990 return false;
1992 ps->rows[row] = ps_i->next_in_row;
1993 if (ps->rows[row])
1994 ps->rows[row]->prev_in_row = NULL;
1996 else
1998 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
1999 if (ps_i->next_in_row)
2000 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2002 free (ps_i);
2003 return true;
2006 /* Unlike what literature describes for modulo scheduling (which focuses
2007 on VLIW machines) the order of the instructions inside a cycle is
2008 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2009 where the current instruction should go relative to the already
2010 scheduled instructions in the given cycle. Go over these
2011 instructions and find the first possible column to put it in. */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* Nothing must precede PS_I: link it in as the new row head.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Link PS_I immediately after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
2069 /* Advances the PS_INSN one column in its current row; returns false
2070 in failure and true in success. Bit N is set in MUST_FOLLOW if
2071 the node with cuid N must be come after the node pointed to by
2072 PS_I when scheduled in the same cycle. */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already at the last column of the row: nowhere to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list:
     ... prev -> ps_i -> next ...  becomes  ... prev -> next -> ps_i ...  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* PS_I was the row head; NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2118 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2119 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2120 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2121 before/after (respectively) the node pointed to by PS_I when scheduled
2122 in the same cycle. */
2123 static ps_insn_ptr
2124 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2125 sbitmap must_precede, sbitmap must_follow)
2127 ps_insn_ptr ps_i;
2128 int rest_count = 1;
2129 int row = SMODULO (cycle, ps->ii);
2131 if (ps->rows[row]
2132 && ps->rows[row]->row_rest_count >= issue_rate)
2133 return NULL;
2135 if (ps->rows[row])
2136 rest_count += ps->rows[row]->row_rest_count;
2138 ps_i = create_ps_insn (node, rest_count, cycle);
2140 /* Finds and inserts PS_I according to MUST_FOLLOW and
2141 MUST_PRECEDE. */
2142 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2144 free (ps_i);
2145 return NULL;
2148 return ps_i;
2151 /* Advance time one cycle. Assumes DFA is being used. */
/* Advance time one cycle.  Assumes DFA is being used.  Feeds the
   target's optional pre/post-cycle dummy insns around the NULL
   transition that steps the automaton to the next cycle.  */
static void
advance_one_cycle (void)
{
  if (targetm.sched.dfa_pre_cycle_insn)
    state_transition (curr_state,
		      targetm.sched.dfa_pre_cycle_insn ());

  /* A NULL insn advances the DFA state by one cycle.  */
  state_transition (curr_state, NULL);

  if (targetm.sched.dfa_post_cycle_insn)
    state_transition (curr_state,
		      targetm.sched.dfa_post_cycle_insn ());
}
2168 /* Checks if PS has resource conflicts according to DFA, starting from
2169 FROM cycle to TO cycle; returns true if there are conflicts and false
2170 if there are no conflicts. Assumes DFA is being used. */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  /* Replay the schedule from a clean DFA state.  */
  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  if (!INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      targetm.sched.variable_issue (sched_dump, sched_verbose,
					    insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2220 /* Checks if the given node causes resource conflicts when added to PS at
2221 cycle C. If not the node is added to PS and returned; otherwise zero
2222 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2223 cuid N must be come before/after (respectively) the node pointed to by
2224 PS_I when scheduled in the same cycle. */
2225 ps_insn_ptr
2226 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2227 int c, sbitmap must_precede,
2228 sbitmap must_follow)
2230 int has_conflicts = 0;
2231 ps_insn_ptr ps_i;
2233 /* First add the node to the PS, if this succeeds check for
2234 conflicts, trying different issue slots in the same row. */
2235 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2236 return NULL; /* Failed to insert the node at the given cycle. */
2238 has_conflicts = ps_has_conflicts (ps, c, c)
2239 || (ps->history > 0
2240 && ps_has_conflicts (ps,
2241 c - ps->history,
2242 c + ps->history));
2244 /* Try different issue slots to find one that the given node can be
2245 scheduled in without conflicts. */
2246 while (has_conflicts)
2248 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2249 break;
2250 has_conflicts = ps_has_conflicts (ps, c, c)
2251 || (ps->history > 0
2252 && ps_has_conflicts (ps,
2253 c - ps->history,
2254 c + ps->history));
2257 if (has_conflicts)
2259 remove_node_from_ps (ps, ps_i);
2260 return NULL;
2263 ps->min_cycle = MIN (ps->min_cycle, c);
2264 ps->max_cycle = MAX (ps->max_cycle, c);
2265 return ps_i;
2268 /* Rotate the rows of PS such that insns scheduled at time
2269 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2270 void
2271 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2273 int i, row, backward_rotates;
2274 int last_row = ps->ii - 1;
2276 if (start_cycle == 0)
2277 return;
2279 backward_rotates = SMODULO (start_cycle, ps->ii);
2281 /* Revisit later and optimize this into a single loop. */
2282 for (i = 0; i < backward_rotates; i++)
2284 ps_insn_ptr first_row = ps->rows[0];
2286 for (row = 0; row < last_row; row++)
2287 ps->rows[row] = ps->rows[row+1];
2289 ps->rows[last_row] = first_row;
2292 ps->max_cycle -= start_cycle;
2293 ps->min_cycle -= start_cycle;
2296 /* Remove the node N from the partial schedule PS; because we restart the DFA
2297 each time we want to check for resource conflicts; this is equivalent to
2298 unscheduling the node N. */
2299 static bool
2300 ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2302 ps_insn_ptr ps_i;
2303 int row = SMODULO (SCHED_TIME (n), ps->ii);
2305 if (row < 0 || row > ps->ii)
2306 return false;
2308 for (ps_i = ps->rows[row];
2309 ps_i && ps_i->node != n;
2310 ps_i = ps_i->next_in_row);
2311 if (!ps_i)
2312 return false;
2314 return remove_node_from_ps (ps, ps_i);
2316 #endif /* INSN_SCHEDULING */
2318 static bool
2319 gate_handle_sms (void)
2321 return (optimize > 0 && flag_modulo_sched);
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  cfg_layout_finalize expects the desired
     block order to be recorded through the AUX chain; link each block
     to its successor, leaving the block before EXIT unchained.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  /* Dominance info is stale after scheduling; drop it before leaving
     cfglayout mode.  */
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2350 struct tree_opt_pass pass_sms =
2352 "sms", /* name */
2353 gate_handle_sms, /* gate */
2354 rest_of_handle_sms, /* execute */
2355 NULL, /* sub */
2356 NULL, /* next */
2357 0, /* static_pass_number */
2358 TV_SMS, /* tv_id */
2359 0, /* properties_required */
2360 0, /* properties_provided */
2361 0, /* properties_destroyed */
2362 TODO_dump_func, /* todo_flags_start */
2363 TODO_df_finish |
2364 TODO_dump_func |
2365 TODO_ggc_collect, /* todo_flags_finish */
2366 'm' /* letter */