1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
3 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tm_p.h"
29 #include "hard-reg-set.h"
30 #include "regs.h"
31 #include "hashtab.h"
32 #include "hash-set.h"
33 #include "vec.h"
34 #include "machmode.h"
35 #include "input.h"
36 #include "function.h"
37 #include "profile.h"
38 #include "flags.h"
39 #include "insn-config.h"
40 #include "insn-attr.h"
41 #include "except.h"
42 #include "recog.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "cfgrtl.h"
46 #include "predict.h"
47 #include "basic-block.h"
48 #include "sched-int.h"
49 #include "target.h"
50 #include "cfgloop.h"
51 #include "tree-core.h"
52 #include "insn-codes.h"
53 #include "optabs.h"
54 #include "expr.h"
55 #include "params.h"
56 #include "gcov-io.h"
57 #include "sbitmap.h"
58 #include "df.h"
59 #include "ddg.h"
60 #include "tree-pass.h"
61 #include "dbgcnt.h"
62 #include "loop-unroll.h"
64 #ifdef INSN_SCHEDULING
66 /* This file contains the implementation of the Swing Modulo Scheduler,
67 described in the following references:
68 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
69 Lifetime-sensitive modulo scheduling in a production environment.
70 IEEE Trans. on Comps., 50(3), March 2001
71 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
72 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
73 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
75 The basic structure is:
76 1. Build a data-dependence graph (DDG) for each loop.
77 2. Use the DDG to order the insns of a loop (not in topological order
78 necessarily, but rather) trying to place each insn after all its
79 predecessors _or_ after all its successors.
80 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
81 4. Use the ordering to perform list-scheduling of the loop:
82 1. Set II = MII. We will try to schedule the loop within II cycles.
83 2. Try to schedule the insns one by one according to the ordering.
84 For each insn compute an interval of cycles by considering already-
85 scheduled preds and succs (and associated latencies); try to place
86 the insn in the cycles of this window checking for potential
87 resource conflicts (using the DFA interface).
88 Note: this is different from the cycle-scheduling of schedule_insns;
89 here the insns are not scheduled monotonically top-down (nor bottom-
90 up).
91 3. If failed in scheduling all insns - bump II++ and try again, unless
92 II reaches an upper bound MaxII, in which case report failure.
93 5. If we succeeded in scheduling the loop within II cycles, we now
94 generate prolog and epilog, decrease the counter of the loop, and
95 perform modulo variable expansion for live ranges that span more than
96 II cycles (i.e. use register copies to prevent a def from overwriting
97 itself before reaching the use).
99 SMS works with countable loops (1) whose control part can be easily
100 decoupled from the rest of the loop and (2) whose loop count can
101 be easily adjusted. This is because we peel a constant number of
102 iterations into a prologue and epilogue for which we want to avoid
103 emitting the control part, and a kernel which is to iterate that
104 constant number of iterations less than the original loop. So the
105 control part should be a set of insns clearly identified and having
106 its own iv, not otherwise used in the loop (at least for now), which
107 initializes a register before the loop to the number of iterations.
108 Currently SMS relies on the do-loop pattern to recognize such loops,
109 where (1) the control part comprises all insns defining and/or
110 using a certain 'count' register and (2) the loop count can be
111 adjusted by modifying this register prior to the loop.
112 TODO: Rely on cfgloop analysis instead. */
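/* Illustration (not part of the original comment): if the scheduled kernel
   ends up with a stage count of 3, the prolog and epilog together cover the
   work of 2 iterations, and for a known constant count of, say, 100 the
   kernel's count register is set to 100 - 3 + 1 = 98, matching the
   loop_count - stage_count + 1 adjustment applied later in sms_schedule.  */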
114 /* This page defines partial-schedule structures and functions for
115 modulo scheduling. */
117 typedef struct partial_schedule *partial_schedule_ptr;
118 typedef struct ps_insn *ps_insn_ptr;
120 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
121 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
123 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
124 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
126 /* Perform signed modulo, always returning a non-negative value. */
127 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
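/* For example, SMODULO (-7, 5) is 3 whereas C's -7 % 5 is -2; this keeps
   row indices computed from negative cycles within [0, y).  */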
129 /* The number of different iterations the nodes in ps span, assuming
130 the stage boundaries are placed efficiently. */
131 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
132 + 1 + ii - 1) / ii)
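/* For instance, CALC_STAGE_COUNT (7, -3, 4) = (7 - (-3) + 1 + 4 - 1) / 4 = 3:
   cycles -3..7 can be covered by three stages of at most four cycles each.  */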
133 /* The stage count of ps. */
134 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
136 /* A single instruction in the partial schedule. */
137 struct ps_insn
139 /* Identifies the instruction to be scheduled. Values smaller than
140 the ddg's num_nodes refer directly to ddg nodes. A value X at or
141 above num_nodes refers to register move X - num_nodes. */
142 int id;
144 /* The (absolute) cycle in which the PS instruction is scheduled.
145 Same as SCHED_TIME (node). */
146 int cycle;
148 /* The next/prev PS_INSN in the same row. */
149 ps_insn_ptr next_in_row,
150 prev_in_row;
154 /* Information about a register move that has been added to a partial
155 schedule. */
156 struct ps_reg_move_info
158 /* The source of the move is defined by the ps_insn with id DEF.
159 The destination is used by the ps_insns with the ids in USES. */
160 int def;
161 sbitmap uses;
163 /* The original form of USES' instructions used OLD_REG, but they
164 should now use NEW_REG. */
165 rtx old_reg;
166 rtx new_reg;
168 /* The number of consecutive stages that the move occupies. */
169 int num_consecutive_stages;
171 /* An instruction that sets NEW_REG to the correct value. The first
172 move associated with DEF will have an rhs of OLD_REG; later moves
173 use the result of the previous move. */
174 rtx_insn *insn;
177 typedef struct ps_reg_move_info ps_reg_move_info;
179 /* Holds the partial schedule as an array of II rows. Each entry of the
180 array points to a linked list of PS_INSNs, which represents the
181 instructions that are scheduled for that row. */
182 struct partial_schedule
184 int ii; /* Number of rows in the partial schedule. */
185 int history; /* Threshold for conflict checking using DFA. */
187 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
188 ps_insn_ptr *rows;
190 /* All the moves added for this partial schedule. Index X has
191 a ps_insn id of X + g->num_nodes. */
192 vec<ps_reg_move_info> reg_moves;
194 /* rows_length[i] holds the number of instructions in the row.
195 It is used only (as an optimization) to back off quickly from
196 trying to schedule a node in a full row; that is, to avoid running
197 through futile DFA state transitions. */
198 int *rows_length;
200 /* The earliest absolute cycle of an insn in the partial schedule. */
201 int min_cycle;
203 /* The latest absolute cycle of an insn in the partial schedule. */
204 int max_cycle;
206 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
208 int stage_count; /* The stage count of the partial schedule. */
212 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
213 static void free_partial_schedule (partial_schedule_ptr);
214 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
215 void print_partial_schedule (partial_schedule_ptr, FILE *);
216 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
217 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
218 int, int, sbitmap, sbitmap);
219 static void rotate_partial_schedule (partial_schedule_ptr, int);
220 void set_row_column_for_ps (partial_schedule_ptr);
221 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
222 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
225 /* This page defines constants and structures for the modulo scheduling
226 driver. */
228 static int sms_order_nodes (ddg_ptr, int, int *, int *);
229 static void set_node_sched_params (ddg_ptr);
230 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
231 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
232 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
233 rtx, rtx);
234 static int calculate_stage_count (partial_schedule_ptr, int);
235 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
236 int, int, sbitmap, sbitmap, sbitmap);
237 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
238 sbitmap, int, int *, int *, int *);
239 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
240 sbitmap, int *, sbitmap, sbitmap);
241 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
243 #define NODE_ASAP(node) ((node)->aux.count)
245 #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
246 #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
247 #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
248 #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
249 #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
251 /* The scheduling parameters held for each node. */
252 typedef struct node_sched_params
254 int time; /* The absolute scheduling cycle. */
256 int row; /* Holds time % ii. */
257 int stage; /* Holds time / ii. */
259 /* The column of a node inside the ps. If nodes u, v are on the same row,
260 u will precede v if column (u) < column (v). */
261 int column;
262 } *node_sched_params_ptr;
264 typedef struct node_sched_params node_sched_params;
266 /* The following three functions are copied from the current scheduler
267 code in order to use sched_analyze() for computing the dependencies.
268 They are used when initializing the sched_info structure. */
269 static const char *
270 sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
272 static char tmp[80];
274 sprintf (tmp, "i%4d", INSN_UID (insn));
275 return tmp;
278 static void
279 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
280 regset used ATTRIBUTE_UNUSED)
284 static struct common_sched_info_def sms_common_sched_info;
286 static struct sched_deps_info_def sms_sched_deps_info =
288 compute_jump_reg_dependencies,
289 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
290 NULL,
291 0, 0, 0
294 static struct haifa_sched_info sms_sched_info =
296 NULL,
297 NULL,
298 NULL,
299 NULL,
300 NULL,
301 sms_print_insn,
302 NULL,
303 NULL, /* insn_finishes_block_p */
304 NULL, NULL,
305 NULL, NULL,
306 0, 0,
308 NULL, NULL, NULL, NULL,
309 NULL, NULL,
313 /* Partial schedule instruction ID in PS is a register move. Return
314 information about it. */
315 static struct ps_reg_move_info *
316 ps_reg_move (partial_schedule_ptr ps, int id)
318 gcc_checking_assert (id >= ps->g->num_nodes);
319 return &ps->reg_moves[id - ps->g->num_nodes];
322 /* Return the rtl instruction that is being scheduled by partial schedule
323 instruction ID, which belongs to schedule PS. */
324 static rtx_insn *
325 ps_rtl_insn (partial_schedule_ptr ps, int id)
327 if (id < ps->g->num_nodes)
328 return ps->g->nodes[id].insn;
329 else
330 return ps_reg_move (ps, id)->insn;
333 /* Partial schedule instruction ID, which belongs to PS, occurred in
334 the original (unscheduled) loop. Return the first instruction
335 in the loop that was associated with ps_rtl_insn (PS, ID).
336 If the instruction had some notes before it, this is the first
337 of those notes. */
338 static rtx_insn *
339 ps_first_note (partial_schedule_ptr ps, int id)
341 gcc_assert (id < ps->g->num_nodes);
342 return ps->g->nodes[id].first_note;
345 /* Return the number of consecutive stages that are occupied by
346 partial schedule instruction ID in PS. */
347 static int
348 ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
350 if (id < ps->g->num_nodes)
351 return 1;
352 else
353 return ps_reg_move (ps, id)->num_consecutive_stages;
356 /* Given HEAD and TAIL which are the first and last insns in a loop;
357 return the register which controls the loop. Return zero if it has
358 more than one occurrence in the loop besides the control part or if the
359 do-loop pattern is not of the form we expect. */
360 static rtx
361 doloop_register_get (rtx_insn *head ATTRIBUTE_UNUSED, rtx_insn *tail ATTRIBUTE_UNUSED)
363 #ifdef HAVE_doloop_end
364 rtx reg, condition;
365 rtx_insn *insn, *first_insn_not_to_check;
367 if (!JUMP_P (tail))
368 return NULL_RTX;
370 /* TODO: Free SMS's dependence on doloop_condition_get. */
371 condition = doloop_condition_get (tail);
372 if (! condition)
373 return NULL_RTX;
375 if (REG_P (XEXP (condition, 0)))
376 reg = XEXP (condition, 0);
377 else if (GET_CODE (XEXP (condition, 0)) == PLUS
378 && REG_P (XEXP (XEXP (condition, 0), 0)))
379 reg = XEXP (XEXP (condition, 0), 0);
380 else
381 gcc_unreachable ();
383 /* Check that the COUNT_REG has no other occurrences in the loop
384 until the decrement. We assume the control part consists of
385 either a single (parallel) branch-on-count or a (non-parallel)
386 branch immediately preceded by a single (decrement) insn. */
387 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
388 : prev_nondebug_insn (tail));
390 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
391 if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
393 if (dump_file)
395 fprintf (dump_file, "SMS count_reg found ");
396 print_rtl_single (dump_file, reg);
397 fprintf (dump_file, " outside control in insn:\n");
398 print_rtl_single (dump_file, insn);
401 return NULL_RTX;
404 return reg;
405 #else
406 return NULL_RTX;
407 #endif
410 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
411 that the number of iterations is a compile-time constant. If so,
412 return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
413 this constant. Otherwise return 0. */
414 static rtx_insn *
415 const_iteration_count (rtx count_reg, basic_block pre_header,
416 int64_t * count)
418 rtx_insn *insn;
419 rtx_insn *head, *tail;
421 if (! pre_header)
422 return NULL;
424 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
426 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
427 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
428 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
430 rtx pat = single_set (insn);
432 if (CONST_INT_P (SET_SRC (pat)))
434 *count = INTVAL (SET_SRC (pat));
435 return insn;
438 return NULL;
441 return NULL;
444 /* A very simple resource-based lower bound on the initiation interval.
445 ??? Improve the accuracy of this bound by considering the
446 utilization of various units. */
447 static int
448 res_MII (ddg_ptr g)
450 if (targetm.sched.sms_res_mii)
451 return targetm.sched.sms_res_mii (g);
453 return ((g->num_nodes - g->num_debug) / issue_rate);
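/* For example (illustrative only), a DDG with 12 non-debug nodes on a target
   that can issue 4 insns per cycle gives a resource-based MII of 12 / 4 = 3.  */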
457 /* A vector that contains the sched data for each ps_insn. */
458 static vec<node_sched_params> node_sched_param_vec;
460 /* Allocate sched_params for each node and initialize it. */
461 static void
462 set_node_sched_params (ddg_ptr g)
464 node_sched_param_vec.truncate (0);
465 node_sched_param_vec.safe_grow_cleared (g->num_nodes);
468 /* Make sure that node_sched_param_vec has an entry for every move in PS. */
469 static void
470 extend_node_sched_params (partial_schedule_ptr ps)
472 node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
473 + ps->reg_moves.length ());
476 /* Update the sched_params (time, row and stage) for node U using the II,
477 the CYCLE of U and MIN_CYCLE.
478 We're not simply taking the following
479 SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
480 because the stages may not be aligned on cycle 0. */
481 static void
482 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
484 int sc_until_cycle_zero;
485 int stage;
487 SCHED_TIME (u) = cycle;
488 SCHED_ROW (u) = SMODULO (cycle, ii);
490 /* The calculation of the stage count is done by adding the number
491 of stages before cycle zero and after cycle zero. */
492 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
494 if (SCHED_TIME (u) < 0)
496 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
497 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
499 else
501 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
502 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
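/* Worked example (illustrative): with ii = 4, min_cycle = -3 and cycle = 5,
   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, -3, 4) = 1 and
   stage = CALC_STAGE_COUNT (5, 0, 4) = 2, so SCHED_STAGE (u) becomes
   1 + 2 - 1 = 2 and SCHED_ROW (u) becomes SMODULO (5, 4) = 1.  */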
506 static void
507 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
509 int i;
511 if (! file)
512 return;
513 for (i = 0; i < num_nodes; i++)
515 node_sched_params_ptr nsp = SCHED_PARAMS (i);
517 fprintf (file, "Node = %d; INSN = %d\n", i,
518 INSN_UID (ps_rtl_insn (ps, i)));
519 fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
520 fprintf (file, " time = %d:\n", nsp->time);
521 fprintf (file, " stage = %d:\n", nsp->stage);
525 /* Set SCHED_COLUMN for each instruction in row ROW of PS. */
526 static void
527 set_columns_for_row (partial_schedule_ptr ps, int row)
529 ps_insn_ptr cur_insn;
530 int column;
532 column = 0;
533 for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
534 SCHED_COLUMN (cur_insn->id) = column++;
537 /* Set SCHED_COLUMN for each instruction in PS. */
538 static void
539 set_columns_for_ps (partial_schedule_ptr ps)
541 int row;
543 for (row = 0; row < ps->ii; row++)
544 set_columns_for_row (ps, row);
547 /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
548 Its single predecessor has already been scheduled, as have its
549 ddg node successors. (The move may also have another move as its
550 successor, in which case that successor will be scheduled later.)
552 The move is part of a chain that satisfies register dependencies
553 between a producing ddg node and various consuming ddg nodes.
554 If some of these dependencies have a distance of 1 (meaning that
555 the use is upward-exposed) then DISTANCE1_USES is nonnull and
556 contains the set of uses with distance-1 dependencies.
557 DISTANCE1_USES is null otherwise.
559 MUST_FOLLOW is a scratch bitmap that is big enough to hold
560 all current ps_insn ids.
562 Return true on success. */
563 static bool
564 schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
565 sbitmap distance1_uses, sbitmap must_follow)
567 unsigned int u;
568 int this_time, this_distance, this_start, this_end, this_latency;
569 int start, end, c, ii;
570 sbitmap_iterator sbi;
571 ps_reg_move_info *move;
572 rtx_insn *this_insn;
573 ps_insn_ptr psi;
575 move = ps_reg_move (ps, i_reg_move);
576 ii = ps->ii;
577 if (dump_file)
579 fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
580 ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
581 PS_MIN_CYCLE (ps));
582 print_rtl_single (dump_file, move->insn);
583 fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
584 fprintf (dump_file, "=========== =========== =====\n");
587 start = INT_MIN;
588 end = INT_MAX;
590 /* For dependencies of distance 1 between a producer ddg node A
591 and consumer ddg node B, we have a chain of dependencies:
593 A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
595 where Mi is the ith move. For dependencies of distance 0 between
596 a producer ddg node A and consumer ddg node C, we have a chain of
597 dependencies:
599 A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
601 where Mi' occupies the same position as Mi but occurs a stage later.
602 We can only schedule each move once, so if we have both types of
603 chain, we model the second as:
605 A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
607 First handle the dependencies between the previously-scheduled
608 predecessor and the move. */
609 this_insn = ps_rtl_insn (ps, move->def);
610 this_latency = insn_latency (this_insn, move->insn);
611 this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
612 this_time = SCHED_TIME (move->def) - this_distance * ii;
613 this_start = this_time + this_latency;
614 this_end = this_time + ii;
615 if (dump_file)
616 fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
617 this_start, this_end, SCHED_TIME (move->def),
618 INSN_UID (this_insn), this_latency, this_distance,
619 INSN_UID (move->insn));
621 if (start < this_start)
622 start = this_start;
623 if (end > this_end)
624 end = this_end;
626 /* Handle the dependencies between the move and previously-scheduled
627 successors. */
628 EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
630 this_insn = ps_rtl_insn (ps, u);
631 this_latency = insn_latency (move->insn, this_insn);
632 if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
633 this_distance = -1;
634 else
635 this_distance = 0;
636 this_time = SCHED_TIME (u) + this_distance * ii;
637 this_start = this_time - ii;
638 this_end = this_time - this_latency;
639 if (dump_file)
640 fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
641 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
642 this_latency, this_distance, INSN_UID (this_insn));
644 if (start < this_start)
645 start = this_start;
646 if (end > this_end)
647 end = this_end;
650 if (dump_file)
652 fprintf (dump_file, "----------- ----------- -----\n");
653 fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
656 bitmap_clear (must_follow);
657 bitmap_set_bit (must_follow, move->def);
659 start = MAX (start, end - (ii - 1));
660 for (c = end; c >= start; c--)
662 psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
663 move->uses, must_follow);
664 if (psi)
666 update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
667 if (dump_file)
668 fprintf (dump_file, "\nScheduled register move INSN %d at"
669 " time %d, row %d\n\n", INSN_UID (move->insn), c,
670 SCHED_ROW (i_reg_move));
671 return true;
675 if (dump_file)
676 fprintf (dump_file, "\nNo available slot\n\n");
678 return false;
682 Breaking intra-loop register anti-dependences:
683 Each intra-loop register anti-dependence implies a cross-iteration true
684 dependence of distance 1. Therefore, we can remove such false dependencies
685 and figure out if the partial schedule broke them by checking if (for a
686 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
687 if so generate a register move. The number of such moves is equal to:
688              SCHED_TIME (use) - SCHED_TIME (def)         { 0  broken
689 nreg_moves = ----------------------------------- + 1 -   {    dependence.
690                               ii                          { 1  if not.
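/* For instance (illustrative, ignoring the same-row adjustment made in
   schedule_reg_moves below): with ii = 4 and
   SCHED_TIME (use) - SCHED_TIME (def) = 9, a distance-0 dependence needs
   9/4 + 1 - 1 = 2 register moves, while a broken (distance-1) dependence
   needs 9/4 + 1 = 3, matching the (... + ii) / ii computation in the code.  */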
692 static bool
693 schedule_reg_moves (partial_schedule_ptr ps)
695 ddg_ptr g = ps->g;
696 int ii = ps->ii;
697 int i;
699 for (i = 0; i < g->num_nodes; i++)
701 ddg_node_ptr u = &g->nodes[i];
702 ddg_edge_ptr e;
703 int nreg_moves = 0, i_reg_move;
704 rtx prev_reg, old_reg;
705 int first_move;
706 int distances[2];
707 sbitmap must_follow;
708 sbitmap distance1_uses;
709 rtx set = single_set (u->insn);
711 /* Skip instructions that do not set a register. */
712 if ((set && !REG_P (SET_DEST (set))))
713 continue;
715 /* Compute the number of reg_moves needed for u, by looking at life
716 ranges started at u (excluding self-loops). */
717 distances[0] = distances[1] = false;
718 for (e = u->out; e; e = e->next_out)
719 if (e->type == TRUE_DEP && e->dest != e->src)
721 int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
722 - SCHED_TIME (e->src->cuid)) / ii;
724 if (e->distance == 1)
725 nreg_moves4e = (SCHED_TIME (e->dest->cuid)
726 - SCHED_TIME (e->src->cuid) + ii) / ii;
728 /* If dest precedes src in the schedule of the kernel, then dest
729 will read before src writes and we can save one reg_copy. */
730 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
731 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
732 nreg_moves4e--;
734 if (nreg_moves4e >= 1)
736 /* !single_set instructions are not supported yet and
737 thus we do not expect to encounter them in the loop
738 except in the doloop part. For the latter case
739 we assume no regmoves are generated as the doloop
740 instructions are tied to the branch with an edge. */
741 gcc_assert (set);
742 /* If the instruction contains an auto-inc register then
743 validate that the regmove is being generated for the
744 target register rather than the inc'ed register. */
745 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
748 if (nreg_moves4e)
750 gcc_assert (e->distance < 2);
751 distances[e->distance] = true;
753 nreg_moves = MAX (nreg_moves, nreg_moves4e);
756 if (nreg_moves == 0)
757 continue;
759 /* Create NREG_MOVES register moves. */
760 first_move = ps->reg_moves.length ();
761 ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
762 extend_node_sched_params (ps);
764 /* Record the moves associated with this node. */
765 first_move += ps->g->num_nodes;
767 /* Generate each move. */
768 old_reg = prev_reg = SET_DEST (single_set (u->insn));
769 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
771 ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
773 move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
774 move->uses = sbitmap_alloc (first_move + nreg_moves);
775 move->old_reg = old_reg;
776 move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
777 move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
778 move->insn = as_a <rtx_insn *> (gen_move_insn (move->new_reg,
779 copy_rtx (prev_reg)));
780 bitmap_clear (move->uses);
782 prev_reg = move->new_reg;
785 distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
787 if (distance1_uses)
788 bitmap_clear (distance1_uses);
790 /* Every use of the register defined by node may require a different
791 copy of this register, depending on the time the use is scheduled.
792 Record which uses require which move results. */
793 for (e = u->out; e; e = e->next_out)
794 if (e->type == TRUE_DEP && e->dest != e->src)
796 int dest_copy = (SCHED_TIME (e->dest->cuid)
797 - SCHED_TIME (e->src->cuid)) / ii;
799 if (e->distance == 1)
800 dest_copy = (SCHED_TIME (e->dest->cuid)
801 - SCHED_TIME (e->src->cuid) + ii) / ii;
803 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
804 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
805 dest_copy--;
807 if (dest_copy)
809 ps_reg_move_info *move;
811 move = ps_reg_move (ps, first_move + dest_copy - 1);
812 bitmap_set_bit (move->uses, e->dest->cuid);
813 if (e->distance == 1)
814 bitmap_set_bit (distance1_uses, e->dest->cuid);
818 must_follow = sbitmap_alloc (first_move + nreg_moves);
819 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
820 if (!schedule_reg_move (ps, first_move + i_reg_move,
821 distance1_uses, must_follow))
822 break;
823 sbitmap_free (must_follow);
824 if (distance1_uses)
825 sbitmap_free (distance1_uses);
826 if (i_reg_move < nreg_moves)
827 return false;
829 return true;
832 /* Emit the moves associated with PS. Apply the substitutions
833 associated with them. */
834 static void
835 apply_reg_moves (partial_schedule_ptr ps)
837 ps_reg_move_info *move;
838 int i;
840 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
842 unsigned int i_use;
843 sbitmap_iterator sbi;
845 EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
847 replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
848 df_insn_rescan (ps->g->nodes[i_use].insn);
853 /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
854 SCHED_ROW and SCHED_STAGE. An instruction scheduled in cycle AMOUNT
855 will move to cycle zero. */
856 static void
857 reset_sched_times (partial_schedule_ptr ps, int amount)
859 int row;
860 int ii = ps->ii;
861 ps_insn_ptr crr_insn;
863 for (row = 0; row < ii; row++)
864 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
866 int u = crr_insn->id;
867 int normalized_time = SCHED_TIME (u) - amount;
868 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
870 if (dump_file)
872 /* Print the scheduling times after the rotation. */
873 rtx_insn *insn = ps_rtl_insn (ps, u);
875 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
876 "crr_insn->cycle=%d, min_cycle=%d", u,
877 INSN_UID (insn), normalized_time, new_min_cycle);
878 if (JUMP_P (insn))
879 fprintf (dump_file, " (branch)");
880 fprintf (dump_file, "\n");
883 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
884 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
886 crr_insn->cycle = normalized_time;
887 update_node_sched_params (u, ii, normalized_time, new_min_cycle);
891 /* Permute the insns according to their order in PS, from row 0 to
892 row ii-1, and position them right before LAST. This schedules
893 the insns of the loop kernel. */
894 static void
895 permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
897 int ii = ps->ii;
898 int row;
899 ps_insn_ptr ps_ij;
901 for (row = 0; row < ii ; row++)
902 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
904 rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
906 if (PREV_INSN (last) != insn)
908 if (ps_ij->id < ps->g->num_nodes)
909 reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
910 PREV_INSN (last));
911 else
912 add_insn_before (insn, last, NULL);
917 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
918 respectively only if cycle C falls on the border of the scheduling
919 window boundaries marked by START and END cycles. STEP is the
920 direction of the window. */
921 static inline void
922 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
923 sbitmap *tmp_precede, sbitmap must_precede, int c,
924 int start, int end, int step)
926 *tmp_precede = NULL;
927 *tmp_follow = NULL;
929 if (c == start)
931 if (step == 1)
932 *tmp_precede = must_precede;
933 else /* step == -1. */
934 *tmp_follow = must_follow;
936 if (c == end - step)
938 if (step == 1)
939 *tmp_follow = must_follow;
940 else /* step == -1. */
941 *tmp_precede = must_precede;
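/* For example (illustrative): scanning a forward window with start = 5,
   end = 9 and step = 1, only cycle 5 receives MUST_PRECEDE and only cycle
   8 (end - step) receives MUST_FOLLOW; the cycles in between get neither.  */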
946 /* Return True if the branch can be moved to row ii-1 while
947 normalizing the partial schedule PS to start from cycle zero and thus
948 optimize the SC. Otherwise return False. */
949 static bool
950 optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
952 int amount = PS_MIN_CYCLE (ps);
953 sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
954 int start, end, step;
955 int ii = ps->ii;
956 bool ok = false;
957 int stage_count, stage_count_curr;
959 /* Compare the SC after normalization and SC after bringing the branch
960 to row ii-1. If they are equal just bail out. */
961 stage_count = calculate_stage_count (ps, amount);
962 stage_count_curr =
963 calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
965 if (stage_count == stage_count_curr)
967 if (dump_file)
968 fprintf (dump_file, "SMS SC already optimized.\n");
970 ok = false;
971 goto clear;
974 if (dump_file)
976 fprintf (dump_file, "SMS Trying to optimize branch location\n");
977 fprintf (dump_file, "SMS partial schedule before trial:\n");
978 print_partial_schedule (ps, dump_file);
981 /* First, normalize the partial scheduling. */
982 reset_sched_times (ps, amount);
983 rotate_partial_schedule (ps, amount);
984 if (dump_file)
986 fprintf (dump_file,
987 "SMS partial schedule after normalization (ii, %d, SC %d):\n",
988 ii, stage_count);
989 print_partial_schedule (ps, dump_file);
992 if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
994 ok = true;
995 goto clear;
998 bitmap_ones (sched_nodes);
1000 /* Calculate the new placement of the branch. It should be in row
1001 ii-1 and fall into its scheduling window. */
1002 if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
1003 &step, &end) == 0)
1005 bool success;
1006 ps_insn_ptr next_ps_i;
1007 int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
1008 int row = SMODULO (branch_cycle, ps->ii);
1009 int num_splits = 0;
1010 sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
1011 int c;
1013 if (dump_file)
1014 fprintf (dump_file, "\nTrying to schedule node %d "
1015 "INSN = %d in (%d .. %d) step %d\n",
1016 g->closing_branch->cuid,
1017 (INSN_UID (g->closing_branch->insn)), start, end, step);
1019 gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1020 if (step == 1)
1022 c = start + ii - SMODULO (start, ii) - 1;
1023 gcc_assert (c >= start);
1024 if (c >= end)
1026 ok = false;
1027 if (dump_file)
1028 fprintf (dump_file,
1029 "SMS failed to schedule branch at cycle: %d\n", c);
1030 goto clear;
1033 else
1035 c = start - SMODULO (start, ii) - 1;
1036 gcc_assert (c <= start);
1038 if (c <= end)
1040 if (dump_file)
1041 fprintf (dump_file,
1042 "SMS failed to schedule branch at cycle: %d\n", c);
1043 ok = false;
1044 goto clear;
1048 must_precede = sbitmap_alloc (g->num_nodes);
1049 must_follow = sbitmap_alloc (g->num_nodes);
1051 /* Try to schedule the branch in its new cycle. */
1052 calculate_must_precede_follow (g->closing_branch, start, end,
1053 step, ii, sched_nodes,
1054 must_precede, must_follow);
1056 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1057 must_precede, c, start, end, step);
1059 /* Find the element in the partial schedule related to the closing
1060 branch so we can remove it from its current cycle. */
1061 for (next_ps_i = ps->rows[row];
1062 next_ps_i; next_ps_i = next_ps_i->next_in_row)
1063 if (next_ps_i->id == g->closing_branch->cuid)
1064 break;
1066 remove_node_from_ps (ps, next_ps_i);
1067 success =
1068 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1069 sched_nodes, &num_splits,
1070 tmp_precede, tmp_follow);
1071 gcc_assert (num_splits == 0);
1072 if (!success)
1074 if (dump_file)
1075 fprintf (dump_file,
1076 "SMS failed to schedule branch at cycle: %d, "
1077 "bringing it back to cycle %d\n", c, branch_cycle);
1079 /* We failed to place the branch in row ii - 1.
1080 Put it back in its original place in the partial
1081 schedule. */
1082 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1083 must_precede, branch_cycle, start, end,
1084 step);
1085 success =
1086 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1087 branch_cycle, sched_nodes,
1088 &num_splits, tmp_precede,
1089 tmp_follow);
1090 gcc_assert (success && (num_splits == 0));
1091 ok = false;
1093 else
1095 /* The branch is placed in row ii - 1. */
1096 if (dump_file)
1097 fprintf (dump_file,
1098 "SMS success in moving branch to cycle %d\n", c);
1100 update_node_sched_params (g->closing_branch->cuid, ii, c,
1101 PS_MIN_CYCLE (ps));
1102 ok = true;
1105 free (must_precede);
1106 free (must_follow);
1109 clear:
1110 free (sched_nodes);
1111 return ok;
1114 static void
1115 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1116 int to_stage, rtx count_reg)
1118 int row;
1119 ps_insn_ptr ps_ij;
1121 for (row = 0; row < ps->ii; row++)
1122 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1124 int u = ps_ij->id;
1125 int first_u, last_u;
1126 rtx_insn *u_insn;
1128 /* Do not duplicate any insn which refers to count_reg as it
1129 belongs to the control part.
1130 The closing branch is scheduled as well and thus should
1131 be ignored.
1132 TODO: This should be done by analyzing the control part of
1133 the loop. */
1134 u_insn = ps_rtl_insn (ps, u);
1135 if (reg_mentioned_p (count_reg, u_insn)
1136 || JUMP_P (u_insn))
1137 continue;
1139 first_u = SCHED_STAGE (u);
1140 last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1141 if (from_stage <= last_u && to_stage >= first_u)
1143 if (u < ps->g->num_nodes)
1144 duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1145 else
1146 emit_insn (copy_rtx (PATTERN (u_insn)));
1152 /* Generate the instructions (including reg_moves) for prolog & epilog. */
1153 static void
1154 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1155 rtx count_reg, rtx count_init)
1157 int i;
1158 int last_stage = PS_STAGE_COUNT (ps) - 1;
1159 edge e;
1161 /* Generate the prolog, inserting its insns on the loop-entry edge. */
1162 start_sequence ();
1164 if (!count_init)
1166 /* Generate instructions at the beginning of the prolog to
1167 adjust the loop count by STAGE_COUNT. If loop count is constant
1168 (count_init), this constant is adjusted by STAGE_COUNT in
1169 generate_prolog_epilog function. */
1170 rtx sub_reg = NULL_RTX;
1172 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1173 gen_int_mode (last_stage,
1174 GET_MODE (count_reg)),
1175 count_reg, 1, OPTAB_DIRECT);
1176 gcc_assert (REG_P (sub_reg));
1177 if (REGNO (sub_reg) != REGNO (count_reg))
1178 emit_move_insn (count_reg, sub_reg);
1181 for (i = 0; i < last_stage; i++)
1182 duplicate_insns_of_cycles (ps, 0, i, count_reg);
1184 /* Put the prolog on the entry edge. */
1185 e = loop_preheader_edge (loop);
1186 split_edge_and_insert (e, get_insns ());
1187 if (!flag_resched_modulo_sched)
1188 e->dest->flags |= BB_DISABLE_SCHEDULE;
1190 end_sequence ();
1192 /* Generate the epilog, inserting its insns on the loop-exit edge. */
1193 start_sequence ();
1195 for (i = 0; i < last_stage; i++)
1196 duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1198 /* Put the epilogue on the exit edge. */
1199 gcc_assert (single_exit (loop));
1200 e = single_exit (loop);
1201 split_edge_and_insert (e, get_insns ());
1202 if (!flag_resched_modulo_sched)
1203 e->dest->flags |= BB_DISABLE_SCHEDULE;
1205 end_sequence ();
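/* Illustration (assuming a stage count of 3, i.e. last_stage = 2): the
   prolog above emits stage 0 and then stages 0-1, while the epilog emits
   stages 1-2 and then stage 2, so together they peel last_stage = 2
   iterations around the kernel.  */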
1208 /* Mark LOOP as software pipelined so the later
1209 scheduling passes don't touch it. */
1210 static void
1211 mark_loop_unsched (struct loop *loop)
1213 unsigned i;
1214 basic_block *bbs = get_loop_body (loop);
1216 for (i = 0; i < loop->num_nodes; i++)
1217 bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1219 free (bbs);
1222 /* Return true if all the BBs of the loop are empty except the
1223 loop header. */
1224 static bool
1225 loop_single_full_bb_p (struct loop *loop)
1227 unsigned i;
1228 basic_block *bbs = get_loop_body (loop);
1230 for (i = 0; i < loop->num_nodes ; i++)
1232 rtx_insn *head, *tail;
1233 bool empty_bb = true;
1235 if (bbs[i] == loop->header)
1236 continue;
1238 /* Make sure that basic blocks other than the header
1239 have only notes, labels or jumps. */
1240 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1241 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1243 if (NOTE_P (head) || LABEL_P (head)
1244 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1245 continue;
1246 empty_bb = false;
1247 break;
1250 if (! empty_bb)
1252 free (bbs);
1253 return false;
1256 free (bbs);
1257 return true;
1260 /* Dump file:line from INSN's location info to dump_file. */
1262 static void
1263 dump_insn_location (rtx_insn *insn)
1265 if (dump_file && INSN_HAS_LOCATION (insn))
1267 expanded_location xloc = insn_location (insn);
1268 fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1272 /* A simple loop from SMS point of view; it is a loop that is composed of
1273 either a single basic block or two BBs - a header and a latch. */
1274 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1275 && (EDGE_COUNT (loop->latch->preds) == 1) \
1276 && (EDGE_COUNT (loop->latch->succs) == 1))
1278 /* Return true if the loop is in its canonical form and false if not.
1279 i.e. it satisfies SIMPLE_SMS_LOOP_P, has a single preheader block, and a single exit. */
1280 static bool
1281 loop_canon_p (struct loop *loop)
1284 if (loop->inner || !loop_outer (loop))
1286 if (dump_file)
1287 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1288 return false;
1291 if (!single_exit (loop))
1293 if (dump_file)
1295 rtx_insn *insn = BB_END (loop->header);
1297 fprintf (dump_file, "SMS loop many exits");
1298 dump_insn_location (insn);
1299 fprintf (dump_file, "\n");
1301 return false;
1304 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1306 if (dump_file)
1308 rtx_insn *insn = BB_END (loop->header);
1310 fprintf (dump_file, "SMS loop many BBs.");
1311 dump_insn_location (insn);
1312 fprintf (dump_file, "\n");
1314 return false;
1317 return true;
1320 /* If there is more than one entry to the loop,
1321 make it one by splitting the first entry edge and
1322 redirecting the others to the new BB. */
1323 static void
1324 canon_loop (struct loop *loop)
1326 edge e;
1327 edge_iterator i;
1329 /* Avoid annoying special cases of edges going to exit
1330 block. */
1331 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1332 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1333 split_edge (e);
1335 if (loop->latch == loop->header
1336 || EDGE_COUNT (loop->latch->succs) > 1)
1338 FOR_EACH_EDGE (e, i, loop->header->preds)
1339 if (e->src == loop->latch)
1340 break;
1341 split_edge (e);
1345 /* Setup infos. */
1346 static void
1347 setup_sched_infos (void)
1349 memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1350 sizeof (sms_common_sched_info));
1351 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1352 common_sched_info = &sms_common_sched_info;
1354 sched_deps_info = &sms_sched_deps_info;
1355 current_sched_info = &sms_sched_info;
1358 /* Probability in % that the sms-ed loop rolls enough so that the optimized
1359 version may be entered. Just a guess. */
1360 #define PROB_SMS_ENOUGH_ITERATIONS 80
1362 /* Used to calculate the upper bound of ii. */
1363 #define MAXII_FACTOR 2
1365 /* Main entry point, perform SMS scheduling on the loops of the function
1366 that consist of single basic blocks. */
1367 static void
1368 sms_schedule (void)
1370 rtx_insn *insn;
1371 ddg_ptr *g_arr, g;
1372 int * node_order;
1373 int maxii, max_asap;
1374 partial_schedule_ptr ps;
1375 basic_block bb = NULL;
1376 struct loop *loop;
1377 basic_block condition_bb = NULL;
1378 edge latch_edge;
1379 gcov_type trip_count = 0;
1381 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1382 | LOOPS_HAVE_RECORDED_EXITS);
1383 if (number_of_loops (cfun) <= 1)
1385 loop_optimizer_finalize ();
1386 return; /* There are no loops to schedule. */
1389 /* Initialize issue_rate. */
1390 if (targetm.sched.issue_rate)
1392 int temp = reload_completed;
1394 reload_completed = 1;
1395 issue_rate = targetm.sched.issue_rate ();
1396 reload_completed = temp;
1398 else
1399 issue_rate = 1;
1401 /* Initialize the scheduler. */
1402 setup_sched_infos ();
1403 haifa_sched_init ();
1405 /* Allocate memory to hold the DDG array one entry for each loop.
1406 We use loop->num as index into this array. */
1407 g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1409 if (dump_file)
1411 fprintf (dump_file, "\n\nSMS analysis phase\n");
1412 fprintf (dump_file, "===================\n\n");
1415 /* Build DDGs for all the relevant loops and hold them in G_ARR
1416 indexed by the loop index. */
1417 FOR_EACH_LOOP (loop, 0)
1419 rtx_insn *head, *tail;
1420 rtx count_reg;
1422 /* For debugging. */
1423 if (dbg_cnt (sms_sched_loop) == false)
1425 if (dump_file)
1426 fprintf (dump_file, "SMS reached max limit... \n");
1428 break;
1431 if (dump_file)
1433 rtx_insn *insn = BB_END (loop->header);
1435 fprintf (dump_file, "SMS loop num: %d", loop->num);
1436 dump_insn_location (insn);
1437 fprintf (dump_file, "\n");
1440 if (! loop_canon_p (loop))
1441 continue;
1443 if (! loop_single_full_bb_p (loop))
1445 if (dump_file)
1446 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1447 continue;
1450 bb = loop->header;
1452 get_ebb_head_tail (bb, bb, &head, &tail);
1453 latch_edge = loop_latch_edge (loop);
1454 gcc_assert (single_exit (loop));
1455 if (single_exit (loop)->count)
1456 trip_count = latch_edge->count / single_exit (loop)->count;
1458 /* Perform SMS only on loops whose average count is above the threshold. */
1460 if ( latch_edge->count
1461 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1463 if (dump_file)
1465 dump_insn_location (tail);
1466 fprintf (dump_file, "\nSMS single-bb-loop\n");
1467 if (profile_info && flag_branch_probabilities)
1469 fprintf (dump_file, "SMS loop-count ");
1470 fprintf (dump_file, "%"PRId64,
1471 (int64_t) bb->count);
1472 fprintf (dump_file, "\n");
1473 fprintf (dump_file, "SMS trip-count ");
1474 fprintf (dump_file, "%"PRId64,
1475 (int64_t) trip_count);
1476 fprintf (dump_file, "\n");
1477 fprintf (dump_file, "SMS profile-sum-max ");
1478 fprintf (dump_file, "%"PRId64,
1479 (int64_t) profile_info->sum_max);
1480 fprintf (dump_file, "\n");
1483 continue;
1486 /* Make sure this is a doloop. */
1487 if ( !(count_reg = doloop_register_get (head, tail)))
1489 if (dump_file)
1490 fprintf (dump_file, "SMS doloop_register_get failed\n");
1491 continue;
1494 /* Don't handle BBs with calls or barriers
1495 or !single_set instructions, with the exception of instructions that include
1496 count_reg---these instructions are part of the control part
1497 that the do-loop pattern recognizes.
1498 ??? Should handle insns defining subregs. */
1499 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1501 rtx set;
1503 if (CALL_P (insn)
1504 || BARRIER_P (insn)
1505 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1506 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1507 && !reg_mentioned_p (count_reg, insn))
1508 || (INSN_P (insn) && (set = single_set (insn))
1509 && GET_CODE (SET_DEST (set)) == SUBREG))
1510 break;
1513 if (insn != NEXT_INSN (tail))
1515 if (dump_file)
1517 if (CALL_P (insn))
1518 fprintf (dump_file, "SMS loop-with-call\n");
1519 else if (BARRIER_P (insn))
1520 fprintf (dump_file, "SMS loop-with-barrier\n");
1521 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1522 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1523 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1524 else
1525 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1526 print_rtl_single (dump_file, insn);
1529 continue;
1532 /* Always schedule the closing branch with the rest of the
1533 instructions. The branch is rotated to be in row ii-1 at the
1534 end of the scheduling procedure to make sure it's the last
1535 instruction in the iteration. */
1536 if (! (g = create_ddg (bb, 1)))
1538 if (dump_file)
1539 fprintf (dump_file, "SMS create_ddg failed\n");
1540 continue;
1543 g_arr[loop->num] = g;
1544 if (dump_file)
1545 fprintf (dump_file, "...OK\n");
1548 if (dump_file)
1550 fprintf (dump_file, "\nSMS transformation phase\n");
1551 fprintf (dump_file, "=========================\n\n");
1554 /* We don't want to perform SMS on new loops created by versioning. */
1555 FOR_EACH_LOOP (loop, 0)
1557 rtx_insn *head, *tail;
1558 rtx count_reg;
1559 rtx_insn *count_init;
1560 int mii, rec_mii, stage_count, min_cycle;
1561 int64_t loop_count = 0;
1562 bool opt_sc_p;
1564 if (! (g = g_arr[loop->num]))
1565 continue;
1567 if (dump_file)
1569 rtx_insn *insn = BB_END (loop->header);
1571 fprintf (dump_file, "SMS loop num: %d", loop->num);
1572 dump_insn_location (insn);
1573 fprintf (dump_file, "\n");
1575 print_ddg (dump_file, g);
1578 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1580 latch_edge = loop_latch_edge (loop);
1581 gcc_assert (single_exit (loop));
1582 if (single_exit (loop)->count)
1583 trip_count = latch_edge->count / single_exit (loop)->count;
1585 if (dump_file)
1587 dump_insn_location (tail);
1588 fprintf (dump_file, "\nSMS single-bb-loop\n");
1589 if (profile_info && flag_branch_probabilities)
1591 fprintf (dump_file, "SMS loop-count ");
1592 fprintf (dump_file, "%"PRId64,
1593 (int64_t) bb->count);
1594 fprintf (dump_file, "\n");
1595 fprintf (dump_file, "SMS profile-sum-max ");
1596 fprintf (dump_file, "%"PRId64,
1597 (int64_t) profile_info->sum_max);
1598 fprintf (dump_file, "\n");
1600 fprintf (dump_file, "SMS doloop\n");
1601 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1602 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1603 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1607 /* In case the loop has a doloop register, it gets special
1608 handling. */
1609 count_init = NULL;
1610 if ((count_reg = doloop_register_get (head, tail)))
1612 basic_block pre_header;
1614 pre_header = loop_preheader_edge (loop)->src;
1615 count_init = const_iteration_count (count_reg, pre_header,
1616 &loop_count);
1618 gcc_assert (count_reg);
1620 if (dump_file && count_init)
1622 fprintf (dump_file, "SMS const-doloop ");
1623 fprintf (dump_file, "%"PRId64,
1624 loop_count);
1625 fprintf (dump_file, "\n");
1628 node_order = XNEWVEC (int, g->num_nodes);
1630 mii = 1; /* Need to pass some estimate of mii. */
1631 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1632 mii = MAX (res_MII (g), rec_mii);
1633 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1635 if (dump_file)
1636 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1637 rec_mii, mii, maxii);
1639 for (;;)
1641 set_node_sched_params (g);
1643 stage_count = 0;
1644 opt_sc_p = false;
1645 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1647 if (ps)
1649 /* Try to achieve optimized SC by normalizing the partial
1650 schedule (having the cycles start from cycle zero).
1651 The branch location must be placed in row ii-1 in the
1652 final scheduling. If failed, shift all instructions to
1653 position the branch in row ii-1. */
1654 opt_sc_p = optimize_sc (ps, g);
1655 if (opt_sc_p)
1656 stage_count = calculate_stage_count (ps, 0);
1657 else
1659 /* Bring the branch to cycle ii-1. */
1660 int amount = (SCHED_TIME (g->closing_branch->cuid)
1661 - (ps->ii - 1));
1663 if (dump_file)
1664 fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1666 stage_count = calculate_stage_count (ps, amount);
1669 gcc_assert (stage_count >= 1);
1672 /* The default value of PARAM_SMS_MIN_SC is 2 as a stage count of
1673 1 means that there is no interleaving between iterations, thus
1674 we let the scheduling passes do the job in this case. */
1675 if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1676 || (count_init && (loop_count <= stage_count))
1677 || (flag_branch_probabilities && (trip_count <= stage_count)))
1679 if (dump_file)
1681 fprintf (dump_file, "SMS failed... \n");
1682 fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1683 " loop-count=", stage_count);
1684 fprintf (dump_file, "%"PRId64, loop_count);
1685 fprintf (dump_file, ", trip-count=");
1686 fprintf (dump_file, "%"PRId64, trip_count);
1687 fprintf (dump_file, ")\n");
1689 break;
1692 if (!opt_sc_p)
1694 /* Rotate the partial schedule to have the branch in row ii-1. */
1695 int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1697 reset_sched_times (ps, amount);
1698 rotate_partial_schedule (ps, amount);
1701 set_columns_for_ps (ps);
1703 min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1704 if (!schedule_reg_moves (ps))
1706 mii = ps->ii + 1;
1707 free_partial_schedule (ps);
1708 continue;
1711 /* Moves that handle incoming values might have been added
1712 to a new first stage. Bump the stage count if so.
1714 ??? Perhaps we could consider rotating the schedule here
1715 instead? */
1716 if (PS_MIN_CYCLE (ps) < min_cycle)
1718 reset_sched_times (ps, 0);
1719 stage_count++;
1722 /* The stage count should now be correct without rotation. */
1723 gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1724 PS_STAGE_COUNT (ps) = stage_count;
1726 canon_loop (loop);
1728 if (dump_file)
1730 dump_insn_location (tail);
1731 fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1732 ps->ii, stage_count);
1733 print_partial_schedule (ps, dump_file);
1736 /* In case the BCT count is not known, do loop-versioning. */
1737 if (count_reg && ! count_init)
1739 rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1740 gen_int_mode (stage_count,
1741 GET_MODE (count_reg)));
1742 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1743 * REG_BR_PROB_BASE) / 100;
1745 loop_version (loop, comp_rtx, &condition_bb,
1746 prob, prob, REG_BR_PROB_BASE - prob,
1747 true);
1750 /* Set new iteration count of loop kernel. */
1751 if (count_reg && count_init)
1752 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1753 - stage_count + 1);
1755 /* Now apply the scheduled kernel to the RTL of the loop. */
1756 permute_partial_schedule (ps, g->closing_branch->first_note);
1758 /* Mark this loop as software pipelined so the later
1759 scheduling passes don't touch it. */
1760 if (! flag_resched_modulo_sched)
1761 mark_loop_unsched (loop);
1763 /* The life-info is not valid any more. */
1764 df_set_bb_dirty (g->bb);
1766 apply_reg_moves (ps);
1767 if (dump_file)
1768 print_node_sched_params (dump_file, g->num_nodes, ps);
1769 /* Generate prolog and epilog. */
1770 generate_prolog_epilog (ps, loop, count_reg, count_init);
1771 break;
1774 free_partial_schedule (ps);
1775 node_sched_param_vec.release ();
1776 free (node_order);
1777 free_ddg (g);
1780 free (g_arr);
1782 /* Release scheduler data, needed until now because of DFA. */
1783 haifa_sched_finish ();
1784 loop_optimizer_finalize ();
1787 /* The SMS scheduling algorithm itself
1788 -----------------------------------
1789 Input: 'O' an ordered list of insns of a loop.
1790 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1792 'Q' is the empty Set
1793 'PS' is the partial schedule; it holds the currently scheduled nodes with
1794 their cycle/slot.
1795 'PSP' previously scheduled predecessors.
1796 'PSS' previously scheduled successors.
1797 't(u)' the cycle where u is scheduled.
1798 'l(u)' is the latency of u.
1799 'd(v,u)' is the dependence distance from v to u.
1800 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1801 the node ordering phase.
1802 'check_hardware_resources_conflicts(u, PS, c)'
1803 run a trace around cycle/slot through DFA model
1804 to check resource conflicts involving instruction u
1805 at cycle c given the partial schedule PS.
1806 'add_to_partial_schedule_at_time(u, PS, c)'
1807 Add the node/instruction u to the partial schedule
1808 PS at time c.
1809 'calculate_register_pressure(PS)'
1810 Given a schedule of instructions, calculate the register
1811 pressure it implies. One implementation could be the
1812 maximum number of overlapping live ranges.
1813 'maxRP' The maximum allowed register pressure; it is usually derived from the number
1814 of registers available in the hardware.
1816 1. II = MII.
1817 2. PS = empty list
1818 3. for each node u in O in pre-computed order
1819 4. if (PSP(u) != Q && PSS(u) == Q) then
1820 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1821 6. start = Early_start; end = Early_start + II - 1; step = 1
1822 11. else if (PSP(u) == Q && PSS(u) != Q) then
1823 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1824 13. start = Late_start; end = Late_start - II + 1; step = -1
1825 14. else if (PSP(u) != Q && PSS(u) != Q) then
1826 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1827 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1828 17. start = Early_start;
1829 18. end = min(Early_start + II - 1 , Late_start);
1830 19. step = 1
1831 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1832 21. start = ASAP(u); end = start + II - 1; step = 1
1833 22. endif
1835 23. success = false
1836 24. for (c = start ; c != end ; c += step)
1837 25. if check_hardware_resources_conflicts(u, PS, c) then
1838 26. add_to_partial_schedule_at_time(u, PS, c)
1839 27. success = true
1840 28. break
1841 29. endif
1842 30. endfor
1843 31. if (success == false) then
1844 32. II = II + 1
1845 33. if (II > maxII) then
1846 34. finish - failed to schedule
1847 35. endif
1848 36. goto 2.
1849 37. endif
1850 38. endfor
1851 39. if (calculate_register_pressure(PS) > maxRP) then
1852 40. goto 32.
1853 41. endif
1854 42. compute epilogue & prologue
1855 43. finish - succeeded to schedule
1857 ??? The algorithm restricts the scheduling window to II cycles.
1858 In rare cases, it may be better to allow windows of II+1 cycles.
1859 The window would then start and end on the same row, but with
1860 different "must precede" and "must follow" requirements. */
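/* A small worked example of the window computation above (illustrative):
   with II = 4 and a single previously scheduled predecessor v having
   t(v) = 6, l(v) = 2 and d(v,u) = 1, Early_start(u) = 6 + 2 - 1*4 = 4,
   so the forward window tried for u is cycles 4 .. 4 + II - 1 = 7.  */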
1862 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1863 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1864 set to 0 to save compile time. */
1865 #define DFA_HISTORY SMS_DFA_HISTORY
1867 /* A threshold for the number of repeated unsuccessful attempts to insert
1868 an empty row, before we flush the partial schedule and start over. */
1869 #define MAX_SPLIT_NUM 10
1870 /* Given the partial schedule PS, this function calculates and returns the
1871 cycles in which we can schedule the node with the given index I.
1872 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1873 noticed that there are several cases in which we fail to SMS the loop
1874 because the sched window of a node is empty due to tight data-deps. In
1875 such cases we want to unschedule some of the predecessors/successors
1876    until we get a non-empty scheduling window.  It returns -1 if the
1877 scheduling window is empty and zero otherwise. */
1879 static int
1880 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1881 sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1882 int *end_p)
1884 int start, step, end;
1885 int early_start, late_start;
1886 ddg_edge_ptr e;
1887 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1888 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1889 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1890 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1891 int psp_not_empty;
1892 int pss_not_empty;
1893 int count_preds;
1894 int count_succs;
1896 /* 1. compute sched window for u (start, end, step). */
1897 bitmap_clear (psp);
1898 bitmap_clear (pss);
1899 psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1900 pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1902 /* We first compute a forward range (start <= end), then decide whether
1903 to reverse it. */
1904 early_start = INT_MIN;
1905 late_start = INT_MAX;
1906 start = INT_MIN;
1907 end = INT_MAX;
1908 step = 1;
1910 count_preds = 0;
1911 count_succs = 0;
1913 if (dump_file && (psp_not_empty || pss_not_empty))
1915 fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1916 "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1917 fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1918 "start", "early start", "late start", "end", "time");
1919 fprintf (dump_file, "=========== =========== =========== ==========="
1920 " =====\n");
1922 /* Calculate early_start and limit end. Both bounds are inclusive. */
1923 if (psp_not_empty)
1924 for (e = u_node->in; e != 0; e = e->next_in)
1926 int v = e->src->cuid;
1928 if (bitmap_bit_p (sched_nodes, v))
1930 int p_st = SCHED_TIME (v);
1931 int earliest = p_st + e->latency - (e->distance * ii);
1932 int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1934 if (dump_file)
1936 fprintf (dump_file, "%11s %11d %11s %11d %5d",
1937 "", earliest, "", latest, p_st);
1938 print_ddg_edge (dump_file, e);
1939 fprintf (dump_file, "\n");
1942 early_start = MAX (early_start, earliest);
1943 end = MIN (end, latest);
1945 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1946 count_preds++;
1950 /* Calculate late_start and limit start. Both bounds are inclusive. */
1951 if (pss_not_empty)
1952 for (e = u_node->out; e != 0; e = e->next_out)
1954 int v = e->dest->cuid;
1956 if (bitmap_bit_p (sched_nodes, v))
1958 int s_st = SCHED_TIME (v);
1959 int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1960 int latest = s_st - e->latency + (e->distance * ii);
1962 if (dump_file)
1964 fprintf (dump_file, "%11d %11s %11d %11s %5d",
1965 earliest, "", latest, "", s_st);
1966 print_ddg_edge (dump_file, e);
1967 fprintf (dump_file, "\n");
1970 start = MAX (start, earliest);
1971 late_start = MIN (late_start, latest);
1973 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1974 count_succs++;
1978 if (dump_file && (psp_not_empty || pss_not_empty))
1980 fprintf (dump_file, "----------- ----------- ----------- -----------"
1981 " -----\n");
1982 fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1983 start, early_start, late_start, end, "",
1984 "(max, max, min, min)");
1987 /* Get a target scheduling window no bigger than ii. */
1988 if (early_start == INT_MIN && late_start == INT_MAX)
1989 early_start = NODE_ASAP (u_node);
1990 else if (early_start == INT_MIN)
1991 early_start = late_start - (ii - 1);
1992 late_start = MIN (late_start, early_start + (ii - 1));
1994 /* Apply memory dependence limits. */
1995 start = MAX (start, early_start);
1996 end = MIN (end, late_start);
1998 if (dump_file && (psp_not_empty || pss_not_empty))
1999 fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
2000 "", start, end, "", "");
2002 /* If there are at least as many successors as predecessors, schedule the
2003 node close to its successors. */
2004 if (pss_not_empty && count_succs >= count_preds)
2006 int tmp = end;
2007 end = start;
2008 start = tmp;
2009 step = -1;
2012 /* Now that we've finalized the window, make END an exclusive rather
2013 than an inclusive bound. */
2014 end += step;
2016 *start_p = start;
2017 *step_p = step;
2018 *end_p = end;
2019 sbitmap_free (psp);
2020 sbitmap_free (pss);
2022 if ((start >= end && step == 1) || (start <= end && step == -1))
2024 if (dump_file)
2025 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2026 start, end, step);
2027 return -1;
2030 return 0;
2033 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE, which is the
2034    node currently being scheduled.  At the end of the calculation
2035 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2036 U_NODE which are (1) already scheduled in the first/last row of
2037 U_NODE's scheduling window, (2) whose dependence inequality with U
2038 becomes an equality when U is scheduled in this same row, and (3)
2039 whose dependence latency is zero.
2041 The first and last rows are calculated using the following parameters:
2042    START/END rows - The cycles that begin/end the traversal of the window,
2043    searching for an empty cycle to schedule U_NODE.
2044 STEP - The direction in which we traverse the window.
2045 II - The initiation interval. */
2047 static void
2048 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2049 int step, int ii, sbitmap sched_nodes,
2050 sbitmap must_precede, sbitmap must_follow)
2052 ddg_edge_ptr e;
2053 int first_cycle_in_window, last_cycle_in_window;
2055 gcc_assert (must_precede && must_follow);
2057 /* Consider the following scheduling window:
2058 {first_cycle_in_window, first_cycle_in_window+1, ...,
2059 last_cycle_in_window}. If step is 1 then the following will be
2060 the order we traverse the window: {start=first_cycle_in_window,
2061 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2062 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2063 end=first_cycle_in_window-1} if step is -1. */
2064 first_cycle_in_window = (step == 1) ? start : end - step;
2065 last_cycle_in_window = (step == 1) ? end - step : start;
2067 bitmap_clear (must_precede);
2068 bitmap_clear (must_follow);
2070 if (dump_file)
2071 fprintf (dump_file, "\nmust_precede: ");
2073 /* Instead of checking if:
2074 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2075 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2076 first_cycle_in_window)
2077 && e->latency == 0
2078 we use the fact that latency is non-negative:
2079 SCHED_TIME (e->src) - (e->distance * ii) <=
2080    SCHED_TIME (e->src) + e->latency - (e->distance * ii) <=
2081 first_cycle_in_window
2082 and check only if
2083 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
2084 for (e = u_node->in; e != 0; e = e->next_in)
2085 if (bitmap_bit_p (sched_nodes, e->src->cuid)
2086 && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2087 first_cycle_in_window))
2089 if (dump_file)
2090 fprintf (dump_file, "%d ", e->src->cuid);
2092 bitmap_set_bit (must_precede, e->src->cuid);
2095 if (dump_file)
2096 fprintf (dump_file, "\nmust_follow: ");
2098 /* Instead of checking if:
2099 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2100 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2101 last_cycle_in_window)
2102 && e->latency == 0
2103 we use the fact that latency is non-negative:
2104 SCHED_TIME (e->dest) + (e->distance * ii) >=
2105    SCHED_TIME (e->dest) - e->latency + (e->distance * ii) >=
2106 last_cycle_in_window
2107 and check only if
2108 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
2109 for (e = u_node->out; e != 0; e = e->next_out)
2110 if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2111 && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2112 last_cycle_in_window))
2114 if (dump_file)
2115 fprintf (dump_file, "%d ", e->dest->cuid);
2117 bitmap_set_bit (must_follow, e->dest->cuid);
2120 if (dump_file)
2121 fprintf (dump_file, "\n");
2124 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2125 parameters to decide if that's possible:
2126 PS - The partial schedule.
2127 U - The serial number of U_NODE.
2128 NUM_SPLITS - The number of row splits made so far.
2129 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2130 the first row of the scheduling window)
2131 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2132 last row of the scheduling window) */
2134 static bool
2135 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2136 int u, int cycle, sbitmap sched_nodes,
2137 int *num_splits, sbitmap must_precede,
2138 sbitmap must_follow)
2140 ps_insn_ptr psi;
2141   bool success = false;
2143 verify_partial_schedule (ps, sched_nodes);
2144 psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2145 if (psi)
2147 SCHED_TIME (u) = cycle;
2148 bitmap_set_bit (sched_nodes, u);
2149       success = true;
2150 *num_splits = 0;
2151 if (dump_file)
2152 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2156 return success;
2159 /* This function implements the SMS scheduling algorithm described
2160    above.  */
2161 static partial_schedule_ptr
2162 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2164 int ii = mii;
2165 int i, c, success, num_splits = 0;
2166 int flush_and_start_over = true;
2167 int num_nodes = g->num_nodes;
2168 int start, end, step; /* Place together into one struct? */
2169 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
2170 sbitmap must_precede = sbitmap_alloc (num_nodes);
2171 sbitmap must_follow = sbitmap_alloc (num_nodes);
2172 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
2174 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2176 bitmap_ones (tobe_scheduled);
2177 bitmap_clear (sched_nodes);
2179 while (flush_and_start_over && (ii < maxii))
2182 if (dump_file)
2183 fprintf (dump_file, "Starting with ii=%d\n", ii);
2184 flush_and_start_over = false;
2185 bitmap_clear (sched_nodes);
2187 for (i = 0; i < num_nodes; i++)
2189 int u = nodes_order[i];
2190 ddg_node_ptr u_node = &ps->g->nodes[u];
2191 rtx insn = u_node->insn;
2193 if (!NONDEBUG_INSN_P (insn))
2195 bitmap_clear_bit (tobe_scheduled, u);
2196 continue;
2199 if (bitmap_bit_p (sched_nodes, u))
2200 continue;
2202          /* Try to get a non-empty scheduling window.  */
2203 success = 0;
2204 if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2205 &step, &end) == 0)
2207 if (dump_file)
2208 fprintf (dump_file, "\nTrying to schedule node %d "
2209 "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
2210 (g->nodes[u].insn)), start, end, step);
2212 gcc_assert ((step > 0 && start < end)
2213 || (step < 0 && start > end));
2215 calculate_must_precede_follow (u_node, start, end, step, ii,
2216 sched_nodes, must_precede,
2217 must_follow);
2219 for (c = start; c != end; c += step)
2221 sbitmap tmp_precede, tmp_follow;
2223 set_must_precede_follow (&tmp_follow, must_follow,
2224 &tmp_precede, must_precede,
2225 c, start, end, step);
2226 success =
2227 try_scheduling_node_in_cycle (ps, u, c,
2228 sched_nodes,
2229 &num_splits, tmp_precede,
2230 tmp_follow);
2231 if (success)
2232 break;
2235 verify_partial_schedule (ps, sched_nodes);
2237 if (!success)
2239 int split_row;
2241 if (ii++ == maxii)
2242 break;
2244 if (num_splits >= MAX_SPLIT_NUM)
2246 num_splits = 0;
2247 flush_and_start_over = true;
2248 verify_partial_schedule (ps, sched_nodes);
2249 reset_partial_schedule (ps, ii);
2250 verify_partial_schedule (ps, sched_nodes);
2251 break;
2254 num_splits++;
2255              /* The scheduling window is exclusive of 'end'
2256                 whereas compute_split_row () expects an inclusive,
2257                 ordered range.  */
2258 if (step == 1)
2259 split_row = compute_split_row (sched_nodes, start, end - 1,
2260 ps->ii, u_node);
2261 else
2262 split_row = compute_split_row (sched_nodes, end + 1, start,
2263 ps->ii, u_node);
2265 ps_insert_empty_row (ps, split_row, sched_nodes);
2266 i--; /* Go back and retry node i. */
2268 if (dump_file)
2269 fprintf (dump_file, "num_splits=%d\n", num_splits);
2272 /* ??? If (success), check register pressure estimates. */
2273 } /* Continue with next node. */
2274 } /* While flush_and_start_over. */
2275 if (ii >= maxii)
2277 free_partial_schedule (ps);
2278 ps = NULL;
2280 else
2281 gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2283 sbitmap_free (sched_nodes);
2284 sbitmap_free (must_precede);
2285 sbitmap_free (must_follow);
2286 sbitmap_free (tobe_scheduled);
2288 return ps;
2291 /* This function inserts a new empty row into PS at the position
2292    given by SPLIT_ROW, keeping all already scheduled instructions
2293    intact and updating their SCHED_TIME and cycle accordingly.  */
2294 static void
2295 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2296 sbitmap sched_nodes)
2298 ps_insn_ptr crr_insn;
2299 ps_insn_ptr *rows_new;
2300 int ii = ps->ii;
2301 int new_ii = ii + 1;
2302 int row;
2303 int *rows_length_new;
2305 verify_partial_schedule (ps, sched_nodes);
2307   /* We normalize sched_time and rotate ps to have only non-negative sched
2308      times, for simplicity of updating cycles after inserting the new row.  */
2309 split_row -= ps->min_cycle;
2310 split_row = SMODULO (split_row, ii);
2311 if (dump_file)
2312 fprintf (dump_file, "split_row=%d\n", split_row);
2314 reset_sched_times (ps, PS_MIN_CYCLE (ps));
2315 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2317 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2318 rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
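  /* Rows before the split row keep their row index.  A node scheduled at
     cycle T (stage T / ii) moves to cycle T + T / ii, since the inserted
     row adds one cycle for each complete stage that precedes the node.  */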
2319 for (row = 0; row < split_row; row++)
2321 rows_new[row] = ps->rows[row];
2322 rows_length_new[row] = ps->rows_length[row];
2323 ps->rows[row] = NULL;
2324 for (crr_insn = rows_new[row];
2325 crr_insn; crr_insn = crr_insn->next_in_row)
2327 int u = crr_insn->id;
2328 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2330 SCHED_TIME (u) = new_time;
2331 crr_insn->cycle = new_time;
2332 SCHED_ROW (u) = new_time % new_ii;
2333 SCHED_STAGE (u) = new_time / new_ii;
2338 rows_new[split_row] = NULL;
2340 for (row = split_row; row < ii; row++)
2342 rows_new[row + 1] = ps->rows[row];
2343 rows_length_new[row + 1] = ps->rows_length[row];
2344 ps->rows[row] = NULL;
2345 for (crr_insn = rows_new[row + 1];
2346 crr_insn; crr_insn = crr_insn->next_in_row)
2348 int u = crr_insn->id;
2349 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2351 SCHED_TIME (u) = new_time;
2352 crr_insn->cycle = new_time;
2353 SCHED_ROW (u) = new_time % new_ii;
2354 SCHED_STAGE (u) = new_time / new_ii;
2358 /* Updating ps. */
2359 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2360 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2361 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2362 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2363 free (ps->rows);
2364 ps->rows = rows_new;
2365 free (ps->rows_length);
2366 ps->rows_length = rows_length_new;
2367 ps->ii = new_ii;
2368 gcc_assert (ps->min_cycle >= 0);
2370 verify_partial_schedule (ps, sched_nodes);
2372 if (dump_file)
2373 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2374 ps->max_cycle);
2377 /* Given U_NODE, the node that failed to be scheduled, and LOW and
2378    UP, the boundaries of its scheduling window, compute using
2379    SCHED_NODES and II a row in the partial schedule that can be split
2380    so as to separate a critical predecessor from a critical successor,
2381    thereby expanding the window, and return it.  */
2382 static int
2383 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2384 ddg_node_ptr u_node)
2386 ddg_edge_ptr e;
2387 int lower = INT_MIN, upper = INT_MAX;
2388 int crit_pred = -1;
2389 int crit_succ = -1;
2390 int crit_cycle;
2392 for (e = u_node->in; e != 0; e = e->next_in)
2394 int v = e->src->cuid;
2396 if (bitmap_bit_p (sched_nodes, v)
2397 && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2398 if (SCHED_TIME (v) > lower)
2400 crit_pred = v;
2401 lower = SCHED_TIME (v);
2405 if (crit_pred >= 0)
2407 crit_cycle = SCHED_TIME (crit_pred) + 1;
2408 return SMODULO (crit_cycle, ii);
2411 for (e = u_node->out; e != 0; e = e->next_out)
2413 int v = e->dest->cuid;
2415 if (bitmap_bit_p (sched_nodes, v)
2416 && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2417 if (SCHED_TIME (v) < upper)
2419 crit_succ = v;
2420 upper = SCHED_TIME (v);
2424 if (crit_succ >= 0)
2426 crit_cycle = SCHED_TIME (crit_succ);
2427 return SMODULO (crit_cycle, ii);
2430 if (dump_file)
2431 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2433 return SMODULO ((low + up + 1) / 2, ii);
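/* Sanity-check the partial schedule PS against SCHED_NODES: every insn in
   PS must be marked in SCHED_NODES, its cycle must lie within PS's
   min_cycle/max_cycle range, and the cached length of each row must match
   the number of insns actually linked into that row.  */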
2436 static void
2437 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2439 int row;
2440 ps_insn_ptr crr_insn;
2442 for (row = 0; row < ps->ii; row++)
2444 int length = 0;
2446 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2448 int u = crr_insn->id;
2450 length++;
2451 gcc_assert (bitmap_bit_p (sched_nodes, u));
2452 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2453 popcount (sched_nodes) == number of insns in ps. */
2454 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2455 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2458 gcc_assert (ps->rows_length[row] == length);
2463 /* This page implements the algorithm for ordering the nodes of a DDG
2464 for modulo scheduling, activated through the
2465 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2467 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2468 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2469 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2470 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2471 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2472 #define DEPTH(x) (ASAP ((x)))
2474 typedef struct node_order_params * nopa;
2476 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2477 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2478 static nopa calculate_order_params (ddg_ptr, int, int *);
2479 static int find_max_asap (ddg_ptr, sbitmap);
2480 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2481 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
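/* Direction in which the nodes of an SCC are ordered: BOTTOMUP extends the
   workset with predecessors, TOPDOWN with successors.  */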
2483 enum sms_direction {BOTTOMUP, TOPDOWN};
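/* Per-node ordering parameters: the ASAP and ALAP times and the HEIGHT of
   the node in the DAG obtained by disregarding back-arcs; MOB and DEPTH
   above are derived from these.  */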
2485 struct node_order_params
2487 int asap;
2488 int alap;
2489 int height;
2492 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2493 static void
2494 check_nodes_order (int *node_order, int num_nodes)
2496 int i;
2497 sbitmap tmp = sbitmap_alloc (num_nodes);
2499 bitmap_clear (tmp);
2501 if (dump_file)
2502 fprintf (dump_file, "SMS final nodes order: \n");
2504 for (i = 0; i < num_nodes; i++)
2506 int u = node_order[i];
2508 if (dump_file)
2509 fprintf (dump_file, "%d ", u);
2510 gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2512 bitmap_set_bit (tmp, u);
2515 if (dump_file)
2516 fprintf (dump_file, "\n");
2518 sbitmap_free (tmp);
2521 /* Order the nodes of G for scheduling and pass the result in
2522    NODE_ORDER.  Also set aux.count of each node to ASAP.
2523    Store the maximal ASAP in PMAX_ASAP.  Return the recMII for the given DDG.  */
2524 static int
2525 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2527 int i;
2528 int rec_mii = 0;
2529 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2531 nopa nops = calculate_order_params (g, mii, pmax_asap);
2533 if (dump_file)
2534 print_sccs (dump_file, sccs, g);
2536 order_nodes_of_sccs (sccs, node_order);
2538 if (sccs->num_sccs > 0)
2539 /* First SCC has the largest recurrence_length. */
2540 rec_mii = sccs->sccs[0]->recurrence_length;
2542 /* Save ASAP before destroying node_order_params. */
2543 for (i = 0; i < g->num_nodes; i++)
2545 ddg_node_ptr v = &g->nodes[i];
2546 v->aux.count = ASAP (v);
2549 free (nops);
2550 free_ddg_all_sccs (sccs);
2551 check_nodes_order (node_order, g->num_nodes);
2553 return rec_mii;
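/* Order the nodes of ALL_SCCS into NODE_ORDER, handling the SCCs from the
   one with the largest recurrence length downwards, each extended with the
   nodes lying on paths to/from previously ordered SCCs, and finally the
   remaining nodes that belong to no SCC.  */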
2556 static void
2557 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2559 int i, pos = 0;
2560 ddg_ptr g = all_sccs->ddg;
2561 int num_nodes = g->num_nodes;
2562 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2563 sbitmap on_path = sbitmap_alloc (num_nodes);
2564 sbitmap tmp = sbitmap_alloc (num_nodes);
2565 sbitmap ones = sbitmap_alloc (num_nodes);
2567 bitmap_clear (prev_sccs);
2568 bitmap_ones (ones);
2570 /* Perform the node ordering starting from the SCC with the highest recMII.
2571 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2572 for (i = 0; i < all_sccs->num_sccs; i++)
2574 ddg_scc_ptr scc = all_sccs->sccs[i];
2576 /* Add nodes on paths from previous SCCs to the current SCC. */
2577 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2578 bitmap_ior (tmp, scc->nodes, on_path);
2580 /* Add nodes on paths from the current SCC to previous SCCs. */
2581 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2582 bitmap_ior (tmp, tmp, on_path);
2584 /* Remove nodes of previous SCCs from current extended SCC. */
2585 bitmap_and_compl (tmp, tmp, prev_sccs);
2587 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2588 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2591 /* Handle the remaining nodes that do not belong to any scc. Each call
2592 to order_nodes_in_scc handles a single connected component. */
2593 while (pos < g->num_nodes)
2595 bitmap_and_compl (tmp, ones, prev_sccs);
2596 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2598 sbitmap_free (prev_sccs);
2599 sbitmap_free (on_path);
2600 sbitmap_free (tmp);
2601 sbitmap_free (ones);
2604 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2605 static struct node_order_params *
2606 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2608 int u;
2609 int max_asap;
2610 int num_nodes = g->num_nodes;
2611 ddg_edge_ptr e;
2612 /* Allocate a place to hold ordering params for each node in the DDG. */
2613 nopa node_order_params_arr;
2615   /* Initialize ASAP/ALAP/HEIGHT to zero.  */
2616 node_order_params_arr = (nopa) xcalloc (num_nodes,
2617 sizeof (struct node_order_params));
2619 /* Set the aux pointer of each node to point to its order_params structure. */
2620 for (u = 0; u < num_nodes; u++)
2621 g->nodes[u].aux.info = &node_order_params_arr[u];
2623   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2624      calculate ASAP, ALAP, mobility, distance, and height for each node
2625      in the dependence (directed acyclic) graph.  */
2627 /* We assume that the nodes in the array are in topological order. */
2629 max_asap = 0;
2630 for (u = 0; u < num_nodes; u++)
2632 ddg_node_ptr u_node = &g->nodes[u];
2634 ASAP (u_node) = 0;
2635 for (e = u_node->in; e; e = e->next_in)
2636 if (e->distance == 0)
2637 ASAP (u_node) = MAX (ASAP (u_node),
2638 ASAP (e->src) + e->latency);
2639 max_asap = MAX (max_asap, ASAP (u_node));
2642 for (u = num_nodes - 1; u > -1; u--)
2644 ddg_node_ptr u_node = &g->nodes[u];
2646 ALAP (u_node) = max_asap;
2647 HEIGHT (u_node) = 0;
2648 for (e = u_node->out; e; e = e->next_out)
2649 if (e->distance == 0)
2651 ALAP (u_node) = MIN (ALAP (u_node),
2652 ALAP (e->dest) - e->latency);
2653 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2654 HEIGHT (e->dest) + e->latency);
2657 if (dump_file)
2659 fprintf (dump_file, "\nOrder params\n");
2660 for (u = 0; u < num_nodes; u++)
2662 ddg_node_ptr u_node = &g->nodes[u];
2664 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2665 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2669 *pmax_asap = max_asap;
2670 return node_order_params_arr;
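/* Return the node in NODES with the maximal ASAP, or -1 if NODES is
   empty.  */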
2673 static int
2674 find_max_asap (ddg_ptr g, sbitmap nodes)
2676 unsigned int u = 0;
2677 int max_asap = -1;
2678 int result = -1;
2679 sbitmap_iterator sbi;
2681 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2683 ddg_node_ptr u_node = &g->nodes[u];
2685 if (max_asap < ASAP (u_node))
2687 max_asap = ASAP (u_node);
2688 result = u;
2691 return result;
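/* Return the node in NODES with the maximal HEIGHT, breaking ties by
   minimal mobility (MOB); return -1 if NODES is empty.  */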
2694 static int
2695 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2697 unsigned int u = 0;
2698 int max_hv = -1;
2699 int min_mob = INT_MAX;
2700 int result = -1;
2701 sbitmap_iterator sbi;
2703 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2705 ddg_node_ptr u_node = &g->nodes[u];
2707 if (max_hv < HEIGHT (u_node))
2709 max_hv = HEIGHT (u_node);
2710 min_mob = MOB (u_node);
2711 result = u;
2713 else if ((max_hv == HEIGHT (u_node))
2714 && (min_mob > MOB (u_node)))
2716 min_mob = MOB (u_node);
2717 result = u;
2720 return result;
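/* Return the node in NODES with the maximal DEPTH, breaking ties by
   minimal mobility (MOB); return -1 if NODES is empty.  */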
2723 static int
2724 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2726 unsigned int u = 0;
2727 int max_dv = -1;
2728 int min_mob = INT_MAX;
2729 int result = -1;
2730 sbitmap_iterator sbi;
2732 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2734 ddg_node_ptr u_node = &g->nodes[u];
2736 if (max_dv < DEPTH (u_node))
2738 max_dv = DEPTH (u_node);
2739 min_mob = MOB (u_node);
2740 result = u;
2742 else if ((max_dv == DEPTH (u_node))
2743 && (min_mob > MOB (u_node)))
2745 min_mob = MOB (u_node);
2746 result = u;
2749 return result;
2752 /* Places the nodes of SCC into the NODE_ORDER array starting
2753 at position POS, according to the SMS ordering algorithm.
2754 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2755 the NODE_ORDER array, starting from position zero. */
2756 static int
2757 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2758 int * node_order, int pos)
2760 enum sms_direction dir;
2761 int num_nodes = g->num_nodes;
2762 sbitmap workset = sbitmap_alloc (num_nodes);
2763 sbitmap tmp = sbitmap_alloc (num_nodes);
2764 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2765 sbitmap predecessors = sbitmap_alloc (num_nodes);
2766 sbitmap successors = sbitmap_alloc (num_nodes);
2768 bitmap_clear (predecessors);
2769 find_predecessors (predecessors, g, nodes_ordered);
2771 bitmap_clear (successors);
2772 find_successors (successors, g, nodes_ordered);
2774 bitmap_clear (tmp);
2775 if (bitmap_and (tmp, predecessors, scc))
2777 bitmap_copy (workset, tmp);
2778 dir = BOTTOMUP;
2780 else if (bitmap_and (tmp, successors, scc))
2782 bitmap_copy (workset, tmp);
2783 dir = TOPDOWN;
2785 else
2787 int u;
2789 bitmap_clear (workset);
2790 if ((u = find_max_asap (g, scc)) >= 0)
2791 bitmap_set_bit (workset, u);
2792 dir = BOTTOMUP;
2795 bitmap_clear (zero_bitmap);
2796 while (!bitmap_equal_p (workset, zero_bitmap))
2798 int v;
2799 ddg_node_ptr v_node;
2800 sbitmap v_node_preds;
2801 sbitmap v_node_succs;
2803 if (dir == TOPDOWN)
2805 while (!bitmap_equal_p (workset, zero_bitmap))
2807 v = find_max_hv_min_mob (g, workset);
2808 v_node = &g->nodes[v];
2809 node_order[pos++] = v;
2810 v_node_succs = NODE_SUCCESSORS (v_node);
2811 bitmap_and (tmp, v_node_succs, scc);
2813 /* Don't consider the already ordered successors again. */
2814 bitmap_and_compl (tmp, tmp, nodes_ordered);
2815 bitmap_ior (workset, workset, tmp);
2816 bitmap_clear_bit (workset, v);
2817 bitmap_set_bit (nodes_ordered, v);
2819 dir = BOTTOMUP;
2820 bitmap_clear (predecessors);
2821 find_predecessors (predecessors, g, nodes_ordered);
2822 bitmap_and (workset, predecessors, scc);
2824 else
2826 while (!bitmap_equal_p (workset, zero_bitmap))
2828 v = find_max_dv_min_mob (g, workset);
2829 v_node = &g->nodes[v];
2830 node_order[pos++] = v;
2831 v_node_preds = NODE_PREDECESSORS (v_node);
2832 bitmap_and (tmp, v_node_preds, scc);
2834 /* Don't consider the already ordered predecessors again. */
2835 bitmap_and_compl (tmp, tmp, nodes_ordered);
2836 bitmap_ior (workset, workset, tmp);
2837 bitmap_clear_bit (workset, v);
2838 bitmap_set_bit (nodes_ordered, v);
2840 dir = TOPDOWN;
2841 bitmap_clear (successors);
2842 find_successors (successors, g, nodes_ordered);
2843 bitmap_and (workset, successors, scc);
2846 sbitmap_free (tmp);
2847 sbitmap_free (workset);
2848 sbitmap_free (zero_bitmap);
2849 sbitmap_free (predecessors);
2850 sbitmap_free (successors);
2851 return pos;
2855 /* This page contains functions for manipulating partial-schedules during
2856 modulo scheduling. */
2858 /* Create a partial schedule and allocate memory to hold II rows.  */
2860 static partial_schedule_ptr
2861 create_partial_schedule (int ii, ddg_ptr g, int history)
2863 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2864 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2865 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2866 ps->reg_moves.create (0);
2867 ps->ii = ii;
2868 ps->history = history;
2869 ps->min_cycle = INT_MAX;
2870 ps->max_cycle = INT_MIN;
2871 ps->g = g;
2873 return ps;
2876 /* Free the PS_INSNs in the rows array of the given partial schedule.
2877    ??? Consider caching the PS_INSNs.  */
2878 static void
2879 free_ps_insns (partial_schedule_ptr ps)
2881 int i;
2883 for (i = 0; i < ps->ii; i++)
2885 while (ps->rows[i])
2887 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2889 free (ps->rows[i]);
2890 ps->rows[i] = ps_insn;
2892 ps->rows[i] = NULL;
2896 /* Free all the memory allocated to the partial schedule. */
2898 static void
2899 free_partial_schedule (partial_schedule_ptr ps)
2901 ps_reg_move_info *move;
2902 unsigned int i;
2904 if (!ps)
2905 return;
2907 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2908 sbitmap_free (move->uses);
2909 ps->reg_moves.release ();
2911 free_ps_insns (ps);
2912 free (ps->rows);
2913 free (ps->rows_length);
2914 free (ps);
2917 /* Clear the rows array with its PS_INSNs, and create a new one with
2918 NEW_II rows. */
2920 static void
2921 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2923 if (!ps)
2924 return;
2925 free_ps_insns (ps);
2926 if (new_ii == ps->ii)
2927 return;
2928 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2929 * sizeof (ps_insn_ptr));
2930 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2931 ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2932 memset (ps->rows_length, 0, new_ii * sizeof (int));
2933 ps->ii = new_ii;
2934 ps->min_cycle = INT_MAX;
2935 ps->max_cycle = INT_MIN;
2938 /* Prints the partial schedule as an array of II rows; for each row
2939    print the ids of the insns in it.  */
2940 void
2941 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2943 int i;
2945 for (i = 0; i < ps->ii; i++)
2947 ps_insn_ptr ps_i = ps->rows[i];
2949 fprintf (dump, "\n[ROW %d ]: ", i);
2950 while (ps_i)
2952 rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2954 if (JUMP_P (insn))
2955 fprintf (dump, "%d (branch), ", INSN_UID (insn));
2956 else
2957 fprintf (dump, "%d, ", INSN_UID (insn));
2959 ps_i = ps_i->next_in_row;
2964 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2965 static ps_insn_ptr
2966 create_ps_insn (int id, int cycle)
2968 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2970 ps_i->id = id;
2971 ps_i->next_in_row = NULL;
2972 ps_i->prev_in_row = NULL;
2973 ps_i->cycle = cycle;
2975 return ps_i;
2979 /* Removes the given PS_INSN from the partial schedule. */
2980 static void
2981 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2983 int row;
2985 gcc_assert (ps && ps_i);
2987 row = SMODULO (ps_i->cycle, ps->ii);
2988 if (! ps_i->prev_in_row)
2990 gcc_assert (ps_i == ps->rows[row]);
2991 ps->rows[row] = ps_i->next_in_row;
2992 if (ps->rows[row])
2993 ps->rows[row]->prev_in_row = NULL;
2995 else
2997 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2998 if (ps_i->next_in_row)
2999 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
3002 ps->rows_length[row] -= 1;
3003 free (ps_i);
3004 return;
3007 /* Unlike what the literature describes for modulo scheduling (which focuses
3008    on VLIW machines), the order of the instructions inside a cycle is
3009    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
3010 where the current instruction should go relative to the already
3011 scheduled instructions in the given cycle. Go over these
3012 instructions and find the first possible column to put it in. */
3013 static bool
3014 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3015 sbitmap must_precede, sbitmap must_follow)
3017 ps_insn_ptr next_ps_i;
3018 ps_insn_ptr first_must_follow = NULL;
3019 ps_insn_ptr last_must_precede = NULL;
3020 ps_insn_ptr last_in_row = NULL;
3021 int row;
3023 if (! ps_i)
3024 return false;
3026 row = SMODULO (ps_i->cycle, ps->ii);
3028   /* Find the first must follow and the last must precede
3029      and insert the node immediately after the must precede
3030      but make sure that there is no must follow after it.  */
3031 for (next_ps_i = ps->rows[row];
3032 next_ps_i;
3033 next_ps_i = next_ps_i->next_in_row)
3035 if (must_follow
3036 && bitmap_bit_p (must_follow, next_ps_i->id)
3037 && ! first_must_follow)
3038 first_must_follow = next_ps_i;
3039 if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
3041 /* If we have already met a node that must follow, then
3042 there is no possible column. */
3043 if (first_must_follow)
3044 return false;
3045 else
3046 last_must_precede = next_ps_i;
3048 /* The closing branch must be the last in the row. */
3049 if (must_precede
3050 && bitmap_bit_p (must_precede, next_ps_i->id)
3051 && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3052 return false;
3054 last_in_row = next_ps_i;
3057 /* The closing branch is scheduled as well. Make sure there is no
3058 dependent instruction after it as the branch should be the last
3059 instruction in the row. */
3060 if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3062 if (first_must_follow)
3063 return false;
3064 if (last_in_row)
3066 /* Make the branch the last in the row. New instructions
3067 will be inserted at the beginning of the row or after the
3068 last must_precede instruction thus the branch is guaranteed
3069 to remain the last instruction in the row. */
3070 last_in_row->next_in_row = ps_i;
3071 ps_i->prev_in_row = last_in_row;
3072 ps_i->next_in_row = NULL;
3074 else
3075 ps->rows[row] = ps_i;
3076 return true;
3079   /* Now insert the node after the last must_precede insn, or at the start of the row if there is none.  */
3081 if (! last_must_precede)
3083 ps_i->next_in_row = ps->rows[row];
3084 ps_i->prev_in_row = NULL;
3085 if (ps_i->next_in_row)
3086 ps_i->next_in_row->prev_in_row = ps_i;
3087 ps->rows[row] = ps_i;
3089 else
3091 ps_i->next_in_row = last_must_precede->next_in_row;
3092 last_must_precede->next_in_row = ps_i;
3093 ps_i->prev_in_row = last_must_precede;
3094 if (ps_i->next_in_row)
3095 ps_i->next_in_row->prev_in_row = ps_i;
3098 return true;
3101 /* Advances the PS_INSN one column in its current row; returns false
3102    on failure and true on success.  Bit N is set in MUST_FOLLOW if
3103    the node with cuid N must come after the node pointed to by
3104    PS_I when scheduled in the same cycle.  */
3105 static int
3106 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3107 sbitmap must_follow)
3109 ps_insn_ptr prev, next;
3110 int row;
3112 if (!ps || !ps_i)
3113 return false;
3115 row = SMODULO (ps_i->cycle, ps->ii);
3117 if (! ps_i->next_in_row)
3118 return false;
3120 /* Check if next_in_row is dependent on ps_i, both having same sched
3121 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
3122 if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3123 return false;
3125 /* Advance PS_I over its next_in_row in the doubly linked list. */
3126 prev = ps_i->prev_in_row;
3127 next = ps_i->next_in_row;
3129 if (ps_i == ps->rows[row])
3130 ps->rows[row] = next;
3132 ps_i->next_in_row = next->next_in_row;
3134 if (next->next_in_row)
3135 next->next_in_row->prev_in_row = ps_i;
3137 next->next_in_row = ps_i;
3138 ps_i->prev_in_row = next;
3140 next->prev_in_row = prev;
3141 if (prev)
3142 prev->next_in_row = next;
3144 return true;
3147 /* Inserts a DDG_NODE into the given partial schedule at the given cycle.
3148    Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
3149    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
3150    before/after (respectively) the node pointed to by PS_I when scheduled
3151    in the same cycle.  */
3152 static ps_insn_ptr
3153 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3154 sbitmap must_precede, sbitmap must_follow)
3156 ps_insn_ptr ps_i;
3157 int row = SMODULO (cycle, ps->ii);
3159 if (ps->rows_length[row] >= issue_rate)
3160 return NULL;
3162 ps_i = create_ps_insn (id, cycle);
3164 /* Finds and inserts PS_I according to MUST_FOLLOW and
3165 MUST_PRECEDE. */
3166 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3168 free (ps_i);
3169 return NULL;
3172 ps->rows_length[row] += 1;
3173 return ps_i;
3176 /* Advance time one cycle. Assumes DFA is being used. */
3177 static void
3178 advance_one_cycle (void)
3180 if (targetm.sched.dfa_pre_cycle_insn)
3181 state_transition (curr_state,
3182 targetm.sched.dfa_pre_cycle_insn ());
3184 state_transition (curr_state, NULL);
3186 if (targetm.sched.dfa_post_cycle_insn)
3187 state_transition (curr_state,
3188 targetm.sched.dfa_post_cycle_insn ());
3193 /* Checks if PS has resource conflicts according to DFA, from cycle
3194    FROM to cycle TO; returns true if there are conflicts and false
3195    if there are no conflicts.  Assumes DFA is being used.  */
3196 static int
3197 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3199 int cycle;
3201 state_reset (curr_state);
3203 for (cycle = from; cycle <= to; cycle++)
3205 ps_insn_ptr crr_insn;
3206 /* Holds the remaining issue slots in the current row. */
3207 int can_issue_more = issue_rate;
3209 /* Walk through the DFA for the current row. */
3210 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3211 crr_insn;
3212 crr_insn = crr_insn->next_in_row)
3214 rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3216 if (!NONDEBUG_INSN_P (insn))
3217 continue;
3219 /* Check if there is room for the current insn. */
3220 if (!can_issue_more || state_dead_lock_p (curr_state))
3221 return true;
3223 /* Update the DFA state and return with failure if the DFA found
3224 resource conflicts. */
3225 if (state_transition (curr_state, insn) >= 0)
3226 return true;
3228 if (targetm.sched.variable_issue)
3229 can_issue_more =
3230 targetm.sched.variable_issue (sched_dump, sched_verbose,
3231 insn, can_issue_more);
3232 /* A naked CLOBBER or USE generates no instruction, so don't
3233 let them consume issue slots. */
3234 else if (GET_CODE (PATTERN (insn)) != USE
3235 && GET_CODE (PATTERN (insn)) != CLOBBER)
3236 can_issue_more--;
3239 /* Advance the DFA to the next cycle. */
3240 advance_one_cycle ();
3242 return false;
3245 /* Checks if the given node causes resource conflicts when added to PS at
3246    cycle C.  If not, the node is added to PS and returned; otherwise zero
3247    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3248    cuid N must come before/after (respectively) the node pointed to by
3249    PS_I when scheduled in the same cycle.  */
3250 ps_insn_ptr
3251 ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3252 int c, sbitmap must_precede,
3253 sbitmap must_follow)
3255 int has_conflicts = 0;
3256 ps_insn_ptr ps_i;
3258   /* First add the node to the PS; if this succeeds, check for
3259      conflicts, trying different issue slots in the same row.  */
3260 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3261 return NULL; /* Failed to insert the node at the given cycle. */
3263 has_conflicts = ps_has_conflicts (ps, c, c)
3264 || (ps->history > 0
3265 && ps_has_conflicts (ps,
3266 c - ps->history,
3267 c + ps->history));
3269 /* Try different issue slots to find one that the given node can be
3270 scheduled in without conflicts. */
3271 while (has_conflicts)
3273 if (! ps_insn_advance_column (ps, ps_i, must_follow))
3274 break;
3275 has_conflicts = ps_has_conflicts (ps, c, c)
3276 || (ps->history > 0
3277 && ps_has_conflicts (ps,
3278 c - ps->history,
3279 c + ps->history));
3282 if (has_conflicts)
3284 remove_node_from_ps (ps, ps_i);
3285 return NULL;
3288 ps->min_cycle = MIN (ps->min_cycle, c);
3289 ps->max_cycle = MAX (ps->max_cycle, c);
3290 return ps_i;
3293 /* Calculate the stage count of the partial schedule PS. The calculation
3294 takes into account the rotation amount passed in ROTATION_AMOUNT. */
3295 int
3296 calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3298 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3299 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3300 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3302   /* The calculation of stage count is done by adding the number of stages
3303      before cycle zero and after cycle zero.  */
3304 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3306 return stage_count;
3309 /* Rotate the rows of PS such that insns scheduled at time
3310 START_CYCLE will appear in row 0. Updates max/min_cycles. */
3311 void
3312 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3314 int i, row, backward_rotates;
3315 int last_row = ps->ii - 1;
3317 if (start_cycle == 0)
3318 return;
3320 backward_rotates = SMODULO (start_cycle, ps->ii);
3322 /* Revisit later and optimize this into a single loop. */
3323 for (i = 0; i < backward_rotates; i++)
3325 ps_insn_ptr first_row = ps->rows[0];
3326 int first_row_length = ps->rows_length[0];
3328 for (row = 0; row < last_row; row++)
3330 ps->rows[row] = ps->rows[row + 1];
3331 ps->rows_length[row] = ps->rows_length[row + 1];
3334 ps->rows[last_row] = first_row;
3335 ps->rows_length[last_row] = first_row_length;
3338 ps->max_cycle -= start_cycle;
3339 ps->min_cycle -= start_cycle;
3342 #endif /* INSN_SCHEDULING */
3344 /* Run instruction scheduler. */
3345 /* Perform SMS modulo scheduling.  */
3347 namespace {
3349 const pass_data pass_data_sms =
3351 RTL_PASS, /* type */
3352 "sms", /* name */
3353 OPTGROUP_NONE, /* optinfo_flags */
3354 TV_SMS, /* tv_id */
3355 0, /* properties_required */
3356 0, /* properties_provided */
3357 0, /* properties_destroyed */
3358 0, /* todo_flags_start */
3359 TODO_df_finish, /* todo_flags_finish */
3362 class pass_sms : public rtl_opt_pass
3364 public:
3365 pass_sms (gcc::context *ctxt)
3366 : rtl_opt_pass (pass_data_sms, ctxt)
3369 /* opt_pass methods: */
3370 virtual bool gate (function *)
3372 return (optimize > 0 && flag_modulo_sched);
3375 virtual unsigned int execute (function *);
3377 }; // class pass_sms
3379 unsigned int
3380 pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3382 #ifdef INSN_SCHEDULING
3383 basic_block bb;
3385 /* Collect loop information to be used in SMS. */
3386 cfg_layout_initialize (0);
3387 sms_schedule ();
3389 /* Update the life information, because we add pseudos. */
3390 max_regno = max_reg_num ();
3392 /* Finalize layout changes. */
3393 FOR_EACH_BB_FN (bb, fun)
3394 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3395 bb->aux = bb->next_bb;
3396 free_dominance_info (CDI_DOMINATORS);
3397 cfg_layout_finalize ();
3398 #endif /* INSN_SCHEDULING */
3399 return 0;
3402 } // anon namespace
3404 rtl_opt_pass *
3405 make_pass_sms (gcc::context *ctxt)
3407 return new pass_sms (ctxt);