/* Instruction scheduling pass.
   Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* Instruction scheduling pass.

   This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.
   The scheduler works as follows:

   We compute insn priorities based on data dependencies.  Flow
   analysis only creates a fraction of the data-dependencies we must
   observe: namely, only those dependencies which the combiner can be
   expected to use.  For this pass, we must therefore create the
   remaining dependencies we need to observe: register dependencies,
   memory dependencies, dependencies to keep function calls in order,
   and the dependence between a conditional branch and the setting of
   condition codes are all dealt with here.

   The scheduler first traverses the data flow graph, starting with
   the last instruction, and proceeding to the first, assigning values
   to insn_priority as it goes.  This sorts the instructions
   topologically by data dependence.

   Once priorities have been established, we order the insns using
   list scheduling.  This works as follows: starting with a list of
   all the ready insns, and sorted according to priority number, we
   schedule the insn from the end of the list by placing its
   predecessors in the list according to their priority order.  We
   consider this insn scheduled by setting the pointer to the "end" of
   the list to point to the previous insn.  When an insn has no
   predecessors, we either queue it until sufficient time has elapsed
   or add it to the ready list.  As the instructions are scheduled or
   when stalls are introduced, the queue advances and dumps insns into
   the ready list.  When all insns down to the lowest priority have
   been scheduled, the critical path of the basic block has been made
   as short as possible.  The remaining insns are then scheduled in
   remaining slots.

   Function unit conflicts are resolved during forward list scheduling
   by tracking the time when each insn is committed to the schedule
   and from that, the time the function units it uses must be free.
   As insns on the ready list are considered for scheduling, those
   that would result in a blockage of the already committed insns are
   queued until no blockage will result.

   The following list shows the order in which we want to break ties
   among insns in the ready list:

   1.  choose insn with the longest path to end of bb, ties
   broken by
   2.  choose insn with least contribution to register pressure,
   ties broken by
   3.  prefer in-block upon interblock motion, ties broken by
   4.  prefer useful upon speculative motion, ties broken by
   5.  choose insn with largest control flow probability, ties
   broken by
   6.  choose insn with the least dependences upon the previously
   scheduled insn, or finally
   7.  choose the insn which has the most insns dependent on it, or
   8.  choose insn with lowest UID.

   Memory references complicate matters.  Only if we can be certain
   that memory references are not part of the data dependency graph
   (via true, anti, or output dependence), can we move operations past
   memory references.  To first approximation, reads can be done
   independently, while writes introduce dependencies.  Better
   approximations will yield fewer dependencies.

   Before reload, an extended analysis of interblock data dependences
   is required for interblock scheduling.  This is performed in
   compute_block_backward_dependences ().

   Dependencies set up by memory references are treated in exactly the
   same way as other dependencies, by using LOG_LINKS backward
   dependences.  LOG_LINKS are translated into INSN_DEPEND forward
   dependences for the purpose of forward list scheduling.

   Having optimized the critical path, we may have also unduly
   extended the lifetimes of some registers.  If an operation requires
   that constants be loaded into registers, it is certainly desirable
   to load those constants as early as necessary, but no earlier.
   I.e., it will not do to load up a bunch of registers at the
   beginning of a basic block only to use them at the end, if they
   could be loaded later, since this may result in excessive register
   utilization.

   Note that since branches are never in basic blocks, but only end
   basic blocks, this pass will not move branches.  But that is ok,
   since we can use GNU's delayed branch scheduling pass to take care
   of this case.

   Also note that no further optimizations based on algebraic
   identities are performed, so this pass would be a good one to
   perform instruction splitting, such as breaking up a multiply
   instruction into shifts and adds where that is profitable.

   Given the memory aliasing analysis that this pass should perform,
   it should be possible to remove redundant stores to memory, and to
   load values from registers instead of hitting memory.

   Before reload, speculative insns are moved only if a 'proof' exists
   that no exception will be caused by this, and if no live registers
   exist that inhibit the motion (live register constraints are not
   represented by data dependence edges).

   This pass must update information that subsequent passes expect to
   be correct.  Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
   reg_n_calls_crossed, and reg_live_length.  Also, BLOCK_HEAD,
   BLOCK_END.

   The information in the line number notes is carefully retained by
   this pass.  Notes that refer to the starting and ending of
   exception regions are also carefully retained by this pass.  All
   other NOTE insns are grouped in their same relative order at the
   beginning of basic blocks and regions that have been scheduled.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting,
   control-flow-graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */
#include "config.h"
#include "system.h"
#include "toplev.h"
#include "rtl.h"
#include "tm_p.h"
#include "basic-block.h"
#include "regs.h"
#include "function.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "recog.h"

extern char *reg_known_equiv_p;
extern rtx *reg_known_value;

#ifdef INSN_SCHEDULING
/* target_units bitmask has 1 for each unit in the cpu.  It should be
   possible to compute this variable from the machine description.
   But currently it is computed by examining the insn list.  Since
   this is only needed for visualization, it seems an acceptable
   solution.  (For understanding the mapping of bits to units, see
   definition of function_units[] in "insn-attrtab.c".)  */

static int target_units = 0;

/* issue_rate is the number of insns that can be scheduled in the same
   machine cycle.  It can be defined in the config/mach/mach.h file,
   otherwise we set it to 1.  */

static int issue_rate;

#ifndef ISSUE_RATE
#define ISSUE_RATE 1
#endif

/* sched-verbose controls the amount of debugging output the
   scheduler prints.  It is controlled by -fsched-verbose-N:
   N>0 and no -dSR : the output is directed to stderr.
   N>=10 will direct the printouts to stderr (regardless of -dSR).
   N=1: same as -dSR.
   N=2: bb's probabilities, detailed ready list info, unit/insn info.
   N=3: rtl at abort point, control-flow, regions info.
   N=5: dependences info.  */

#define MAX_RGN_BLOCKS 10
#define MAX_RGN_INSNS 100

static int sched_verbose_param = 0;
static int sched_verbose = 0;

/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;

/* Debugging file.  All printouts are sent to dump, which is always set,
   either to stderr, or to the dump listing file (-dRS).  */
static FILE *dump = 0;
/* fix_sched_param() is called from toplev.c upon detection
   of the -fsched-***-N options.  */

void
fix_sched_param (param, val)
     const char *param, *val;
{
  if (!strcmp (param, "verbose"))
    sched_verbose_param = atoi (val);
  else
    warning ("fix_sched_param: unknown param: %s", param);
}
/* Element N is the next insn that sets (hard or pseudo) register
   N within the current basic block; or zero, if there is no
   such insn.  Needed for new registers which may be introduced
   by splitting insns.  */
static rtx *reg_last_uses;
static rtx *reg_last_sets;
static rtx *reg_last_clobbers;
static regset reg_pending_sets;
static regset reg_pending_clobbers;
static int reg_pending_sets_all;

/* Vector indexed by INSN_UID giving the original ordering of the insns.  */
static int *insn_luid;
#define INSN_LUID(INSN) (insn_luid[INSN_UID (INSN)])
/* To speed up the test for duplicate dependency links we keep a record
   of true dependencies created by add_dependence.

   Each insn has an associated bitmap for its dependencies.  Each bitmap
   has enough entries to represent a dependency on any other insn in the
   insn chain.  */
static sbitmap *true_dependency_cache;

/* Vector indexed by INSN_UID giving each instruction a priority.  */
static int *insn_priority;
#define INSN_PRIORITY(INSN) (insn_priority[INSN_UID (INSN)])

static short *insn_costs;
#define INSN_COST(INSN) insn_costs[INSN_UID (INSN)]

/* Vector indexed by INSN_UID giving an encoding of the function units
   used.  */
static short *insn_units;
#define INSN_UNIT(INSN) insn_units[INSN_UID (INSN)]

/* Vector indexed by INSN_UID giving each instruction a
   register-weight.  This weight is an estimation of the insn's
   contribution to register pressure.  */
static int *insn_reg_weight;
#define INSN_REG_WEIGHT(INSN) (insn_reg_weight[INSN_UID (INSN)])

/* Vector indexed by INSN_UID giving list of insns which
   depend upon INSN.  Unlike LOG_LINKS, it represents forward dependences.  */
static rtx *insn_depend;
#define INSN_DEPEND(INSN) insn_depend[INSN_UID (INSN)]

/* Vector indexed by INSN_UID.  Initialized to the number of incoming
   edges in the forward dependence graph (= number of LOG_LINKS).  As
   scheduling proceeds, dependence counts are decreased.  An
   instruction moves to the ready list when its counter is zero.  */
static int *insn_dep_count;
#define INSN_DEP_COUNT(INSN) (insn_dep_count[INSN_UID (INSN)])

/* Vector indexed by INSN_UID giving an encoding of the blockage range
   function.  The unit and the range are encoded.  */
static unsigned int *insn_blockage;
#define INSN_BLOCKAGE(INSN) insn_blockage[INSN_UID (INSN)]
#define UNIT_BITS 5
#define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1)
#define ENCODE_BLOCKAGE(U, R)			\
  (((U) << BLOCKAGE_BITS			\
    | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS	\
   | MAX_BLOCKAGE_COST (R))
#define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS))
#define BLOCKAGE_RANGE(B)                                                \
  (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
   | ((B) & BLOCKAGE_MASK))

/* Encodings of the `<name>_unit_blockage_range' function.  */
#define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
#define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
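
/* A worked example of the encoding above, assuming the generated
   insn-attrtab.c defines BLOCKAGE_BITS as 5 and HOST_BITS_PER_INT is
   32 (both are target/host dependent, so these numbers are only
   illustrative): for unit U = 3 and a range R with minimum cost 2 and
   maximum cost 4, R = (2 << 16) | 4, and

     ENCODE_BLOCKAGE (3, R) = ((3 << 5 | 2) << 5) | 4 = 3140
     UNIT_BLOCKED (3140)    = 3140 >> 10              = 3
     BLOCKAGE_RANGE (3140)  = (2 << 16) | 4           = R

   i.e. the unit number sits in the top bits, with the min and max
   blockage costs in the two BLOCKAGE_BITS-wide fields below it.  */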
#define DONE_PRIORITY -1
#define MAX_PRIORITY 0x7fffffff
#define TAIL_PRIORITY 0x7ffffffe
#define LAUNCH_PRIORITY 0x7f000001
#define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0)
#define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0)

/* Vector indexed by INSN_UID giving number of insns referring to this
   insn.  */
static int *insn_ref_count;
#define INSN_REF_COUNT(INSN) (insn_ref_count[INSN_UID (INSN)])

/* Vector indexed by INSN_UID giving line-number note in effect for each
   insn.  For line-number notes, this indicates whether the note may be
   reused.  */
static rtx *line_note;
#define LINE_NOTE(INSN) (line_note[INSN_UID (INSN)])

/* Vector indexed by basic block number giving the starting line-number
   for each basic block.  */
static rtx *line_note_head;

/* List of important notes we must keep around.  This is a pointer to the
   last element in the list.  */
static rtx note_list;
/* Queues, etc.  */

/* An instruction is ready to be scheduled when all insns preceding it
   have already been scheduled.  It is important to ensure that all
   insns which use its result will not be executed until its result
   has been computed.  An insn is maintained in one of four structures:

   (P) the "Pending" set of insns which cannot be scheduled until
   their dependencies have been satisfied.
   (Q) the "Queued" set of insns that can be scheduled when sufficient
   time has passed.
   (R) the "Ready" list of unscheduled, uncommitted insns.
   (S) the "Scheduled" list of insns.

   Initially, all insns are either "Pending" or "Ready" depending on
   whether their dependencies are satisfied.

   Insns move from the "Ready" list to the "Scheduled" list as they
   are committed to the schedule.  As this occurs, the insns in the
   "Pending" list have their dependencies satisfied and move to either
   the "Ready" list or the "Queued" set depending on whether
   sufficient time has passed to make them ready.  As time passes,
   insns move from the "Queued" set to the "Ready" list.  Insns may
   move from the "Ready" list to the "Queued" set if they are blocked
   due to a function unit conflict.

   The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled
   insns, i.e., those that are ready, queued, and pending.
   The "Queued" set (Q) is implemented by the variable `insn_queue'.
   The "Ready" list (R) is implemented by the variables `ready' and
   `n_ready'.
   The "Scheduled" list (S) is the new insn chain built by this pass.

   The transition (R->S) is implemented in the scheduling loop in
   `schedule_block' when the best insn to schedule is chosen.
   The transition (R->Q) is implemented in `queue_insn' when an
   insn is found to have a function unit conflict with the already
   committed insns.
   The transitions (P->R and P->Q) are implemented in `schedule_insn' as
   insns move from the ready list to the scheduled list.
   The transition (Q->R) is implemented in `queue_to_ready' as time
   passes or stalls are introduced.  */
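
/* In outline, then, the scheduling loop implied by the above (a
   simplified sketch of the flow, not the actual code in
   schedule_block) is:

     while (insns remain unscheduled)
       {
         advance the clock; queue_to_ready () moves insns Q->R;
         sort the ready list with rank_for_schedule ();
         take the best insn from R:
           if its function units would conflict with committed insns,
             queue_insn () moves it R->Q;
           else commit it (R->S); schedule_insn () then moves its
             newly-ready dependents P->R or P->Q;
       }  */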
/* Implement a circular buffer to delay instructions until sufficient
   time has passed.  INSN_QUEUE_SIZE is a power of two larger than
   MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c.  This is the
   longest time an insn may be queued.  */
static rtx insn_queue[INSN_QUEUE_SIZE];
static int q_ptr = 0;
static int q_size = 0;
#define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
#define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1))
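
/* Because INSN_QUEUE_SIZE is a power of two, the `& (INSN_QUEUE_SIZE-1)'
   masks implement cheap modular arithmetic.  For illustration only, if
   INSN_QUEUE_SIZE were 16: NEXT_Q (15) == 0 and NEXT_Q_AFTER (14, 3) == 1,
   i.e. both indices wrap around the buffer.  */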
/* Vector indexed by INSN_UID giving the minimum clock tick at which
   the insn becomes ready.  This is used to note timing constraints for
   insns in the pending list.  */
static int *insn_tick;
#define INSN_TICK(INSN) (insn_tick[INSN_UID (INSN)])
/* Forward declarations.  */
static void add_dependence PROTO ((rtx, rtx, enum reg_note));
static void remove_dependence PROTO ((rtx, rtx));
static rtx find_insn_list PROTO ((rtx, rtx));
static int insn_unit PROTO ((rtx));
static unsigned int blockage_range PROTO ((int, rtx));
static void clear_units PROTO ((void));
static int actual_hazard_this_instance PROTO ((int, int, rtx, int, int));
static void schedule_unit PROTO ((int, rtx, int));
static int actual_hazard PROTO ((int, rtx, int, int));
static int potential_hazard PROTO ((int, rtx, int));
static int insn_cost PROTO ((rtx, rtx, rtx));
static int priority PROTO ((rtx));
static void free_pending_lists PROTO ((void));
static void add_insn_mem_dependence PROTO ((rtx *, rtx *, rtx, rtx));
static void flush_pending_lists PROTO ((rtx, int));
static void sched_analyze_1 PROTO ((rtx, rtx));
static void sched_analyze_2 PROTO ((rtx, rtx));
static void sched_analyze_insn PROTO ((rtx, rtx, rtx));
static void sched_analyze PROTO ((rtx, rtx));
static int rank_for_schedule PROTO ((const PTR, const PTR));
static void swap_sort PROTO ((rtx *, int));
static void queue_insn PROTO ((rtx, int));
static int schedule_insn PROTO ((rtx, rtx *, int, int));
static void find_insn_reg_weight PROTO ((int));
static int schedule_block PROTO ((int, int));
static char *safe_concat PROTO ((char *, char *, const char *));
static int insn_issue_delay PROTO ((rtx));
static void adjust_priority PROTO ((rtx));

/* Mapping of insns to their original block prior to scheduling.  */
static int *insn_orig_block;
#define INSN_BLOCK(insn) (insn_orig_block[INSN_UID (insn)])

/* Some insns (e.g. call) are not allowed to move across blocks.  */
static char *cant_move;
#define CANT_MOVE(insn) (cant_move[INSN_UID (insn)])
/* Control flow graph edges are kept in circular lists.  */
typedef struct
  {
    int from_block;
    int to_block;
    int next_in;
    int next_out;
  }
haifa_edge;
static haifa_edge *edge_table;

#define NEXT_IN(edge) (edge_table[edge].next_in)
#define NEXT_OUT(edge) (edge_table[edge].next_out)
#define FROM_BLOCK(edge) (edge_table[edge].from_block)
#define TO_BLOCK(edge) (edge_table[edge].to_block)

/* Number of edges in the control flow graph.  (In fact, larger than
   that by 1, since edge 0 is unused.)  */
static int nr_edges;

/* Circular list of incoming/outgoing edges of a block.  */
static int *in_edges;
static int *out_edges;

#define IN_EDGES(block) (in_edges[block])
#define OUT_EDGES(block) (out_edges[block])

static int is_cfg_nonregular PROTO ((void));
static int build_control_flow PROTO ((int_list_ptr *, int_list_ptr *,
				      int *, int *));
static void new_edge PROTO ((int, int));
/* A region is the main entity for interblock scheduling: insns
   are allowed to move between blocks in the same region, along
   control flow graph edges, in the 'up' direction.  */
typedef struct
  {
    int rgn_nr_blocks;		/* Number of blocks in region.  */
    int rgn_blocks;		/* Blocks in the region (actually index in rgn_bb_table).  */
  }
region;

/* Number of regions in the procedure.  */
static int nr_regions;

/* Table of region descriptions.  */
static region *rgn_table;

/* Array of lists of regions' blocks.  */
static int *rgn_bb_table;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
static int *block_to_bb;

/* The number of the region containing a block.  */
static int *containing_rgn;

#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
#define BLOCK_TO_BB(block) (block_to_bb[block])
#define CONTAINING_RGN(block) (containing_rgn[block])

void debug_regions PROTO ((void));
static void find_single_block_region PROTO ((void));
static void find_rgns PROTO ((int_list_ptr *, int_list_ptr *,
			      int *, int *, sbitmap *));
static int too_large PROTO ((int, int *, int *));

extern void debug_live PROTO ((int, int));

/* Blocks of the current region being scheduled.  */
static int current_nr_blocks;
static int current_blocks;

/* The mapping from bb to block.  */
#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
/* Bit vectors and bitset operations are needed for computations on
   the control flow graph.  */

typedef unsigned HOST_WIDE_INT *bitset;
typedef struct
  {
    int *first_member;		/* Pointer to the list start in bitlst_table.  */
    int nr_members;		/* The number of members of the bit list.  */
  }
bitlst;

static int bitlst_table_last;
static int bitlst_table_size;
static int *bitlst_table;

static char bitset_member PROTO ((bitset, int, int));
static void extract_bitlst PROTO ((bitset, int, bitlst *));
/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef bitlst bblst;
typedef struct
  {
    char is_valid;
    char is_speculative;
    int src_prob;
    bblst split_bbs;
    bblst update_bbs;
  }
candidate;

static candidate *candidate_table;

/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static int *bblst_table, bblst_size, bblst_last;

#define IS_VALID(src) ( candidate_table[src].is_valid )
#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
#define SRC_PROB(src) ( candidate_table[src].src_prob )

/* The bb being currently scheduled.  */
static int target_bb;

/* List of edges.  */
typedef bitlst edgelst;

/* Target info functions.  */
static void split_edges PROTO ((int, int, edgelst *));
static void compute_trg_info PROTO ((int));
void debug_candidate PROTO ((int));
void debug_candidates PROTO ((int));
/* Bit-set of bbs, where bit 'i' stands for bb 'i'.  */
typedef bitset bbset;

/* Number of words of the bbset.  */
static int bbset_size;

/* Dominators array: dom[i] contains the bbset of dominators of
   bb i in the region.  */
static bbset *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg?  */
#define IS_DOMINATED(bb_src, bb_trg)				\
( bitset_member (dom[bb_src], bb_trg, bbset_size) )

/* Probability: Prob[i] is a float in [0, 1] which is the probability
   of bb i relative to the region entry.  */
static float *prob;

/* The probability of bb_src, relative to bb_trg.  Note, that while the
   'prob[bb]' is a float in [0, 1], this macro returns an integer
   in [0, 100].  */
#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
						      prob[bb_trg])))
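
/* For example (illustrative numbers only): if prob[bb_src] is 0.4 and
   prob[bb_trg] is 0.8, GET_SRC_PROB (bb_src, bb_trg) returns 50, i.e.
   the source block executes in 50% of the cases in which the target
   block does.  */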
/* Bit-set of edges, where bit i stands for edge i.  */
typedef bitset edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static int *rgn_edges;

/* Number of words in an edgeset.  */
static int edgeset_size;

/* Mapping from each edge in the graph to its number in the region.  */
static int *edge_to_bit;
#define EDGE_TO_BIT(edge) (edge_to_bit[edge])

/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

static void compute_dom_prob_ps PROTO ((int));

#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (INSN_BLOCK (INSN))))
#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (INSN_BLOCK (INSN))))
#define INSN_BB(INSN) (BLOCK_TO_BB (INSN_BLOCK (INSN)))

/* Parameters affecting the decision of rank_for_schedule().  */
#define MIN_DIFF_PRIORITY 2
#define MIN_PROBABILITY 40
#define MIN_PROB_DIFF 10
/* Speculative scheduling functions.  */
static int check_live_1 PROTO ((int, rtx));
static void update_live_1 PROTO ((int, rtx));
static int check_live PROTO ((rtx, int));
static void update_live PROTO ((rtx, int));
static void set_spec_fed PROTO ((rtx));
static int is_pfree PROTO ((rtx, int, int));
static int find_conditional_protection PROTO ((rtx, int));
static int is_conditionally_protected PROTO ((rtx, int, int));
static int may_trap_exp PROTO ((rtx, int));
static int haifa_classify_insn PROTO ((rtx));
static int is_prisky PROTO ((rtx, int, int));
static int is_exception_free PROTO ((rtx, int, int));

static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx));
static void compute_block_forward_dependences PROTO ((int));
static void init_rgn_data_dependences PROTO ((int));
static void add_branch_dependences PROTO ((rtx, rtx));
static void compute_block_backward_dependences PROTO ((int));
void debug_dependencies PROTO ((void));
/* Notes handling mechanism:
   =========================
   Generally, NOTES are saved before scheduling and restored after scheduling.
   The scheduler distinguishes between three types of notes:

   (1) LINE_NUMBER notes, generated and used for debugging.  Here,
   before scheduling a region, a pointer to the LINE_NUMBER note is
   added to the insn following it (in save_line_notes()), and the note
   is removed (in rm_line_notes() and unlink_line_notes()).  After
   scheduling the region, this pointer is used for regeneration of
   the LINE_NUMBER note (in restore_line_notes()).

   (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
   Before scheduling a region, a pointer to the note is added to the insn
   that follows or precedes it.  (This happens as part of the data dependence
   computation.)  After scheduling an insn, the pointer contained in it is
   used for regenerating the corresponding note (in reemit_notes).

   (3) All other notes (e.g. INSN_DELETED):  Before scheduling a block,
   these notes are put in a list (in rm_other_notes() and
   unlink_other_notes ()).  After scheduling the block, these notes are
   inserted at the beginning of the block (in schedule_block()).  */

static rtx unlink_other_notes PROTO ((rtx, rtx));
static rtx unlink_line_notes PROTO ((rtx, rtx));
static void rm_line_notes PROTO ((int));
static void save_line_notes PROTO ((int));
static void restore_line_notes PROTO ((int));
static void rm_redundant_line_notes PROTO ((void));
static void rm_other_notes PROTO ((rtx, rtx));
static rtx reemit_notes PROTO ((rtx, rtx));

static void get_block_head_tail PROTO ((int, rtx *, rtx *));

static int queue_to_ready PROTO ((rtx [], int));

static void debug_ready_list PROTO ((rtx[], int));
static void init_target_units PROTO ((void));
static void insn_print_units PROTO ((rtx));
static int get_visual_tbl_length PROTO ((void));
static void init_block_visualization PROTO ((void));
static void print_block_visualization PROTO ((int, const char *));
static void visualize_scheduled_insns PROTO ((int, int));
static void visualize_no_unit PROTO ((rtx));
static void visualize_stall_cycles PROTO ((int, int));
static void print_exp PROTO ((char *, rtx, int));
static void print_value PROTO ((char *, rtx, int));
static void print_pattern PROTO ((char *, rtx, int));
static void print_insn PROTO ((char *, rtx, int));
void debug_reg_vector PROTO ((regset));

static rtx move_insn1 PROTO ((rtx, rtx));
static rtx move_insn PROTO ((rtx, rtx));
static rtx group_leader PROTO ((rtx));
static int set_priorities PROTO ((int));
static void init_rtx_vector PROTO ((rtx **, rtx *, int, int));
static void schedule_region PROTO ((int));

#endif /* INSN_SCHEDULING */
#define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))

/* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the
   LOG_LINKS of INSN, if not already there.  DEP_TYPE indicates the type
   of dependence that this link represents.  */

static void
add_dependence (insn, elem, dep_type)
     rtx insn;
     rtx elem;
     enum reg_note dep_type;
{
  rtx link, next;

  /* Don't depend an insn on itself.  */
  if (insn == elem)
    return;

  /* We can get a dependency on deleted insns due to optimizations in
     the register allocation and reloading or due to splitting.  Any
     such dependency is useless and can be ignored.  */
  if (GET_CODE (elem) == NOTE)
    return;

  /* If elem is part of a sequence that must be scheduled together, then
     make the dependence point to the last insn of the sequence.
     When HAVE_cc0, it is possible for NOTEs to exist between users and
     setters of the condition codes, so we must skip past notes here.
     Otherwise, NOTEs are impossible here.  */

  next = NEXT_INSN (elem);

#ifdef HAVE_cc0
  while (next && GET_CODE (next) == NOTE)
    next = NEXT_INSN (next);
#endif

  if (next && SCHED_GROUP_P (next)
      && GET_CODE (next) != CODE_LABEL)
    {
      /* Notes will never intervene here though, so don't bother checking
         for them.  */
      /* We must reject CODE_LABELs, so that we don't get confused by one
         that has LABEL_PRESERVE_P set, which is represented by the same
         bit in the rtl as SCHED_GROUP_P.  A CODE_LABEL can never be
         SCHED_GROUP_P.  */
      while (NEXT_INSN (next) && SCHED_GROUP_P (NEXT_INSN (next))
             && GET_CODE (NEXT_INSN (next)) != CODE_LABEL)
        next = NEXT_INSN (next);

      /* Again, don't depend an insn on itself.  */
      if (insn == next)
        return;

      /* Make the dependence to NEXT, the last insn of the group, instead
         of the original ELEM.  */
      elem = next;
    }

#ifdef INSN_SCHEDULING
  /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
     No need for interblock dependences with calls, since
     calls are not moved between blocks.  Note: the edge where
     elem is a CALL is still required.  */
  if (GET_CODE (insn) == CALL_INSN
      && (INSN_BB (elem) != INSN_BB (insn)))
    return;

#endif

  /* If we already have a true dependency for ELEM, then we do not
     need to do anything.  Avoiding the list walk below can cut
     compile times dramatically for some code.  */
  if (TEST_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem)))
    return;

  /* Check that we don't already have this dependence.  */
  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
    if (XEXP (link, 0) == elem)
      {
        /* If this is a more restrictive type of dependence than the existing
           one, then change the existing dependence to this type.  */
        if ((int) dep_type < (int) REG_NOTE_KIND (link))
          PUT_REG_NOTE_KIND (link, dep_type);

        /* If we are adding a true dependency to INSN's LOG_LINKs, then
           note that in the bitmap cache of true dependency information.  */
        if ((int) dep_type == 0)
          SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));

        return;
      }
  /* Might want to check one level of transitivity to save conses.  */

  link = alloc_INSN_LIST (elem, LOG_LINKS (insn));
  LOG_LINKS (insn) = link;

  /* Insn dependency, not data dependency.  */
  PUT_REG_NOTE_KIND (link, dep_type);
}
/* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
   of INSN.  Abort if not found.  */

static void
remove_dependence (insn, elem)
     rtx insn;
     rtx elem;
{
  rtx prev, link, next;
  int found = 0;

  for (prev = 0, link = LOG_LINKS (insn); link; link = next)
    {
      next = XEXP (link, 1);
      if (XEXP (link, 0) == elem)
        {
          if (prev)
            XEXP (prev, 1) = next;
          else
            LOG_LINKS (insn) = next;

          /* If we are removing a true dependency from the LOG_LINKS list,
             make sure to remove it from the cache too.  */
          if (REG_NOTE_KIND (link) == 0)
            RESET_BIT (true_dependency_cache[INSN_LUID (insn)],
                       INSN_LUID (elem));

          free_INSN_LIST_node (link);

          found = 1;
        }
      else
        prev = link;
    }

  if (!found)
    abort ();
  return;
}
#ifndef INSN_SCHEDULING
void
schedule_insns (dump_file)
     FILE *dump_file;
{
}
#else
#ifndef __GNUC__
#define __inline
#endif

#ifndef HAIFA_INLINE
#define HAIFA_INLINE __inline
#endif
/* Computation of memory dependencies.  */

/* The *_insns and *_mems are paired lists.  Each pending memory operation
   will have a pointer to the MEM rtx on one list and a pointer to the
   containing insn on the other list in the same place in the list.  */

/* We can't use add_dependence like the old code did, because a single insn
   may have multiple memory accesses, and hence needs to be on the list
   once for each memory access.  Add_dependence won't let you add an insn
   to a list more than once.  */

/* An INSN_LIST containing all insns with pending read operations.  */
static rtx pending_read_insns;

/* An EXPR_LIST containing all MEM rtx's which are pending reads.  */
static rtx pending_read_mems;

/* An INSN_LIST containing all insns with pending write operations.  */
static rtx pending_write_insns;

/* An EXPR_LIST containing all MEM rtx's which are pending writes.  */
static rtx pending_write_mems;

/* Indicates the combined length of the two pending lists.  We must prevent
   these lists from ever growing too large since the number of dependencies
   produced is at least O(N*N), and execution time is at least O(4*N*N), as
   a function of the length of these pending lists.  */

static int pending_lists_length;

/* The last insn upon which all memory references must depend.
   This is an insn which flushed the pending lists, creating a dependency
   between it and all previously pending memory references.  This creates
   a barrier (or a checkpoint) which no memory reference is allowed to cross.

   This includes all non-constant CALL_INSNs.  When we do interprocedural
   alias analysis, this restriction can be relaxed.
   This may also be an INSN that writes memory if the pending lists grow
   too large.  */

static rtx last_pending_memory_flush;

/* The last function call we have seen.  All hard regs, and, of course,
   the last function call, must depend on this.  */

static rtx last_function_call;

/* The LOG_LINKS field of this is a list of insns which use a pseudo register
   that does not already cross a call.  We create dependencies between each
   of those insns and the next call insn, to ensure that they won't cross a
   call after scheduling is done.  */

static rtx sched_before_next_call;

/* Pointer to the last instruction scheduled.  Used by rank_for_schedule,
   so that insns independent of the last scheduled insn will be preferred
   over dependent instructions.  */

static rtx last_scheduled_insn;

/* Data structures for the computation of data dependences in a region.  We
   keep one copy of each of the declared above variables for each bb in the
   region.  Before analyzing the data dependences for a bb, its variables
   are initialized as a function of the variables of its predecessors.  When
   the analysis for a bb completes, we save the contents of each variable X
   to a corresponding bb_X[bb] variable.  For example, pending_read_insns is
   copied to bb_pending_read_insns[bb].  Another change is that a few
   variables are now a list of insns rather than a single insn:
   last_pending_memory_flush, last_function_call, reg_last_sets.  The
   manipulation of these variables was changed appropriately.  */

static rtx **bb_reg_last_uses;
static rtx **bb_reg_last_sets;
static rtx **bb_reg_last_clobbers;

static rtx *bb_pending_read_insns;
static rtx *bb_pending_read_mems;
static rtx *bb_pending_write_insns;
static rtx *bb_pending_write_mems;
static int *bb_pending_lists_length;

static rtx *bb_last_pending_memory_flush;
static rtx *bb_last_function_call;
static rtx *bb_sched_before_next_call;
/* Functions for construction of the control flow graph.  */

/* Return 1 if control flow graph should not be constructed, 0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, or if we
   have nonlocal gotos.  */

static int
is_cfg_nonregular ()
{
  int b;
  rtx insn;
  RTX_CODE code;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If this function has a computed jump, then we consider the cfg
     not well structured.  */
  if (current_function_has_computed_jump)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that flow.c
     computes an accurate cfg for EH.  */
  if (exception_handler_labels)
    return 1;

  /* If we have non-jumping insns which refer to labels, then we consider
     the cfg not well structured.  */
  /* Check for labels referred to other than by jumps.  */
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
        code = GET_CODE (insn);
        if (GET_RTX_CLASS (code) == 'i')
          {
            rtx note;

            for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
              if (REG_NOTE_KIND (note) == REG_LABEL)
                return 1;
          }

        if (insn == BLOCK_END (b))
          break;
      }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}
/* Build the control flow graph and set nr_edges.

   Instead of trying to build a cfg ourselves, we rely on flow to
   do it for us.  Stamp out useless code (and bug) duplication.

   Return nonzero if an irregularity in the cfg is found which would
   prevent cross block scheduling.  */

static int
build_control_flow (s_preds, s_succs, num_preds, num_succs)
     int_list_ptr *s_preds;
     int_list_ptr *s_succs;
     int *num_preds;
     int *num_succs;
{
  int i;
  int_list_ptr succ;
  int unreachable;

  /* Count the number of edges in the cfg.  */
  nr_edges = 0;
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    {
      nr_edges += num_succs[i];

      /* Unreachable loops with more than one basic block are detected
         during the DFS traversal in find_rgns.

         Unreachable loops with a single block are detected here.  This
         test is redundant with the one in find_rgns, but it's much
         cheaper to go ahead and catch the trivial case here.  */
      if (num_preds[i] == 0
          || (num_preds[i] == 1 && INT_LIST_VAL (s_preds[i]) == i))
        unreachable = 1;
    }

  /* Account for entry/exit edges.  */
  nr_edges += 2;

  in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  edge_table = (haifa_edge *) xcalloc (nr_edges, sizeof (haifa_edge));

  nr_edges = 0;
  for (i = 0; i < n_basic_blocks; i++)
    for (succ = s_succs[i]; succ; succ = succ->next)
      {
        if (INT_LIST_VAL (succ) != EXIT_BLOCK)
          new_edge (i, INT_LIST_VAL (succ));
      }

  /* Increment by 1, since edge 0 is unused.  */
  nr_edges++;

  return unreachable;
}
/* Record an edge in the control flow graph from SOURCE to TARGET.

   In theory, this is redundant with the s_succs computed above, but
   we have not converted all of haifa to use information from the
   integer lists.  */

static void
new_edge (source, target)
     int source, target;
{
  int e, next_edge;
  int curr_edge, fst_edge;

  /* Check for duplicates.  */
  fst_edge = curr_edge = OUT_EDGES (source);
  while (curr_edge)
    {
      if (FROM_BLOCK (curr_edge) == source
          && TO_BLOCK (curr_edge) == target)
        {
          return;
        }

      curr_edge = NEXT_OUT (curr_edge);

      if (fst_edge == curr_edge)
        break;
    }

  e = ++nr_edges;

  FROM_BLOCK (e) = source;
  TO_BLOCK (e) = target;

  if (OUT_EDGES (source))
    {
      next_edge = NEXT_OUT (OUT_EDGES (source));
      NEXT_OUT (OUT_EDGES (source)) = e;
      NEXT_OUT (e) = next_edge;
    }
  else
    {
      OUT_EDGES (source) = e;
      NEXT_OUT (e) = e;
    }

  if (IN_EDGES (target))
    {
      next_edge = NEXT_IN (IN_EDGES (target));
      NEXT_IN (IN_EDGES (target)) = e;
      NEXT_IN (e) = next_edge;
    }
  else
    {
      IN_EDGES (target) = e;
      NEXT_IN (e) = e;
    }
}
/* BITSET macros for operations on the control flow graph.  */

/* Compute bitwise union of two bitsets.  */
#define BITSET_UNION(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) |= *(sp++); } while (0)

/* Compute bitwise intersection of two bitsets.  */
#define BITSET_INTER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= *(sp++); } while (0)

/* Compute bitwise difference of two bitsets.  */
#define BITSET_DIFFER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= ~*(sp++); } while (0)

/* Inverts every bit of bitset 'set'.  */
#define BITSET_INVERT(set, len)					\
do { register bitset tmpset = set;				\
     register int i;						\
     for (i = 0; i < len; i++, tmpset++)			\
       *tmpset = ~*tmpset; } while (0)

/* Turn on the index'th bit in bitset set.  */
#define BITSET_ADD(set, index, len)				\
{								\
  if (index >= HOST_BITS_PER_WIDE_INT * len)			\
    abort ();							\
  else								\
    set[index/HOST_BITS_PER_WIDE_INT] |=			\
      1 << (index % HOST_BITS_PER_WIDE_INT);			\
}

/* Turn off the index'th bit in set.  */
#define BITSET_REMOVE(set, index, len)				\
{								\
  if (index >= HOST_BITS_PER_WIDE_INT * len)			\
    abort ();							\
  else								\
    set[index/HOST_BITS_PER_WIDE_INT] &=			\
      ~(1 << (index%HOST_BITS_PER_WIDE_INT));			\
}
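
/* Usage sketch (the operands here are hypothetical): given two
   edgesets S1 and S2 of `edgeset_size' words each,

     BITSET_UNION (S1, S2, edgeset_size);    computes S1 |= S2
     BITSET_ADD (S1, e, edgeset_size);       turns on bit `e' of S1

   BITSET_ADD and BITSET_REMOVE range-check the index and abort on
   overflow; the word-wise loops above trust the caller's length.  */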
/* Check if the index'th bit in bitset set is on.  */

static char
bitset_member (set, index, len)
     bitset set;
     int index, len;
{
  if (index >= HOST_BITS_PER_WIDE_INT * len)
    abort ();
  return (set[index / HOST_BITS_PER_WIDE_INT] &
          1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0;
}
/* Translate a bit-set SET to a list BL of the bit-set members.  */

static void
extract_bitlst (set, len, bl)
     bitset set;
     int len;
     bitlst *bl;
{
  int i, j, offset;
  unsigned HOST_WIDE_INT word;

  /* bitlst table space is reused in each call to extract_bitlst.  */
  bitlst_table_last = 0;

  bl->first_member = &bitlst_table[bitlst_table_last];
  bl->nr_members = 0;

  for (i = 0; i < len; i++)
    {
      word = set[i];
      offset = i * HOST_BITS_PER_WIDE_INT;
      for (j = 0; word; j++)
        {
          if (word & 1)
            {
              bitlst_table[bitlst_table_last++] = offset;
              (bl->nr_members)++;
            }
          word >>= 1;
          ++offset;
        }
    }
}
/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions ()
{
  int rgn, bb;

  fprintf (dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn,
               rgn_table[rgn].rgn_nr_blocks);
      fprintf (dump, ";;\tbb/block: ");

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
        {
          current_blocks = RGN_BLOCKS (rgn);

          if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
            abort ();

          fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
        }

      fprintf (dump, "\n\n");
    }
}
/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region ()
{
  int i;

  for (i = 0; i < n_basic_blocks; i++)
    {
      rgn_bb_table[i] = i;
      RGN_NR_BLOCKS (i) = 1;
      RGN_BLOCKS (i) = i;
      CONTAINING_RGN (i) = i;
      BLOCK_TO_BB (i) = 0;
    }
  nr_regions = n_basic_blocks;
}
/* Update number of blocks and the estimate for number of insns
   in the region.  Return 1 if the region is "too large" for interblock
   scheduling (compile time considerations), otherwise return 0.  */

static int
too_large (block, num_bbs, num_insns)
     int block, *num_bbs, *num_insns;
{
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
                   INSN_LUID (BLOCK_HEAD (block)));
  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
    return 1;
  else
    return 0;
}
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)			\
{							\
  if (max_hdr[blk] == -1)				\
    max_hdr[blk] = hdr;					\
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])		\
    RESET_BIT (inner, hdr);				\
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])		\
    {							\
      RESET_BIT (inner, max_hdr[blk]);			\
      max_hdr[blk] = hdr;				\
    }							\
}
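
/* For example: suppose blk sits in an outer loop headed by h1 and an
   inner loop headed by h2, so that dfs_nr[h1] < dfs_nr[h2].  If h1 is
   recorded first, encountering h2 takes the last branch: h1 is marked
   as not innermost (RESET_BIT (inner, h1)) and max_hdr[blk] becomes
   h2.  In the opposite order, the middle branch clears `inner' for h1
   and max_hdr[blk] stays h2.  Either way blk ends up associated with
   its innermost enclosing header.  */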
/* Find regions for interblock scheduling.

   A region for scheduling can be:

   * A loop-free procedure, or

   * A reducible inner loop, or

   * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick)
   variables:

   * rgn_nr
   * rgn_table
   * rgn_bb_table
   * block_to_bb
   * containing region

   We use dominator relationships to avoid making regions out of
   non-reducible loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */
static void
find_rgns (s_preds, s_succs, num_preds, num_succs, dom)
     int_list_ptr *s_preds;
     int_list_ptr *s_succs;
     int *num_preds;
     int *num_succs;
     sbitmap *dom;
{
  int *max_hdr, *dfs_nr, *stack, *queue, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0, current_edge = out_edges[0];
  int num_bbs, num_insns, unreachable;
  int too_large_failure;

  /* Note if an edge has been passed.  */
  sbitmap passed;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is in the stack.  */
  sbitmap in_stack;

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = (int *) alloca (n_basic_blocks * sizeof (int));
  dfs_nr = (int *) alloca (n_basic_blocks * sizeof (int));
  bzero ((char *) dfs_nr, n_basic_blocks * sizeof (int));
  stack = (int *) alloca (nr_edges * sizeof (int));

  inner = sbitmap_alloc (n_basic_blocks);
  sbitmap_ones (inner);

  header = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (header);

  passed = sbitmap_alloc (nr_edges);
  sbitmap_zero (passed);

  in_queue = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_stack);

  for (i = 0; i < n_basic_blocks; i++)
    max_hdr[i] = -1;
  /* DFS traversal to find inner loops in the cfg.  */

  sp = -1;
  while (1)
    {
      if (current_edge == 0 || TEST_BIT (passed, current_edge))
        {
          /* We have reached a leaf node or a node that was already
             processed.  Pop edges off the stack until we find
             an edge that has not yet been processed.  */
          while (sp >= 0
                 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
            {
              /* Pop entry off the stack.  */
              current_edge = stack[sp--];
              node = FROM_BLOCK (current_edge);
              child = TO_BLOCK (current_edge);
              RESET_BIT (in_stack, child);
              if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
                UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
              current_edge = NEXT_OUT (current_edge);
            }

          /* See if we have finished the DFS tree traversal.  */
          if (sp < 0 && TEST_BIT (passed, current_edge))
            break;

          /* Nope, continue the traversal with the popped node.  */
          continue;
        }

      /* Process a node.  */
      node = FROM_BLOCK (current_edge);
      child = TO_BLOCK (current_edge);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* If the successor is in the stack, then we've found a loop.
         Mark the loop, if it is not a natural loop, then it will
         be rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
        {
          no_loops = 0;
          SET_BIT (header, child);
          UPDATE_LOOP_RELATIONS (node, child);
          SET_BIT (passed, current_edge);
          current_edge = NEXT_OUT (current_edge);
          continue;
        }

      /* If the child was already visited, then there is no need to visit
         it again.  Just update the loop relationships and restart
         with a new edge.  */
      if (dfs_nr[child])
        {
          if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
            UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
          SET_BIT (passed, current_edge);
          current_edge = NEXT_OUT (current_edge);
          continue;
        }

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_BIT (passed, current_edge);
      current_edge = OUT_EDGES (child);

      /* This is temporary until haifa is converted to use rth's new
         cfg routines which have true entry/exit blocks and the
         appropriate edges from/to those blocks.

         Generally we update dfs_nr for a node when we process its
         out edge.  However, if the node has no out edge then we will
         not set dfs_nr for that node.  This can confuse the scheduler
         into thinking that we have unreachable blocks, which in turn
         disables cross block scheduling.

         So, if we have a node with no out edges, go ahead and mark it
         as reachable now.  */
      if (current_edge == 0)
        dfs_nr[child] = ++count;
    }
  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    if (dfs_nr[i] == 0)
      {
        unreachable = 1;
        break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  /* Compute the in-degree of every block in the graph.  */
  for (i = 0; i < n_basic_blocks; i++)
    degree[i] = num_preds[i];
  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      if (no_loops)
        SET_BIT (header, 0);

      /* Second traversal: find reducible inner loops and topologically sort
         blocks of each region.  */

      queue = (int *) alloca (n_basic_blocks * sizeof (int));

      /* Find blocks which are inner loop headers.  We still have non-reducible
         loops to consider at this point.  */
      for (i = 0; i < n_basic_blocks; i++)
        {
          if (TEST_BIT (header, i) && TEST_BIT (inner, i))
            {
              int_list_ptr ps;
              int j;

              /* Now check that the loop is reducible.  We do this separate
                 from finding inner loops so that we do not find a reducible
                 loop which contains an inner non-reducible loop.

                 A simple way to find reducible/natural loops is to verify
                 that each block in the loop is dominated by the loop
                 header.

                 If there exists a block that is not dominated by the loop
                 header, then the block is reachable from outside the loop
                 and thus the loop is not a natural loop.  */
              for (j = 0; j < n_basic_blocks; j++)
                {
                  /* First identify blocks in the loop, except for the loop
                     entry block.  */
                  if (i == max_hdr[j] && i != j)
                    {
                      /* Now verify that the block is dominated by the loop
                         header.  */
                      if (!TEST_BIT (dom[j], i))
                        break;
                    }
                }

              /* If we exited the loop early, then I is the header of
                 a non-reducible loop and we should quit processing it
                 now.  */
              if (j != n_basic_blocks)
                continue;

              /* I is a header of an inner loop, or block 0 in a subroutine
                 with no loops at all.  */
              head = tail = -1;
              too_large_failure = 0;
              loop_head = max_hdr[i];

              /* Decrease degree of all I's successors for topological
                 ordering.  */
              for (ps = s_succs[i]; ps; ps = ps->next)
                if (INT_LIST_VAL (ps) != EXIT_BLOCK
                    && INT_LIST_VAL (ps) != ENTRY_BLOCK)
                  --degree[INT_LIST_VAL (ps)];

              /* Estimate # insns, and count # blocks in the region.  */
              num_bbs = 1;
              num_insns = (INSN_LUID (BLOCK_END (i))
                           - INSN_LUID (BLOCK_HEAD (i)));

              /* Find all loop latches (blocks with back edges to the loop
                 header) or all the leaf blocks if the cfg has no loops.

                 Place those blocks into the queue.  */
              if (no_loops)
                {
                  for (j = 0; j < n_basic_blocks; j++)
                    /* Leaf nodes have only a single successor which must
                       be EXIT_BLOCK.  */
                    if (num_succs[j] == 1
                        && INT_LIST_VAL (s_succs[j]) == EXIT_BLOCK)
                      {
                        queue[++tail] = j;
                        SET_BIT (in_queue, j);

                        if (too_large (j, &num_bbs, &num_insns))
                          {
                            too_large_failure = 1;
                            break;
                          }
                      }
                }
              else
                {
                  int_list_ptr ps;

                  for (ps = s_preds[i]; ps; ps = ps->next)
                    {
                      node = INT_LIST_VAL (ps);

                      if (node == ENTRY_BLOCK || node == EXIT_BLOCK)
                        continue;

                      if (max_hdr[node] == loop_head && node != i)
                        {
                          /* This is a loop latch.  */
                          queue[++tail] = node;
                          SET_BIT (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              /* Now add all the blocks in the loop to the queue.

                 We know the loop is a natural loop; however the algorithm
                 above will not always mark certain blocks as being in the
                 loop.  Consider:
                    node    children
                     a        b,c
                     b        c
                     c        a,d
                     d        b

                 The algorithm in the DFS traversal may not mark B & D as part
                 of the loop (ie they will not have max_hdr set to A).

                 We know they can not be loop latches (else they would have
                 had max_hdr set since they'd have a backedge to a dominator
                 block).  So we don't need them on the initial queue.

                 We know they are part of the loop because they are dominated
                 by the loop header and can be reached by a backwards walk of
                 the edges starting with nodes on the initial queue.

                 It is safe and desirable to include those nodes in the
                 loop/scheduling region.  To do so we would need to decrease
                 the degree of a node if it is the target of a backedge
                 within the loop itself as the node is placed in the queue.

                 We do not do this because I'm not sure that the actual
                 scheduling code will properly handle this case. ?!? */

              while (head < tail && !too_large_failure)
                {
                  int_list_ptr ps;
                  child = queue[++head];

                  for (ps = s_preds[child]; ps; ps = ps->next)
                    {
                      node = INT_LIST_VAL (ps);

                      /* See discussion above about nodes not marked as in
                         this loop during the initial DFS traversal.  */
                      if (node == ENTRY_BLOCK || node == EXIT_BLOCK
                          || max_hdr[node] != loop_head)
                        {
                          tail = -1;
                          break;
                        }
                      else if (!TEST_BIT (in_queue, node) && node != i)
                        {
                          queue[++tail] = node;
                          SET_BIT (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              if (tail >= 0 && !too_large_failure)
                {
                  /* Place the loop header into list of region blocks.  */
                  degree[i] = -1;
                  rgn_bb_table[idx] = i;
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
                  RGN_BLOCKS (nr_regions) = idx++;
                  CONTAINING_RGN (i) = nr_regions;
                  BLOCK_TO_BB (i) = count = 0;

                  /* Remove blocks from queue[] when their in degree
                     becomes zero.  Repeat until no blocks are left on the
                     list.  This produces a topological list of blocks in
                     the region.  */
                  while (tail >= 0)
                    {
                      int_list_ptr ps;

                      if (head < 0)
                        head = tail;
                      child = queue[head];
                      if (degree[child] == 0)
                        {
                          degree[child] = -1;
                          rgn_bb_table[idx++] = child;
                          BLOCK_TO_BB (child) = ++count;
                          CONTAINING_RGN (child) = nr_regions;
                          queue[head] = queue[tail--];

                          for (ps = s_succs[child]; ps; ps = ps->next)
                            if (INT_LIST_VAL (ps) != ENTRY_BLOCK
                                && INT_LIST_VAL (ps) != EXIT_BLOCK)
                              --degree[INT_LIST_VAL (ps)];
                        }
                      else
                        --head;
                    }
                  ++nr_regions;
                }
            }
        }
    }
1755 /* Any block that did not end up in a region is placed into a region
1756 by itself. */
1757 for (i = 0; i < n_basic_blocks; i++)
1758 if (degree[i] >= 0)
1760 rgn_bb_table[idx] = i;
1761 RGN_NR_BLOCKS (nr_regions) = 1;
1762 RGN_BLOCKS (nr_regions) = idx++;
1763 CONTAINING_RGN (i) = nr_regions++;
1764 BLOCK_TO_BB (i) = 0;
1767 free (passed);
1768 free (header);
1769 free (inner);
1770 free (in_queue);
1771 free (in_stack);
1775 /* Functions for region scheduling information. */
1777 /* Compute dominators, probability, and potential-split-edges of bb.
1778 Assume that these values were already computed for bb's predecessors. */
1780 static void
1781 compute_dom_prob_ps (bb)
1782 int bb;
1784 int nxt_in_edge, fst_in_edge, pred;
1785 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1787 prob[bb] = 0.0;
1788 if (IS_RGN_ENTRY (bb))
1790 BITSET_ADD (dom[bb], 0, bbset_size);
1791 prob[bb] = 1.0;
1792 return;
1795 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1797 /* Initialize dom[bb] to '111..1'. */
1798 BITSET_INVERT (dom[bb], bbset_size);
1802 pred = FROM_BLOCK (nxt_in_edge);
1803 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1805 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1806 edgeset_size);
1808 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1810 nr_out_edges = 1;
1811 nr_rgn_out_edges = 0;
1812 fst_out_edge = OUT_EDGES (pred);
1813 nxt_out_edge = NEXT_OUT (fst_out_edge);
1814 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1815 edgeset_size);
1817 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1819 /* The successor doesn't belong in the region? */
1820 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1821 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1822 ++nr_rgn_out_edges;
1824 while (fst_out_edge != nxt_out_edge)
1826 ++nr_out_edges;
1827 /* The successor doesn't belong in the region? */
1828 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1829 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1830 ++nr_rgn_out_edges;
1831 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1832 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1836 /* Now nr_rgn_out_edges is the number of region-exit edges from
1837 pred, and nr_out_edges will be the number of pred out edges
1838 not leaving the region. */
1839 nr_out_edges -= nr_rgn_out_edges;
1840 if (nr_rgn_out_edges > 0)
1841 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1842 else
1843 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1844 nxt_in_edge = NEXT_IN (nxt_in_edge);
1846 while (fst_in_edge != nxt_in_edge);
1848 BITSET_ADD (dom[bb], bb, bbset_size);
1849 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1851 if (sched_verbose >= 2)
1852 fprintf (dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), (int) (100.0 * prob[bb]));
1853 } /* compute_dom_prob_ps */
1855 /* Functions for target info. */
1857 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1858 Note that bb_trg dominates bb_src. */
1860 static void
1861 split_edges (bb_src, bb_trg, bl)
1862 int bb_src;
1863 int bb_trg;
1864 edgelst *bl;
1866 int es = edgeset_size;
1867 edgeset src = (edgeset) alloca (es * sizeof (HOST_WIDE_INT));
1869 while (es--)
1870 src[es] = (pot_split[bb_src])[es];
1871 BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
1872 extract_bitlst (src, edgeset_size, bl);
1876 /* Find the valid candidate-source-blocks for the target block TRG, compute
1877 their probability, and check if they are speculative or not.
1878 For speculative sources, compute their update-blocks and split-blocks. */
1880 static void
1881 compute_trg_info (trg)
1882 int trg;
1884 register candidate *sp;
1885 edgelst el;
1886 int check_block, update_idx;
1887 int i, j, k, fst_edge, nxt_edge;
1889 /* Define some of the fields for the target bb as well. */
1890 sp = candidate_table + trg;
1891 sp->is_valid = 1;
1892 sp->is_speculative = 0;
1893 sp->src_prob = 100;
1895 for (i = trg + 1; i < current_nr_blocks; i++)
1897 sp = candidate_table + i;
1899 sp->is_valid = IS_DOMINATED (i, trg);
1900 if (sp->is_valid)
1902 sp->src_prob = GET_SRC_PROB (i, trg);
1903 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1906 if (sp->is_valid)
1908 split_edges (i, trg, &el);
1909 sp->is_speculative = (el.nr_members) ? 1 : 0;
1910 if (sp->is_speculative && !flag_schedule_speculative)
1911 sp->is_valid = 0;
1914 if (sp->is_valid)
1916 sp->split_bbs.first_member = &bblst_table[bblst_last];
1917 sp->split_bbs.nr_members = el.nr_members;
1918 for (j = 0; j < el.nr_members; bblst_last++, j++)
1919 bblst_table[bblst_last] =
1920 TO_BLOCK (rgn_edges[el.first_member[j]]);
1921 sp->update_bbs.first_member = &bblst_table[bblst_last];
1922 update_idx = 0;
1923 for (j = 0; j < el.nr_members; j++)
1925 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1926 fst_edge = nxt_edge = OUT_EDGES (check_block);
1929 for (k = 0; k < el.nr_members; k++)
1930 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1931 break;
1933 if (k >= el.nr_members)
1935 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1936 update_idx++;
1939 nxt_edge = NEXT_OUT (nxt_edge);
1941 while (fst_edge != nxt_edge);
1943 sp->update_bbs.nr_members = update_idx;
1946 else
1948 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1950 sp->is_speculative = 0;
1951 sp->src_prob = 0;
1954 } /* compute_trg_info */
1957 /* Print candidates info, for debugging purposes. Callable from debugger. */
1959 void
1960 debug_candidate (i)
1961 int i;
1963 if (!candidate_table[i].is_valid)
1964 return;
1966 if (candidate_table[i].is_speculative)
1968 int j;
1969 fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1971 fprintf (dump, "split path: ");
1972 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1974 int b = candidate_table[i].split_bbs.first_member[j];
1976 fprintf (dump, " %d ", b);
1978 fprintf (dump, "\n");
1980 fprintf (dump, "update path: ");
1981 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1983 int b = candidate_table[i].update_bbs.first_member[j];
1985 fprintf (dump, " %d ", b);
1987 fprintf (dump, "\n");
1989 else
1991 fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1996 /* Print candidates info, for debugging purposes. Callable from debugger. */
1998 void
1999 debug_candidates (trg)
2000 int trg;
2002 int i;
2004 fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n",
2005 BB_TO_BLOCK (trg), trg);
2006 for (i = trg + 1; i < current_nr_blocks; i++)
2007 debug_candidate (i);
2011 /* Functions for speculative scheduling. */
2013 /* Return 0 if x is a set of a register live at the beginning of one
2014 of the split-blocks of src, otherwise return 1. */
2016 static int
2017 check_live_1 (src, x)
2018 int src;
2019 rtx x;
2021 register int i;
2022 register int regno;
2023 register rtx reg = SET_DEST (x);
2025 if (reg == 0)
2026 return 1;
2028 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2029 || GET_CODE (reg) == SIGN_EXTRACT
2030 || GET_CODE (reg) == STRICT_LOW_PART)
2031 reg = XEXP (reg, 0);
2033 if (GET_CODE (reg) == PARALLEL
2034 && GET_MODE (reg) == BLKmode)
2036 register int i;
2037 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2038 if (check_live_1 (src, XVECEXP (reg, 0, i)))
2039 return 1;
2040 return 0;
2043 if (GET_CODE (reg) != REG)
2044 return 1;
2046 regno = REGNO (reg);
2048 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
2050 /* Global registers are assumed live. */
2051 return 0;
2053 else
2055 if (regno < FIRST_PSEUDO_REGISTER)
2057 /* Check for hard registers. */
2058 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2059 while (--j >= 0)
2061 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2063 int b = candidate_table[src].split_bbs.first_member[i];
2065 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
2066 regno + j))
2068 return 0;
2073 else
2075 /* Check for pseudo registers. */
2076 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2078 int b = candidate_table[src].split_bbs.first_member[i];
2080 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
2082 return 0;
2088 return 1;
2092 /* If x is a set of a register R, mark that R is live at the beginning
2093 of every update-block of src. */
2095 static void
2096 update_live_1 (src, x)
2097 int src;
2098 rtx x;
2100 register int i;
2101 register int regno;
2102 register rtx reg = SET_DEST (x);
2104 if (reg == 0)
2105 return;
2107 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2108 || GET_CODE (reg) == SIGN_EXTRACT
2109 || GET_CODE (reg) == STRICT_LOW_PART)
2110 reg = XEXP (reg, 0);
2112 if (GET_CODE (reg) == PARALLEL
2113 && GET_MODE (reg) == BLKmode)
2115 register int i;
2116 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2117 update_live_1 (src, XVECEXP (reg, 0, i));
2118 return;
2121 if (GET_CODE (reg) != REG)
2122 return;
2124 /* Global registers are always live, so the code below does not apply
2125 to them. */
2127 regno = REGNO (reg);
2129 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
2131 if (regno < FIRST_PSEUDO_REGISTER)
2133 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2134 while (--j >= 0)
2136 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2138 int b = candidate_table[src].update_bbs.first_member[i];
2140 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
2141 regno + j);
2145 else
2147 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2149 int b = candidate_table[src].update_bbs.first_member[i];
2151 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
2158 /* Return 1 if insn can be speculatively moved from block src to trg,
2159 otherwise return 0. Called before the first insertion of insn into
2160 the ready list, or before scheduling. */
2162 static int
2163 check_live (insn, src)
2164 rtx insn;
2165 int src;
2167 /* Find the registers set by instruction. */
2168 if (GET_CODE (PATTERN (insn)) == SET
2169 || GET_CODE (PATTERN (insn)) == CLOBBER)
2170 return check_live_1 (src, PATTERN (insn));
2171 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2173 int j;
2174 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2175 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2176 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2177 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
2178 return 0;
2180 return 1;
2183 return 1;
2187 /* Update the live registers info after insn was moved speculatively from
2188 block src to trg. */
2190 static void
2191 update_live (insn, src)
2192 rtx insn;
2193 int src;
2195 /* Find the registers set by instruction. */
2196 if (GET_CODE (PATTERN (insn)) == SET
2197 || GET_CODE (PATTERN (insn)) == CLOBBER)
2198 update_live_1 (src, PATTERN (insn));
2199 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2201 int j;
2202 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2203 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2204 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2205 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
2209 /* Exception Free Loads:
2211 We define five classes of speculative loads: IFREE, IRISKY,
2212 PFREE, PRISKY, and MFREE.
2214 IFREE loads are loads that are proved to be exception-free, just
2215 by examining the load insn. Examples of such loads are loads
2216 from TOC and loads of global data.
2218 IRISKY loads are loads that are proved to be exception-risky,
2219 just by examining the load insn. Examples of such loads are
2220 volatile loads and loads from shared memory.
2222 PFREE loads are loads for which we can prove, by examining other
2223 insns, that they are exception-free. Currently, this class consists
2224 of loads for which we are able to find a "similar load", either in
2225 the target block, or, if only one split-block exists, in that split
2226 block. Load2 is similar to load1 if both have the same single base
2227 register. We identify only part of the similar loads, by finding
2228 an insn upon which both load1 and load2 have a DEF-USE dependence.
2230 PRISKY loads are loads for which we can prove, by examining other
2231 insns, that they are exception-risky. Currently we have two proofs for
2232 such loads. The first proof detects loads that are probably guarded by a
2233 test on the memory address. This proof is based on the
2234 backward and forward data dependence information for the region.
2235 Let load-insn be the examined load.
2236 Load-insn is PRISKY iff ALL the following hold:
2238 - insn1 is not in the same block as load-insn
2239 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
2240 - test-insn is either a compare or a branch, not in the same block
2241 as load-insn
2242 - load-insn is reachable from test-insn
2243 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
2245 This proof might fail when the compare and the load are fed
2246 by an insn not in the region. To solve this, we will add to this
2247 group all loads that have no input DEF-USE dependence.
2249 The second proof detects loads that are directly or indirectly
2250 fed by a speculative load. This proof is affected by the
2251 scheduling process. We will use the flag fed_by_spec_load.
2252 Initially, all insns have this flag reset. After a speculative
2253 motion of an insn, if insn is either a load, or marked as
2254 fed_by_spec_load, we will also mark as fed_by_spec_load every
2255 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
2256 load which is fed_by_spec_load is also PRISKY.
2258 MFREE (maybe-free) loads are all the remaining loads. They may be
2259 exception-free, but we cannot prove it.
2261 Now, all loads in IFREE and PFREE classes are considered
2262 exception-free, while all loads in IRISKY and PRISKY classes are
2263 considered exception-risky. As for loads in the MFREE class,
2264 these are considered either exception-free or exception-risky,
2265 depending on whether we are pessimistic or optimistic. We have
2266 to take the pessimistic approach to assure the safety of
2267 speculative scheduling, but we can take the optimistic approach
2268 by invoking the -fsched_spec_load_dangerous option. */
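/* An illustrative sketch of the classification above (hypothetical
   examples, not taken from this file):

     r1 = <constant pool entry>         -- IFREE: provably safe from the
                                           insn alone.
     r1 = *volatile_p                   -- IRISKY: volatile access.
     r1 = *(p + 8), with r2 = *p
     already in the target block        -- PFREE: a "similar load" with
                                           the same base register exists.
     r1 = *q, where q was produced by
     a speculatively moved load         -- PRISKY: fed_by_spec_load.
     any remaining load                 -- MFREE: assumed risky unless
                                           -fsched_spec_load_dangerous.  */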
2270 enum INSN_TRAP_CLASS
2272 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
2273 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
2276 #define WORST_CLASS(class1, class2) \
2277 ((class1 > class2) ? class1 : class2)
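/* Since the enum above orders the classes by increasing risk,
   WORST_CLASS simply keeps the riskier of the two, e.g.
   WORST_CLASS (IFREE, TRAP_RISKY) == TRAP_RISKY.  */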
2279 /* Indexed by INSN_UID, and set if there's DEF-USE dependence between
2280 some speculatively moved load insn and this one. */
2281 char *fed_by_spec_load;
2282 char *is_load_insn;
2284 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
2285 #define IS_REACHABLE(bb_from, bb_to) \
2286 (bb_from == bb_to \
2287 || IS_RGN_ENTRY (bb_from) \
2288 || (bitset_member (ancestor_edges[bb_to], \
2289 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
2290 edgeset_size)))
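/* In other words (an informal reading, not from the original comments):
   a block trivially reaches itself, a region entry reaches every block,
   and otherwise bb_to is reachable from bb_from exactly when bb_from's
   incoming edge is among the ancestor edges recorded for bb_to by
   compute_dom_prob_ps.  */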
2291 #define FED_BY_SPEC_LOAD(insn) (fed_by_spec_load[INSN_UID (insn)])
2292 #define IS_LOAD_INSN(insn) (is_load_insn[INSN_UID (insn)])
2294 /* Non-zero iff the address is comprised of at most 1 register. */
2295 #define CONST_BASED_ADDRESS_P(x) \
2296 (GET_CODE (x) == REG \
2297 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
2298 || (GET_CODE (x) == LO_SUM)) \
2299 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
2300 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
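/* For example (illustrative): (reg 65) and (plus (reg 65) (const_int 8))
   satisfy this predicate, while (plus (reg 65) (reg 66)) does not,
   since neither operand of the PLUS is a CONST_INT.  */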
2302 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
2304 static void
2305 set_spec_fed (load_insn)
2306 rtx load_insn;
2308 rtx link;
2310 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
2311 if (GET_MODE (link) == VOIDmode)
2312 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
2313 } /* set_spec_fed */
2315 /* On the path from the insn to load_insn_bb, find a conditional
2316 branch depending on insn, that guards the speculative load. */
2318 static int
2319 find_conditional_protection (insn, load_insn_bb)
2320 rtx insn;
2321 int load_insn_bb;
2323 rtx link;
2325 /* Iterate through DEF-USE forward dependences. */
2326 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2328 rtx next = XEXP (link, 0);
2329 if ((CONTAINING_RGN (INSN_BLOCK (next)) ==
2330 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
2331 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
2332 && load_insn_bb != INSN_BB (next)
2333 && GET_MODE (link) == VOIDmode
2334 && (GET_CODE (next) == JUMP_INSN
2335 || find_conditional_protection (next, load_insn_bb)))
2336 return 1;
2338 return 0;
2339 } /* find_conditional_protection */
2341 /* Returns 1 if the same insn1 that participates in the computation
2342 of load_insn's address is feeding a conditional branch that is
2343 guarding load_insn. This is true if we find the two DEF-USE
2344 chains:
2345 insn1 -> ... -> conditional-branch
2346 insn1 -> ... -> load_insn,
2347 and if a flow path exists:
2348 insn1 -> ... -> conditional-branch -> ... -> load_insn,
2349 and if insn1 is on the path
2350 region-entry -> ... -> bb_trg -> ... load_insn.
2352 Locate insn1 by climbing on LOG_LINKS from load_insn.
2353 Locate the branch by following INSN_DEPEND from insn1. */
2355 static int
2356 is_conditionally_protected (load_insn, bb_src, bb_trg)
2357 rtx load_insn;
2358 int bb_src, bb_trg;
2360 rtx link;
2362 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
2364 rtx insn1 = XEXP (link, 0);
2366 /* Must be a DEF-USE dependence upon non-branch. */
2367 if (GET_MODE (link) != VOIDmode
2368 || GET_CODE (insn1) == JUMP_INSN)
2369 continue;
2371 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
2372 if (INSN_BB (insn1) == bb_src
2373 || (CONTAINING_RGN (INSN_BLOCK (insn1))
2374 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
2375 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
2376 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
2377 continue;
2379 /* Now search for the conditional-branch. */
2380 if (find_conditional_protection (insn1, bb_src))
2381 return 1;
2383 /* Recursive step: search another insn1, "above" current insn1. */
2384 return is_conditionally_protected (insn1, bb_src, bb_trg);
2387 /* The chain does not exist. */
2388 return 0;
2389 } /* is_conditionally_protected */
2391 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
2392 load_insn can move speculatively from bb_src to bb_trg. All the
2393 following must hold:
2395 (1) both loads have 1 base register (PFREE_CANDIDATEs).
2396 (2) load_insn and load2 have a def-use dependence upon
2397 the same insn 'insn1'.
2398 (3) either load2 is in bb_trg, or:
2399 - there's only one split-block, and
2400 - load2 is in that single split-block (the escape block).
2402 From all these we can conclude that the two loads access memory
2403 addresses that differ at most by a constant, and hence if moving
2404 load_insn would cause an exception, it would have been caused by
2405 load2 anyhow. */
2407 static int
2408 is_pfree (load_insn, bb_src, bb_trg)
2409 rtx load_insn;
2410 int bb_src, bb_trg;
2412 rtx back_link;
2413 register candidate *candp = candidate_table + bb_src;
2415 if (candp->split_bbs.nr_members != 1)
2416 /* Must have exactly one escape block. */
2417 return 0;
2419 for (back_link = LOG_LINKS (load_insn);
2420 back_link; back_link = XEXP (back_link, 1))
2422 rtx insn1 = XEXP (back_link, 0);
2424 if (GET_MODE (back_link) == VOIDmode)
2426 /* Found a DEF-USE dependence (insn1, load_insn). */
2427 rtx fore_link;
2429 for (fore_link = INSN_DEPEND (insn1);
2430 fore_link; fore_link = XEXP (fore_link, 1))
2432 rtx insn2 = XEXP (fore_link, 0);
2433 if (GET_MODE (fore_link) == VOIDmode)
2435 /* Found a DEF-USE dependence (insn1, insn2). */
2436 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
2437 /* insn2 not guaranteed to be a 1 base reg load. */
2438 continue;
2440 if (INSN_BB (insn2) == bb_trg)
2441 /* insn2 is the similar load, in the target block. */
2442 return 1;
2444 if (*(candp->split_bbs.first_member) == INSN_BLOCK (insn2))
2445 /* insn2 is a similar load, in a split-block. */
2446 return 1;
2452 /* Couldn't find a similar load. */
2453 return 0;
2454 } /* is_pfree */
2456 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
2457 as found by analyzing insn's expression. */
2459 static int
2460 may_trap_exp (x, is_store)
2461 rtx x;
2462 int is_store;
2464 enum rtx_code code;
2466 if (x == 0)
2467 return TRAP_FREE;
2468 code = GET_CODE (x);
2469 if (is_store)
2471 if (code == MEM)
2472 return TRAP_RISKY;
2473 else
2474 return TRAP_FREE;
2476 if (code == MEM)
2478 /* The insn uses memory: a volatile load. */
2479 if (MEM_VOLATILE_P (x))
2480 return IRISKY;
2481 /* An exception-free load. */
2482 if (!may_trap_p (x))
2483 return IFREE;
2484 /* A load with 1 base register, to be further checked. */
2485 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
2486 return PFREE_CANDIDATE;
2487 /* No info on the load, to be further checked. */
2488 return PRISKY_CANDIDATE;
2490 else
2492 const char *fmt;
2493 int i, insn_class = TRAP_FREE;
2495 /* Neither store nor load, check if it may cause a trap. */
2496 if (may_trap_p (x))
2497 return TRAP_RISKY;
2498 /* Recursive step: walk the insn... */
2499 fmt = GET_RTX_FORMAT (code);
2500 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2502 if (fmt[i] == 'e')
2504 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
2505 insn_class = WORST_CLASS (insn_class, tmp_class);
2507 else if (fmt[i] == 'E')
2509 int j;
2510 for (j = 0; j < XVECLEN (x, i); j++)
2512 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
2513 insn_class = WORST_CLASS (insn_class, tmp_class);
2514 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2515 break;
2518 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2519 break;
2521 return insn_class;
2523 } /* may_trap_exp */
2526 /* Classifies insn for the purpose of verifying that it can be
2527 moved speculatively, by examining its pattern, returning:
2528 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
2529 TRAP_FREE: non-load insn.
2530 IFREE: load from a globally safe location.
2531 IRISKY: volatile load.
2532 PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
2533 being either PFREE or PRISKY. */
2535 static int
2536 haifa_classify_insn (insn)
2537 rtx insn;
2539 rtx pat = PATTERN (insn);
2540 int tmp_class = TRAP_FREE;
2541 int insn_class = TRAP_FREE;
2542 enum rtx_code code;
2544 if (GET_CODE (pat) == PARALLEL)
2546 int i, len = XVECLEN (pat, 0);
2548 for (i = len - 1; i >= 0; i--)
2550 code = GET_CODE (XVECEXP (pat, 0, i));
2551 switch (code)
2553 case CLOBBER:
2554 /* Test if it is a 'store'. */
2555 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
2556 break;
2557 case SET:
2558 /* Test if it is a store. */
2559 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
2560 if (tmp_class == TRAP_RISKY)
2561 break;
2562 /* Test if it is a load. */
2563 tmp_class =
2564 WORST_CLASS (tmp_class,
2565 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
2566 break;
2567 case TRAP_IF:
2568 tmp_class = TRAP_RISKY;
2569 break;
2570 default:;
2572 insn_class = WORST_CLASS (insn_class, tmp_class);
2573 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2574 break;
2577 else
2579 code = GET_CODE (pat);
2580 switch (code)
2582 case CLOBBER:
2583 /* Test if it is a 'store'. */
2584 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
2585 break;
2586 case SET:
2587 /* Test if it is a store. */
2588 tmp_class = may_trap_exp (SET_DEST (pat), 1);
2589 if (tmp_class == TRAP_RISKY)
2590 break;
2591 /* Test if it is a load. */
2592 tmp_class =
2593 WORST_CLASS (tmp_class,
2594 may_trap_exp (SET_SRC (pat), 0));
2595 break;
2596 case TRAP_IF:
2597 tmp_class = TRAP_RISKY;
2598 break;
2599 default:;
2601 insn_class = tmp_class;
2604 return insn_class;
2606 } /* haifa_classify_insn */
2608 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
2609 a load moved speculatively, or if load_insn is protected by
2610 a compare on load_insn's address). */
2612 static int
2613 is_prisky (load_insn, bb_src, bb_trg)
2614 rtx load_insn;
2615 int bb_src, bb_trg;
2617 if (FED_BY_SPEC_LOAD (load_insn))
2618 return 1;
2620 if (LOG_LINKS (load_insn) == NULL)
2621 /* Dependence may 'hide' out of the region. */
2622 return 1;
2624 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2625 return 1;
2627 return 0;
2628 } /* is_prisky */
2630 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2631 Return 1 if insn is exception-free (and the motion is valid)
2632 and 0 otherwise. */
2634 static int
2635 is_exception_free (insn, bb_src, bb_trg)
2636 rtx insn;
2637 int bb_src, bb_trg;
2639 int insn_class = haifa_classify_insn (insn);
2641 /* Handle non-load insns. */
2642 switch (insn_class)
2644 case TRAP_FREE:
2645 return 1;
2646 case TRAP_RISKY:
2647 return 0;
2648 default:;
2651 /* Handle loads. */
2652 if (!flag_schedule_speculative_load)
2653 return 0;
2654 IS_LOAD_INSN (insn) = 1;
2655 switch (insn_class)
2657 case IFREE:
2658 return (1);
2659 case IRISKY:
2660 return 0;
2661 case PFREE_CANDIDATE:
2662 if (is_pfree (insn, bb_src, bb_trg))
2663 return 1;
2664 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2665 case PRISKY_CANDIDATE:
2666 if (!flag_schedule_speculative_load_dangerous
2667 || is_prisky (insn, bb_src, bb_trg))
2668 return 0;
2669 break;
2670 default:;
2673 return flag_schedule_speculative_load_dangerous;
2674 } /* is_exception_free */
2677 /* Process an insn's memory dependencies. There are four kinds of
2678 dependencies:
2680 (0) read dependence: read follows read
2681 (1) true dependence: read follows write
2682 (2) anti dependence: write follows read
2683 (3) output dependence: write follows write
2685 We are careful to build only dependencies which actually exist, and
2686 use transitivity to avoid building too many links. */
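/* A minimal illustration (hypothetical insns accessing location A):

     r1 = [A];  r2 = [A];   -- (0) read dependence
     [A] = r1;  r2 = [A];   -- (1) true dependence
     r1 = [A];  [A] = r2;   -- (2) anti dependence
     [A] = r1;  [A] = r2;   -- (3) output dependence

   Transitivity means that if C already depends on B, and B on A, no
   explicit A->C link need be built.  */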
2688 /* Return the INSN_LIST containing INSN in LIST, or NULL
2689 if LIST does not contain INSN. */
2691 HAIFA_INLINE static rtx
2692 find_insn_list (insn, list)
2693 rtx insn;
2694 rtx list;
2696 while (list)
2698 if (XEXP (list, 0) == insn)
2699 return list;
2700 list = XEXP (list, 1);
2702 return 0;
2706 /* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0
2707 otherwise. */
2709 HAIFA_INLINE static char
2710 find_insn_mem_list (insn, x, list, list1)
2711 rtx insn, x;
2712 rtx list, list1;
2714 while (list)
2716 if (XEXP (list, 0) == insn
2717 && XEXP (list1, 0) == x)
2718 return 1;
2719 list = XEXP (list, 1);
2720 list1 = XEXP (list1, 1);
2722 return 0;
2726 /* Compute the function units used by INSN. This caches the value
2727 returned by function_units_used. A function unit is encoded as the
2728 unit number if the value is non-negative and the complement of a
2729 mask if the value is negative. A function unit index is the
2730 non-negative encoding. */
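/* A worked example of the encoding (illustrative): an insn needing only
   unit 3 is encoded as 3, while an insn that may use unit 0 or unit 2
   is encoded as ~((1 << 0) | (1 << 2)) == ~5 == -6.  Note that the
   INSN_UNIT cache below stores non-negative values incremented by one,
   so that a stored zero still means "not yet computed".  */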
2732 HAIFA_INLINE static int
2733 insn_unit (insn)
2734 rtx insn;
2736 register int unit = INSN_UNIT (insn);
2738 if (unit == 0)
2740 recog_memoized (insn);
2742 /* A USE insn, or something else we don't need to understand.
2743 We can't pass these directly to function_units_used because it will
2744 trigger a fatal error for unrecognizable insns. */
2745 if (INSN_CODE (insn) < 0)
2746 unit = -1;
2747 else
2749 unit = function_units_used (insn);
2750 /* Increment non-negative values so we can cache zero. */
2751 if (unit >= 0)
2752 unit++;
2754 /* We only cache 16 bits of the result, so if the value is out of
2755 range, don't cache it. */
2756 if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT
2757 || unit >= 0
2758 || (unit & ~((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0)
2759 INSN_UNIT (insn) = unit;
2761 return (unit > 0 ? unit - 1 : unit);
2764 /* Compute the blockage range for executing INSN on UNIT. This caches
2765 the value returned by the blockage_range_function for the unit.
2766 These values are encoded in an int where the upper half gives the
2767 minimum value and the lower half gives the maximum value. */
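/* Sketch of the range packing (an assumption about the exact layout):
   a range with min 2 and max 5 would be stored roughly as
   (2 << BLOCKAGE_BITS) | 5, with MIN_BLOCKAGE_COST and
   MAX_BLOCKAGE_COST recovering the two halves; ENCODE_BLOCKAGE
   additionally tags the cached value with the unit number.  */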
2769 HAIFA_INLINE static unsigned int
2770 blockage_range (unit, insn)
2771 int unit;
2772 rtx insn;
2774 unsigned int blockage = INSN_BLOCKAGE (insn);
2775 unsigned int range;
2777 if ((int) UNIT_BLOCKED (blockage) != unit + 1)
2779 range = function_units[unit].blockage_range_function (insn);
2780 /* We only cache the blockage range for one unit and then only if
2781 the values fit. */
2782 if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS)
2783 INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range);
2785 else
2786 range = BLOCKAGE_RANGE (blockage);
2788 return range;
2791 /* A vector indexed by function unit instance giving the last insn to use
2792 the unit. The value of the function unit instance index for unit U
2793 instance I is (U + I * FUNCTION_UNITS_SIZE). */
2794 static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
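/* For example (with a hypothetical FUNCTION_UNITS_SIZE of 4), instance
   1 of unit 2 is indexed at 2 + 1 * 4 == 6.  */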
2796 /* A vector indexed by function unit instance giving the minimum time when
2797 the unit will unblock based on the maximum blockage cost. */
2798 static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2800 /* A vector indexed by function unit number giving the number of insns
2801 that remain to use the unit. */
2802 static int unit_n_insns[FUNCTION_UNITS_SIZE];
2804 /* Reset the function unit state to the null state. */
2806 static void
2807 clear_units ()
2809 bzero ((char *) unit_last_insn, sizeof (unit_last_insn));
2810 bzero ((char *) unit_tick, sizeof (unit_tick));
2811 bzero ((char *) unit_n_insns, sizeof (unit_n_insns));
2814 /* Return the issue-delay of an insn. */
2816 HAIFA_INLINE static int
2817 insn_issue_delay (insn)
2818 rtx insn;
2820 int i, delay = 0;
2821 int unit = insn_unit (insn);
2823 /* Efficiency note: in fact, we are working 'hard' to compute a
2824 value that was available in the md file, and is not available in
2825 the function_units[] structure. It would be nice to have this
2826 value there, too. */
2827 if (unit >= 0)
2829 if (function_units[unit].blockage_range_function &&
2830 function_units[unit].blockage_function)
2831 delay = function_units[unit].blockage_function (insn, insn);
2833 else
2834 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2835 if ((unit & 1) != 0 && function_units[i].blockage_range_function
2836 && function_units[i].blockage_function)
2837 delay = MAX (delay, function_units[i].blockage_function (insn, insn));
2839 return delay;
2842 /* Return the actual hazard cost of executing INSN on the unit UNIT,
2843 instance INSTANCE at time CLOCK if the previous actual hazard cost
2844 was COST. */
2846 HAIFA_INLINE static int
2847 actual_hazard_this_instance (unit, instance, insn, clock, cost)
2848 int unit, instance, clock, cost;
2849 rtx insn;
2851 int tick = unit_tick[instance]; /* Time at which the unit will unblock. */
2853 if (tick - clock > cost)
2855 /* The scheduler is operating forward, so unit's last insn is the
2856 executing insn and INSN is the candidate insn. We want a
2857 more exact measure of the blockage if we execute INSN at CLOCK
2858 given when we committed the execution of the unit's last insn.
2860 The blockage value is given by either the unit's max blockage
2861 constant, blockage range function, or blockage function. Use
2862 the most exact form for the given unit. */
2864 if (function_units[unit].blockage_range_function)
2866 if (function_units[unit].blockage_function)
2867 tick += (function_units[unit].blockage_function
2868 (unit_last_insn[instance], insn)
2869 - function_units[unit].max_blockage);
2870 else
2871 tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn))
2872 - function_units[unit].max_blockage);
2874 if (tick - clock > cost)
2875 cost = tick - clock;
2877 return cost;
2880 /* Record INSN as having begun execution on the units encoded by UNIT at
2881 time CLOCK. */
2883 HAIFA_INLINE static void
2884 schedule_unit (unit, insn, clock)
2885 int unit, clock;
2886 rtx insn;
2888 int i;
2890 if (unit >= 0)
2892 int instance = unit;
2893 #if MAX_MULTIPLICITY > 1
2894 /* Find the first free instance of the function unit and use that
2895 one. We assume that one is free. */
2896 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2898 if (!actual_hazard_this_instance (unit, instance, insn, clock, 0))
2899 break;
2900 instance += FUNCTION_UNITS_SIZE;
2902 #endif
2903 unit_last_insn[instance] = insn;
2904 unit_tick[instance] = (clock + function_units[unit].max_blockage);
2906 else
2907 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2908 if ((unit & 1) != 0)
2909 schedule_unit (i, insn, clock);
2912 /* Return the actual hazard cost of executing INSN on the units encoded by
2913 UNIT at time CLOCK if the previous actual hazard cost was COST. */
2915 HAIFA_INLINE static int
2916 actual_hazard (unit, insn, clock, cost)
2917 int unit, clock, cost;
2918 rtx insn;
2920 int i;
2922 if (unit >= 0)
2924 /* Find the instance of the function unit with the minimum hazard. */
2925 int instance = unit;
2926 int best_cost = actual_hazard_this_instance (unit, instance, insn,
2927 clock, cost);
2928 #if MAX_MULTIPLICITY > 1
2929 int this_cost;
2931 if (best_cost > cost)
2933 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2935 instance += FUNCTION_UNITS_SIZE;
2936 this_cost = actual_hazard_this_instance (unit, instance, insn,
2937 clock, cost);
2938 if (this_cost < best_cost)
2940 best_cost = this_cost;
2941 if (this_cost <= cost)
2942 break;
2946 #endif
2947 cost = MAX (cost, best_cost);
2949 else
2950 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2951 if ((unit & 1) != 0)
2952 cost = actual_hazard (i, insn, clock, cost);
2954 return cost;
2957 /* Return the potential hazard cost of executing an instruction on the
2958 units encoded by UNIT if the previous potential hazard cost was COST.
2959 An insn with a large blockage time is chosen in preference to one
2960 with a smaller time; an insn that uses a unit that is more likely
2961 to be used is chosen in preference to one with a unit that is less
2962 used. We are trying to minimize a subsequent actual hazard. */
2964 HAIFA_INLINE static int
2965 potential_hazard (unit, insn, cost)
2966 int unit, cost;
2967 rtx insn;
2969 int i, ncost;
2970 unsigned int minb, maxb;
2972 if (unit >= 0)
2974 minb = maxb = function_units[unit].max_blockage;
2975 if (maxb > 1)
2977 if (function_units[unit].blockage_range_function)
2979 maxb = minb = blockage_range (unit, insn);
2980 maxb = MAX_BLOCKAGE_COST (maxb);
2981 minb = MIN_BLOCKAGE_COST (minb);
2984 if (maxb > 1)
2986 /* Make the number of instructions left dominate. Make the
2987 minimum delay dominate the maximum delay. If all these
2988 are the same, use the unit number to add an arbitrary
2989 ordering. Other terms can be added. */
2990 ncost = minb * 0x40 + maxb;
2991 ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit;
2992 if (ncost > cost)
2993 cost = ncost;
2997 else
2998 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2999 if ((unit & 1) != 0)
3000 cost = potential_hazard (i, insn, cost);
3002 return cost;
3005 /* Compute cost of executing INSN given the dependence LINK on the insn USED.
3006 This is the number of cycles between instruction issue and
3007 instruction results. */
3009 HAIFA_INLINE static int
3010 insn_cost (insn, link, used)
3011 rtx insn, link, used;
3013 register int cost = INSN_COST (insn);
3015 if (cost == 0)
3017 recog_memoized (insn);
3019 /* A USE insn, or something else we don't need to understand.
3020 We can't pass these directly to result_ready_cost because it will
3021 trigger a fatal error for unrecognizable insns. */
3022 if (INSN_CODE (insn) < 0)
3024 INSN_COST (insn) = 1;
3025 return 1;
3027 else
3029 cost = result_ready_cost (insn);
3031 if (cost < 1)
3032 cost = 1;
3034 INSN_COST (insn) = cost;
3038 /* In this case estimate cost without caring how insn is used. */
3039 if (link == 0 && used == 0)
3040 return cost;
3042 /* A USE insn should never require the value used to be computed. This
3043 allows the computation of a function's result and parameter values to
3044 overlap the return and call. */
3045 recog_memoized (used);
3046 if (INSN_CODE (used) < 0)
3047 LINK_COST_FREE (link) = 1;
3049 /* If some dependencies vary the cost, compute the adjustment. Most
3050 commonly, the adjustment is complete: either the cost is ignored
3051 (in the case of an output- or anti-dependence), or the cost is
3052 unchanged. These values are cached in the link as LINK_COST_FREE
3053 and LINK_COST_ZERO. */
3055 if (LINK_COST_FREE (link))
3056 cost = 0;
3057 #ifdef ADJUST_COST
3058 else if (!LINK_COST_ZERO (link))
3060 int ncost = cost;
3062 ADJUST_COST (used, link, insn, ncost);
3063 if (ncost < 1)
3065 LINK_COST_FREE (link) = 1;
3066 ncost = 0;
3068 if (cost == ncost)
3069 LINK_COST_ZERO (link) = 1;
3070 cost = ncost;
3072 #endif
3073 return cost;
3076 /* Compute the priority number for INSN. */
3078 static int
3079 priority (insn)
3080 rtx insn;
3082 int this_priority;
3083 rtx link;
3085 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
3086 return 0;
3088 if ((this_priority = INSN_PRIORITY (insn)) == 0)
3090 if (INSN_DEPEND (insn) == 0)
3091 this_priority = insn_cost (insn, 0, 0);
3092 else
3093 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
3095 rtx next;
3096 int next_priority;
3098 if (RTX_INTEGRATED_P (link))
3099 continue;
3101 next = XEXP (link, 0);
3103 /* Critical path is meaningful within block boundaries only. */
3104 if (INSN_BLOCK (next) != INSN_BLOCK (insn))
3105 continue;
3107 next_priority = insn_cost (insn, link, next) + priority (next);
3108 if (next_priority > this_priority)
3109 this_priority = next_priority;
3111 INSN_PRIORITY (insn) = this_priority;
3113 return this_priority;
3117 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
3118 them to the unused_*_list variables, so that they can be reused. */
3120 static void
3121 free_pending_lists ()
3123 if (current_nr_blocks <= 1)
3125 free_INSN_LIST_list (&pending_read_insns);
3126 free_INSN_LIST_list (&pending_write_insns);
3127 free_EXPR_LIST_list (&pending_read_mems);
3128 free_EXPR_LIST_list (&pending_write_mems);
3130 else
3132 /* Interblock scheduling. */
3133 int bb;
3135 for (bb = 0; bb < current_nr_blocks; bb++)
3137 free_INSN_LIST_list (&bb_pending_read_insns[bb]);
3138 free_INSN_LIST_list (&bb_pending_write_insns[bb]);
3139 free_EXPR_LIST_list (&bb_pending_read_mems[bb]);
3140 free_EXPR_LIST_list (&bb_pending_write_mems[bb]);
3145 /* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST.
3146 The MEM is a memory reference contained within INSN, which we are saving
3147 so that we can do memory aliasing on it. */
3149 static void
3150 add_insn_mem_dependence (insn_list, mem_list, insn, mem)
3151 rtx *insn_list, *mem_list, insn, mem;
3153 register rtx link;
3155 link = alloc_INSN_LIST (insn, *insn_list);
3156 *insn_list = link;
3158 link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list);
3159 *mem_list = link;
3161 pending_lists_length++;
3165 /* Make a dependency between every memory reference on the pending lists
3166 and INSN, thus flushing the pending lists. If ONLY_WRITE, don't flush
3167 the read list. */
3169 static void
3170 flush_pending_lists (insn, only_write)
3171 rtx insn;
3172 int only_write;
3174 rtx u;
3175 rtx link;
3177 while (pending_read_insns && ! only_write)
3179 add_dependence (insn, XEXP (pending_read_insns, 0), REG_DEP_ANTI);
3181 link = pending_read_insns;
3182 pending_read_insns = XEXP (pending_read_insns, 1);
3183 free_INSN_LIST_node (link);
3185 link = pending_read_mems;
3186 pending_read_mems = XEXP (pending_read_mems, 1);
3187 free_EXPR_LIST_node (link);
3189 while (pending_write_insns)
3191 add_dependence (insn, XEXP (pending_write_insns, 0), REG_DEP_ANTI);
3193 link = pending_write_insns;
3194 pending_write_insns = XEXP (pending_write_insns, 1);
3195 free_INSN_LIST_node (link);
3197 link = pending_write_mems;
3198 pending_write_mems = XEXP (pending_write_mems, 1);
3199 free_EXPR_LIST_node (link);
3201 pending_lists_length = 0;
3203 /* last_pending_memory_flush is now a list of insns. */
3204 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
3205 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3207 free_INSN_LIST_list (&last_pending_memory_flush);
3208 last_pending_memory_flush = alloc_INSN_LIST (insn, NULL_RTX);
3211 /* Analyze a single SET, CLOBBER, PRE_DEC, POST_DEC, PRE_INC or POST_INC
3212 rtx, X, creating all dependencies generated by the write to the
3213 destination of X, and reads of everything mentioned. */
3215 static void
3216 sched_analyze_1 (x, insn)
3217 rtx x;
3218 rtx insn;
3220 register int regno;
3221 register rtx dest = XEXP (x, 0);
3222 enum rtx_code code = GET_CODE (x);
3224 if (dest == 0)
3225 return;
3227 if (GET_CODE (dest) == PARALLEL
3228 && GET_MODE (dest) == BLKmode)
3230 register int i;
3231 for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
3232 sched_analyze_1 (XVECEXP (dest, 0, i), insn);
3233 if (GET_CODE (x) == SET)
3234 sched_analyze_2 (SET_SRC (x), insn);
3235 return;
3238 while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG
3239 || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3241 if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3243 /* The second and third arguments are values read by this insn. */
3244 sched_analyze_2 (XEXP (dest, 1), insn);
3245 sched_analyze_2 (XEXP (dest, 2), insn);
3247 dest = XEXP (dest, 0);
3250 if (GET_CODE (dest) == REG)
3252 register int i;
3254 regno = REGNO (dest);
3256 /* A hard reg in a wide mode may really be multiple registers.
3257 If so, mark all of them just like the first. */
3258 if (regno < FIRST_PSEUDO_REGISTER)
3260 i = HARD_REGNO_NREGS (regno, GET_MODE (dest));
3261 while (--i >= 0)
3263 rtx u;
3265 for (u = reg_last_uses[regno + i]; u; u = XEXP (u, 1))
3266 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3268 for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1))
3269 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3271 /* Clobbers need not be ordered with respect to one
3272 another, but sets must be ordered with respect to a
3273 pending clobber. */
3274 if (code == SET)
3276 free_INSN_LIST_list (&reg_last_uses[regno + i]);
3277 for (u = reg_last_clobbers[regno + i]; u; u = XEXP (u, 1))
3278 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3279 SET_REGNO_REG_SET (reg_pending_sets, regno + i);
3281 else
3282 SET_REGNO_REG_SET (reg_pending_clobbers, regno + i);
3284 /* Function calls clobber all call_used regs. */
3285 if (global_regs[regno + i]
3286 || (code == SET && call_used_regs[regno + i]))
3287 for (u = last_function_call; u; u = XEXP (u, 1))
3288 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3291 else
3293 rtx u;
3295 for (u = reg_last_uses[regno]; u; u = XEXP (u, 1))
3296 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3298 for (u = reg_last_sets[regno]; u; u = XEXP (u, 1))
3299 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3301 if (code == SET)
3303 free_INSN_LIST_list (&reg_last_uses[regno]);
3304 for (u = reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3305 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3306 SET_REGNO_REG_SET (reg_pending_sets, regno);
3308 else
3309 SET_REGNO_REG_SET (reg_pending_clobbers, regno);
3311 /* Pseudos that are REG_EQUIV to something may be replaced
3312 by that during reloading. We need only add dependencies for
3313 the address in the REG_EQUIV note. */
3314 if (!reload_completed
3315 && reg_known_equiv_p[regno]
3316 && GET_CODE (reg_known_value[regno]) == MEM)
3317 sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn);
3319 /* Don't let it cross a call after scheduling if it doesn't
3320 already cross one. */
3322 if (REG_N_CALLS_CROSSED (regno) == 0)
3323 for (u = last_function_call; u; u = XEXP (u, 1))
3324 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3327 else if (GET_CODE (dest) == MEM)
3329 /* Writing memory. */
3331 if (pending_lists_length > 32)
3333 /* Flush all pending reads and writes to prevent the pending lists
3334 from getting any larger. Insn scheduling runs too slowly when
3335 these lists get long. The number 32 was chosen because it
3336 seems like a reasonable number. When compiling GCC with itself,
3337 this flush occurs 8 times for sparc, and 10 times for m88k using
3338 the number 32. */
3339 flush_pending_lists (insn, 0);
3341 else
3343 rtx u;
3344 rtx pending, pending_mem;
3346 pending = pending_read_insns;
3347 pending_mem = pending_read_mems;
3348 while (pending)
3350 if (anti_dependence (XEXP (pending_mem, 0), dest))
3351 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3353 pending = XEXP (pending, 1);
3354 pending_mem = XEXP (pending_mem, 1);
3357 pending = pending_write_insns;
3358 pending_mem = pending_write_mems;
3359 while (pending)
3361 if (output_dependence (XEXP (pending_mem, 0), dest))
3362 add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
3364 pending = XEXP (pending, 1);
3365 pending_mem = XEXP (pending_mem, 1);
3368 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
3369 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3371 add_insn_mem_dependence (&pending_write_insns, &pending_write_mems,
3372 insn, dest);
3374 sched_analyze_2 (XEXP (dest, 0), insn);
3377 /* Analyze reads. */
3378 if (GET_CODE (x) == SET)
3379 sched_analyze_2 (SET_SRC (x), insn);
3382 /* Analyze the uses of memory and registers in rtx X in INSN. */
3384 static void
3385 sched_analyze_2 (x, insn)
3386 rtx x;
3387 rtx insn;
3389 register int i;
3390 register int j;
3391 register enum rtx_code code;
3392 register const char *fmt;
3394 if (x == 0)
3395 return;
3397 code = GET_CODE (x);
3399 switch (code)
3401 case CONST_INT:
3402 case CONST_DOUBLE:
3403 case SYMBOL_REF:
3404 case CONST:
3405 case LABEL_REF:
3406 /* Ignore constants. Note that we must handle CONST_DOUBLE here
3407 because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
3408 this does not mean that this insn is using cc0. */
3409 return;
3411 #ifdef HAVE_cc0
3412 case CC0:
3414 rtx link, prev;
3416 /* User of CC0 depends on immediately preceding insn. */
3417 SCHED_GROUP_P (insn) = 1;
3419 /* There may be a note before this insn now, but all notes will
3420 be removed before we actually try to schedule the insns, so
3421 it won't cause a problem later. We must avoid it here though. */
3422 prev = prev_nonnote_insn (insn);
3424 /* Make a copy of all dependencies on the immediately previous insn,
3425 and add to this insn. This is so that all the dependencies will
3426 apply to the group. Remove an explicit dependence on this insn
3427 as SCHED_GROUP_P now represents it. */
3429 if (find_insn_list (prev, LOG_LINKS (insn)))
3430 remove_dependence (insn, prev);
3432 for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
3433 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3435 return;
3437 #endif
3439 case REG:
3441 rtx u;
3442 int regno = REGNO (x);
3443 if (regno < FIRST_PSEUDO_REGISTER)
3445 int i;
3447 i = HARD_REGNO_NREGS (regno, GET_MODE (x));
3448 while (--i >= 0)
3450 reg_last_uses[regno + i]
3451 = alloc_INSN_LIST (insn, reg_last_uses[regno + i]);
3453 for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1))
3454 add_dependence (insn, XEXP (u, 0), 0);
3456 /* ??? This should never happen. */
3457 for (u = reg_last_clobbers[regno + i]; u; u = XEXP (u, 1))
3458 add_dependence (insn, XEXP (u, 0), 0);
3460 if ((call_used_regs[regno + i] || global_regs[regno + i]))
3461 /* Function calls clobber all call_used regs. */
3462 for (u = last_function_call; u; u = XEXP (u, 1))
3463 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3466 else
3468 reg_last_uses[regno] = alloc_INSN_LIST (insn,
3469 reg_last_uses[regno]);
3471 for (u = reg_last_sets[regno]; u; u = XEXP (u, 1))
3472 add_dependence (insn, XEXP (u, 0), 0);
3474 /* ??? This should never happen. */
3475 for (u = reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3476 add_dependence (insn, XEXP (u, 0), 0);
3478 /* Pseudos that are REG_EQUIV to something may be replaced
3479 by that during reloading. We need only add dependencies for
3480 the address in the REG_EQUIV note. */
3481 if (!reload_completed
3482 && reg_known_equiv_p[regno]
3483 && GET_CODE (reg_known_value[regno]) == MEM)
3484 sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn);
3486 /* If the register does not already cross any calls, then add this
3487 insn to the sched_before_next_call list so that it will still
3488 not cross calls after scheduling. */
3489 if (REG_N_CALLS_CROSSED (regno) == 0)
3490 add_dependence (sched_before_next_call, insn, REG_DEP_ANTI);
3492 return;
3495 case MEM:
3497 /* Reading memory. */
3498 rtx u;
3499 rtx pending, pending_mem;
3501 pending = pending_read_insns;
3502 pending_mem = pending_read_mems;
3503 while (pending)
3505 if (read_dependence (XEXP (pending_mem, 0), x))
3506 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3508 pending = XEXP (pending, 1);
3509 pending_mem = XEXP (pending_mem, 1);
3512 pending = pending_write_insns;
3513 pending_mem = pending_write_mems;
3514 while (pending)
3516 if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
3517 x, rtx_varies_p))
3518 add_dependence (insn, XEXP (pending, 0), 0);
3520 pending = XEXP (pending, 1);
3521 pending_mem = XEXP (pending_mem, 1);
3524 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
3525 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3527 /* Always add these dependencies to pending_reads, since
3528 this insn may be followed by a write. */
3529 add_insn_mem_dependence (&pending_read_insns, &pending_read_mems,
3530 insn, x);
3532 /* Take advantage of tail recursion here. */
3533 sched_analyze_2 (XEXP (x, 0), insn);
3534 return;
3537 /* Force pending stores to memory in case a trap handler needs them. */
3538 case TRAP_IF:
3539 flush_pending_lists (insn, 1);
3540 break;
3542 case ASM_OPERANDS:
3543 case ASM_INPUT:
3544 case UNSPEC_VOLATILE:
3546 rtx u;
3548 /* Traditional and volatile asm instructions must be considered to use
3549 and clobber all hard registers, all pseudo-registers and all of
3550 memory. So must TRAP_IF and UNSPEC_VOLATILE operations.
3552 Consider for instance a volatile asm that changes the fpu rounding
3553 mode. An insn should not be moved across this even if it only uses
3554 pseudo-regs because it might give an incorrectly rounded result. */
3555 if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
3557 int max_reg = max_reg_num ();
3558 for (i = 0; i < max_reg; i++)
3560 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3561 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3562 free_INSN_LIST_list (&reg_last_uses[i]);
3564 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3565 add_dependence (insn, XEXP (u, 0), 0);
3567 for (u = reg_last_clobbers[i]; u; u = XEXP (u, 1))
3568 add_dependence (insn, XEXP (u, 0), 0);
3570 reg_pending_sets_all = 1;
3572 flush_pending_lists (insn, 0);
3575 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
3576 We cannot just fall through here, since then we would be confused
3577 by the ASM_INPUT rtxs inside ASM_OPERANDS, which, unlike in their
3578 normal usage, do not indicate traditional asms. */
3580 if (code == ASM_OPERANDS)
3582 for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++)
3583 sched_analyze_2 (ASM_OPERANDS_INPUT (x, j), insn);
3584 return;
3586 break;
3589 case PRE_DEC:
3590 case POST_DEC:
3591 case PRE_INC:
3592 case POST_INC:
3593 /* These both read and modify the result. We must handle them as writes
3594 to get proper dependencies for following instructions. We must handle
3595 them as reads to get proper dependencies from this to previous
3596 instructions. Thus we need to pass them to both sched_analyze_1
3597 and sched_analyze_2. We must call sched_analyze_2 first in order
3598 to get the proper antecedent for the read. */
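/* For example (illustrative): (post_inc (reg sp)) both uses the old
   value of sp and stores a new one, so it must act as a read for
   dependencies on earlier insns and as a write for later ones.  */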
3599 sched_analyze_2 (XEXP (x, 0), insn);
3600 sched_analyze_1 (x, insn);
3601 return;
3603 default:
3604 break;
3607 /* Other cases: walk the insn. */
3608 fmt = GET_RTX_FORMAT (code);
3609 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3611 if (fmt[i] == 'e')
3612 sched_analyze_2 (XEXP (x, i), insn);
3613 else if (fmt[i] == 'E')
3614 for (j = 0; j < XVECLEN (x, i); j++)
3615 sched_analyze_2 (XVECEXP (x, i, j), insn);
3619 /* Analyze an INSN with pattern X to find all dependencies. */
3621 static void
3622 sched_analyze_insn (x, insn, loop_notes)
3623 rtx x, insn;
3624 rtx loop_notes;
3626 register RTX_CODE code = GET_CODE (x);
3627 rtx link;
3628 int maxreg = max_reg_num ();
3629 int i;
3631 if (code == SET || code == CLOBBER)
3632 sched_analyze_1 (x, insn);
3633 else if (code == PARALLEL)
3635 register int i;
3636 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
3638 code = GET_CODE (XVECEXP (x, 0, i));
3639 if (code == SET || code == CLOBBER)
3640 sched_analyze_1 (XVECEXP (x, 0, i), insn);
3641 else
3642 sched_analyze_2 (XVECEXP (x, 0, i), insn);
3645 else
3646 sched_analyze_2 (x, insn);
3648 /* Mark registers CLOBBERED or used by called function. */
3649 if (GET_CODE (insn) == CALL_INSN)
3650 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
3652 if (GET_CODE (XEXP (link, 0)) == CLOBBER)
3653 sched_analyze_1 (XEXP (link, 0), insn);
3654 else
3655 sched_analyze_2 (XEXP (link, 0), insn);
3658 /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic
3659 block, then we must be sure that no instructions are scheduled across it.
3660 Otherwise, the reg_n_refs info (which depends on loop_depth) would
3661 become incorrect. */
3663 if (loop_notes)
3665 int max_reg = max_reg_num ();
3666 int schedule_barrier_found = 0;
3667 rtx link;
3669 /* Update loop_notes with any notes from this insn. Also determine
3670 if any of the notes on the list correspond to instruction scheduling
3671 barriers (loop, eh & setjmp notes, but not range notes). */
3672 link = loop_notes;
3673 while (XEXP (link, 1))
3675 if (INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_BEG
3676 || INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_END
3677 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_BEG
3678 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_END
3679 || INTVAL (XEXP (link, 0)) == NOTE_INSN_SETJMP)
3680 schedule_barrier_found = 1;
3682 link = XEXP (link, 1);
3684 XEXP (link, 1) = REG_NOTES (insn);
3685 REG_NOTES (insn) = loop_notes;
3687 /* Add dependencies if a scheduling barrier was found. */
3688 if (schedule_barrier_found)
3690 for (i = 0; i < max_reg; i++)
3692 rtx u;
3693 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3694 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3695 free_INSN_LIST_list (&reg_last_uses[i]);
3697 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3698 add_dependence (insn, XEXP (u, 0), 0);
3700 for (u = reg_last_clobbers[i]; u; u = XEXP (u, 1))
3701 add_dependence (insn, XEXP (u, 0), 0);
3703 reg_pending_sets_all = 1;
3705 flush_pending_lists (insn, 0);
3710 /* Accumulate clobbers until the next set so that it will be output dependent
3711 on all of them. At the next set we can clear the clobber list, since
3712 subsequent sets will be output dependent on it. */
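	/* Illustrative example: in the sequence
	     (clobber (reg 5)) (clobber (reg 5)) (set (reg 5) ...)
	   the set becomes output dependent on both clobbers; once the set
	   is recorded, the clobber list for reg 5 is cleared and later
	   sets need only depend on this set.  */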
3713 EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i,
3715 free_INSN_LIST_list (&reg_last_sets[i]);
3716 free_INSN_LIST_list (&reg_last_clobbers[i]);
3717 reg_last_sets[i]
3718 = alloc_INSN_LIST (insn, NULL_RTX);
3720 EXECUTE_IF_SET_IN_REG_SET (reg_pending_clobbers, 0, i,
3722 reg_last_clobbers[i]
3723 = alloc_INSN_LIST (insn,
3724 reg_last_clobbers[i]);
3726 CLEAR_REG_SET (reg_pending_sets);
3727 CLEAR_REG_SET (reg_pending_clobbers);
3729 if (reg_pending_sets_all)
3731 for (i = 0; i < maxreg; i++)
3733 free_INSN_LIST_list (&reg_last_sets[i]);
3734 reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
3737 reg_pending_sets_all = 0;
3740 /* Handle function calls and function returns created by the epilogue
3741 threading code. */
3742 if (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
3744 rtx dep_insn;
3745 rtx prev_dep_insn;
3747 /* When scheduling instructions, we make sure calls don't lose their
3748 	    accompanying USE insns by making them depend on one another in order.
3750 Also, we must do the same thing for returns created by the epilogue
3751 threading code. Note this code works only in this special case,
3752 because other passes make no guarantee that they will never emit
3753 an instruction between a USE and a RETURN. There is such a guarantee
3754 for USE instructions immediately before a call. */
3756 prev_dep_insn = insn;
3757 dep_insn = PREV_INSN (insn);
3758 while (GET_CODE (dep_insn) == INSN
3759 && GET_CODE (PATTERN (dep_insn)) == USE
3760 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == REG)
3762 SCHED_GROUP_P (prev_dep_insn) = 1;
3764 /* Make a copy of all dependencies on dep_insn, and add to insn.
3765 This is so that all of the dependencies will apply to the
3766 group. */
3768 for (link = LOG_LINKS (dep_insn); link; link = XEXP (link, 1))
3769 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3771 prev_dep_insn = dep_insn;
3772 dep_insn = PREV_INSN (dep_insn);
3777 /* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
3778 for every dependency. */
3780 static void
3781 sched_analyze (head, tail)
3782 rtx head, tail;
3784 register rtx insn;
3785 register rtx u;
3786 rtx loop_notes = 0;
3788 for (insn = head;; insn = NEXT_INSN (insn))
3790 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
3792 /* Clear out the stale LOG_LINKS from flow. */
3793 free_INSN_LIST_list (&LOG_LINKS (insn));
3795 /* Make each JUMP_INSN a scheduling barrier for memory
3796 references. */
3797 if (GET_CODE (insn) == JUMP_INSN)
3798 last_pending_memory_flush
3799 = alloc_INSN_LIST (insn, last_pending_memory_flush);
3800 sched_analyze_insn (PATTERN (insn), insn, loop_notes);
3801 loop_notes = 0;
3803 else if (GET_CODE (insn) == CALL_INSN)
3805 rtx x;
3806 register int i;
3808 CANT_MOVE (insn) = 1;
3810 /* Clear out the stale LOG_LINKS from flow. */
3811 free_INSN_LIST_list (&LOG_LINKS (insn));
3813 /* Any instruction using a hard register which may get clobbered
3814 by a call needs to be marked as dependent on this call.
3815 This prevents a use of a hard return reg from being moved
3816 past a void call (i.e. it does not explicitly set the hard
3817 return reg). */
3819 /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
3820 all registers, not just hard registers, may be clobbered by this
3821 call. */
3823 /* Insn, being a CALL_INSN, magically depends on
3824 `last_function_call' already. */
3826 if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE
3827 && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
3829 int max_reg = max_reg_num ();
3830 for (i = 0; i < max_reg; i++)
3832 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3833 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3834 free_INSN_LIST_list (&reg_last_uses[i]);
3836 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3837 add_dependence (insn, XEXP (u, 0), 0);
3839 for (u = reg_last_clobbers[i]; u; u = XEXP (u, 1))
3840 add_dependence (insn, XEXP (u, 0), 0);
3842 reg_pending_sets_all = 1;
3844 /* Add a pair of REG_SAVE_NOTEs which we will later
3845 convert back into a NOTE_INSN_SETJMP note. See
3846 reemit_notes for why we use a pair of NOTEs. */
3847 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3848 GEN_INT (0),
3849 REG_NOTES (insn));
3850 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3851 GEN_INT (NOTE_INSN_SETJMP),
3852 REG_NOTES (insn));
3854 else
3856 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3857 if (call_used_regs[i] || global_regs[i])
3859 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3860 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3862 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3863 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3865 SET_REGNO_REG_SET (reg_pending_clobbers, i);
3869 /* For each insn which shouldn't cross a call, add a dependence
3870 between that insn and this call insn. */
3871 x = LOG_LINKS (sched_before_next_call);
3872 while (x)
3874 add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI);
3875 x = XEXP (x, 1);
3877 free_INSN_LIST_list (&LOG_LINKS (sched_before_next_call));
3879 sched_analyze_insn (PATTERN (insn), insn, loop_notes);
3880 loop_notes = 0;
3882 /* In the absence of interprocedural alias analysis, we must flush
3883 	 all pending reads and writes, and start new dependencies
3884 from here. But only flush writes for constant calls (which may
3885 be passed a pointer to something we haven't written yet). */
3886 flush_pending_lists (insn, CONST_CALL_P (insn));
3888 /* Depend this function call (actually, the user of this
3889 function call) on all hard register clobberage. */
3891 /* last_function_call is now a list of insns. */
3892       free_INSN_LIST_list (&last_function_call);
3893 last_function_call = alloc_INSN_LIST (insn, NULL_RTX);
3896 /* See comments on reemit_notes as to why we do this.
3897 	 ??? Actually, the comments in reemit_notes just say what is done, not why.  */
3899 else if (GET_CODE (insn) == NOTE
3900 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_START
3901 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END))
3903 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE, NOTE_RANGE_INFO (insn),
3904 loop_notes);
3905 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3906 GEN_INT (NOTE_LINE_NUMBER (insn)),
3907 loop_notes);
3909 else if (GET_CODE (insn) == NOTE
3910 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG
3911 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
3912 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3913 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
3914 || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
3915 && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
3917 rtx rtx_region;
3919 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3920 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
3921 rtx_region = GEN_INT (NOTE_EH_HANDLER (insn));
3922 else
3923 rtx_region = GEN_INT (0);
3925 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3926 rtx_region,
3927 loop_notes);
3928 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3929 GEN_INT (NOTE_LINE_NUMBER (insn)),
3930 loop_notes);
3931 CONST_CALL_P (loop_notes) = CONST_CALL_P (insn);
3934 if (insn == tail)
3935 return;
3937 abort ();
3940 /* Macros and functions for keeping the priority queue sorted, and
3941 dealing with queueing and dequeueing of instructions. */
3943 #define SCHED_SORT(READY, N_READY) \
3944 do { if ((N_READY) == 2) \
3945 swap_sort (READY, N_READY); \
3946 else if ((N_READY) > 2) \
3947 qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \
3948 while (0)
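/* Descriptive note: a one-element ready list is trivially sorted, a
   two-element list needs at most one swap (swap_sort), and only longer
   lists pay for a full qsort call.  */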
3950 /* Returns a positive value if x is preferred; returns a negative value if
3951 y is preferred. Should never return 0, since that will make the sort
3952 unstable. */
3954 static int
3955 rank_for_schedule (x, y)
3956 const PTR x;
3957 const PTR y;
3959 rtx tmp = *(rtx *)y;
3960 rtx tmp2 = *(rtx *)x;
3961 rtx link;
3962 int tmp_class, tmp2_class, depend_count1, depend_count2;
3963 int val, priority_val, spec_val, prob_val, weight_val;
3966 /* Prefer insn with higher priority. */
3967 priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
3968 if (priority_val)
3969 return priority_val;
3971   /* Prefer an insn with smaller contribution to register pressure.  */
3972 if (!reload_completed &&
3973 (weight_val = INSN_REG_WEIGHT (tmp) - INSN_REG_WEIGHT (tmp2)))
3974 return (weight_val);
3976   /* Some comparisons make sense only in interblock scheduling.  */
3977 if (INSN_BB (tmp) != INSN_BB (tmp2))
3979       /* Prefer an inblock motion over an interblock motion.  */
3980 if ((INSN_BB (tmp2) == target_bb) && (INSN_BB (tmp) != target_bb))
3981 return 1;
3982 if ((INSN_BB (tmp) == target_bb) && (INSN_BB (tmp2) != target_bb))
3983 return -1;
3985       /* Prefer a useful motion over a speculative one.  */
3986 if ((spec_val = IS_SPECULATIVE_INSN (tmp) - IS_SPECULATIVE_INSN (tmp2)))
3987 return (spec_val);
3989 /* Prefer a more probable (speculative) insn. */
3990 prob_val = INSN_PROBABILITY (tmp2) - INSN_PROBABILITY (tmp);
3991 if (prob_val)
3992 return (prob_val);
3995 /* Compare insns based on their relation to the last-scheduled-insn. */
3996 if (last_scheduled_insn)
3998 /* Classify the instructions into three classes:
3999 	 1) Data dependent on last scheduled insn.
4000 2) Anti/Output dependent on last scheduled insn.
4001 3) Independent of last scheduled insn, or has latency of one.
4002 Choose the insn from the highest numbered class if different. */
4003 link = find_insn_list (tmp, INSN_DEPEND (last_scheduled_insn));
4004 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp) == 1)
4005 tmp_class = 3;
4006 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4007 tmp_class = 1;
4008 else
4009 tmp_class = 2;
4011 link = find_insn_list (tmp2, INSN_DEPEND (last_scheduled_insn));
4012 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp2) == 1)
4013 tmp2_class = 3;
4014 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4015 tmp2_class = 1;
4016 else
4017 tmp2_class = 2;
4019 if ((val = tmp2_class - tmp_class))
4020 return val;
4023 /* Prefer the insn which has more later insns that depend on it.
4024 This gives the scheduler more freedom when scheduling later
4025 instructions at the expense of added register pressure. */
4026 depend_count1 = 0;
4027 for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
4028 depend_count1++;
4030 depend_count2 = 0;
4031 for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
4032 depend_count2++;
4034 val = depend_count2 - depend_count1;
4035 if (val)
4036 return val;
4038 /* If insns are equally good, sort by INSN_LUID (original insn order),
4039 so that we make the sort stable. This minimizes instruction movement,
4040 thus minimizing sched's effect on debugging and cross-jumping. */
4041 return INSN_LUID (tmp) - INSN_LUID (tmp2);
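/* An illustrative sketch (not compiled): because rank_for_schedule
   returns a positive value when X is preferred, qsort leaves the most
   desirable insn at the highest index, so schedule_block issues insns
   by popping them from the end of the ready array.  */
#if 0
  SCHED_SORT (ready, n_ready);	/* Best candidate moves to ready[n_ready - 1].  */
  insn = ready[--n_ready];	/* Issue the highest-ranked insn first.  */
#endif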
4044 	/* Resort the array A in which only the element at index N may be out of order.  */
4046 HAIFA_INLINE static void
4047 swap_sort (a, n)
4048 rtx *a;
4049 int n;
4051 rtx insn = a[n - 1];
4052 int i = n - 2;
4054 while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
4056 a[i + 1] = a[i];
4057 i -= 1;
4059 a[i + 1] = insn;
4062 static int max_priority;
4064 /* Add INSN to the insn queue so that it can be executed at least
4065 N_CYCLES after the currently executing insn. Preserve insns
4066 chain for debugging purposes. */
4068 HAIFA_INLINE static void
4069 queue_insn (insn, n_cycles)
4070 rtx insn;
4071 int n_cycles;
4073 int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
4074 rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
4075 insn_queue[next_q] = link;
4076 q_size += 1;
4078 if (sched_verbose >= 2)
4080 fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn));
4082 if (INSN_BB (insn) != target_bb)
4083 fprintf (dump, "(b%d) ", INSN_BLOCK (insn));
4085 fprintf (dump, "queued for %d cycles.\n", n_cycles);
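/* Descriptive note: insn_queue[] behaves as a circular buffer indexed
   by cycle; NEXT_Q_AFTER advances Q_PTR by N_CYCLES slots, and
   queue_to_ready below drains a slot once the clock reaches it.  */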
4090 /* PREV is an insn that is ready to execute. Adjust its priority if that
4091 will help shorten or lengthen register lifetimes as appropriate. Also
4092 	 provide a hook for the target to tweak itself.  */
4094 HAIFA_INLINE static void
4095 adjust_priority (prev)
4096 rtx prev ATTRIBUTE_UNUSED;
4098   /* ??? There used to be code here to try to estimate how an insn
4099 affected register lifetimes, but it did it by looking at REG_DEAD
4100 notes, which we removed in schedule_region. Nor did it try to
4101 take into account register pressure or anything useful like that.
4103 Revisit when we have a machine model to work with and not before. */
4105 #ifdef ADJUST_PRIORITY
4106 ADJUST_PRIORITY (prev);
4107 #endif
4110 /* Clock at which the previous instruction was issued. */
4111 static int last_clock_var;
4113 /* INSN is the "currently executing insn". Launch each insn which was
4114 waiting on INSN. READY is a vector of insns which are ready to fire.
4115 N_READY is the number of elements in READY. CLOCK is the current
4116 cycle. */
4118 static int
4119 schedule_insn (insn, ready, n_ready, clock)
4120 rtx insn;
4121 rtx *ready;
4122 int n_ready;
4123 int clock;
4125 rtx link;
4126 int unit;
4128 unit = insn_unit (insn);
4130 if (sched_verbose >= 2)
4132 fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
4133 INSN_UID (insn));
4134 insn_print_units (insn);
4135 fprintf (dump, "\n");
4138 if (sched_verbose && unit == -1)
4139 visualize_no_unit (insn);
4141 if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
4142 schedule_unit (unit, insn, clock);
4144 if (INSN_DEPEND (insn) == 0)
4145 return n_ready;
4147 /* This is used by the function adjust_priority above. */
4148 if (n_ready > 0)
4149 max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn));
4150 else
4151 max_priority = INSN_PRIORITY (insn);
4153 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
4155 rtx next = XEXP (link, 0);
4156 int cost = insn_cost (insn, link, next);
4158 INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost);
4160 if ((INSN_DEP_COUNT (next) -= 1) == 0)
4162 int effective_cost = INSN_TICK (next) - clock;
4164 /* For speculative insns, before inserting to ready/queue,
4165 check live, exception-free, and issue-delay. */
4166 if (INSN_BB (next) != target_bb
4167 && (!IS_VALID (INSN_BB (next))
4168 || CANT_MOVE (next)
4169 || (IS_SPECULATIVE_INSN (next)
4170 && (insn_issue_delay (next) > 3
4171 || !check_live (next, INSN_BB (next))
4172 || !is_exception_free (next, INSN_BB (next), target_bb)))))
4173 continue;
4175 if (sched_verbose >= 2)
4177 fprintf (dump, ";;\t\tdependences resolved: insn %d ",
4178 INSN_UID (next));
4180 if (current_nr_blocks > 1 && INSN_BB (next) != target_bb)
4181 fprintf (dump, "/b%d ", INSN_BLOCK (next));
4183 if (effective_cost < 1)
4184 fprintf (dump, "into ready\n");
4185 else
4186 fprintf (dump, "into queue with cost=%d\n", effective_cost);
4189 /* Adjust the priority of NEXT and either put it on the ready
4190 list or queue it. */
4191 adjust_priority (next);
4192 if (effective_cost < 1)
4193 ready[n_ready++] = next;
4194 else
4195 queue_insn (next, effective_cost);
4199 /* Annotate the instruction with issue information -- TImode
4200 indicates that the instruction is expected not to be able
4201 to issue on the same cycle as the previous insn. A machine
4202 may use this information to decide how the instruction should
4203 be aligned. */
4204 if (reload_completed && issue_rate > 1)
4206 PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode);
4207 last_clock_var = clock;
4210 return n_ready;
4213 /* Functions for handling of notes. */
4215 /* Delete notes beginning with INSN and put them in the chain
4216 of notes ended by NOTE_LIST.
4217 Returns the insn following the notes. */
4219 static rtx
4220 unlink_other_notes (insn, tail)
4221 rtx insn, tail;
4223 rtx prev = PREV_INSN (insn);
4225 while (insn != tail && GET_CODE (insn) == NOTE)
4227 rtx next = NEXT_INSN (insn);
4228 /* Delete the note from its current position. */
4229 if (prev)
4230 NEXT_INSN (prev) = next;
4231 if (next)
4232 PREV_INSN (next) = prev;
4234 /* See sched_analyze to see how these are handled. */
4235 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
4236 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
4237 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
4238 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_START
4239 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
4240 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
4241 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
4243 /* Insert the note at the end of the notes list. */
4244 PREV_INSN (insn) = note_list;
4245 if (note_list)
4246 NEXT_INSN (note_list) = insn;
4247 note_list = insn;
4250 insn = next;
4252 return insn;
4255 /* Delete line notes beginning with INSN. Record line-number notes so
4256 they can be reused. Returns the insn following the notes. */
4258 static rtx
4259 unlink_line_notes (insn, tail)
4260 rtx insn, tail;
4262 rtx prev = PREV_INSN (insn);
4264 while (insn != tail && GET_CODE (insn) == NOTE)
4266 rtx next = NEXT_INSN (insn);
4268 if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0)
4270 /* Delete the note from its current position. */
4271 if (prev)
4272 NEXT_INSN (prev) = next;
4273 if (next)
4274 PREV_INSN (next) = prev;
4276 /* Record line-number notes so they can be reused. */
4277 LINE_NOTE (insn) = insn;
4279 else
4280 prev = insn;
4282 insn = next;
4284 return insn;
4287 /* Return the head and tail pointers of BB. */
4289 HAIFA_INLINE static void
4290 get_block_head_tail (bb, headp, tailp)
4291 int bb;
4292 rtx *headp;
4293 rtx *tailp;
4296 rtx head;
4297 rtx tail;
4298 int b;
4300 b = BB_TO_BLOCK (bb);
4302 /* HEAD and TAIL delimit the basic block being scheduled. */
4303 head = BLOCK_HEAD (b);
4304 tail = BLOCK_END (b);
4306 /* Don't include any notes or labels at the beginning of the
4307 basic block, or notes at the ends of basic blocks. */
4308 while (head != tail)
4310 if (GET_CODE (head) == NOTE)
4311 head = NEXT_INSN (head);
4312 else if (GET_CODE (tail) == NOTE)
4313 tail = PREV_INSN (tail);
4314 else if (GET_CODE (head) == CODE_LABEL)
4315 head = NEXT_INSN (head);
4316 else
4317 break;
4320 *headp = head;
4321 *tailp = tail;
4324 /* Delete line notes from bb. Save them so they can be later restored
4325 (in restore_line_notes ()). */
4327 static void
4328 rm_line_notes (bb)
4329 int bb;
4331 rtx next_tail;
4332 rtx tail;
4333 rtx head;
4334 rtx insn;
4336 get_block_head_tail (bb, &head, &tail);
4338 if (head == tail
4339 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4340 return;
4342 next_tail = NEXT_INSN (tail);
4343 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4345 rtx prev;
4347 /* Farm out notes, and maybe save them in NOTE_LIST.
4348 This is needed to keep the debugger from
4349 getting completely deranged. */
4350 if (GET_CODE (insn) == NOTE)
4352 prev = insn;
4353 insn = unlink_line_notes (insn, next_tail);
4355 if (prev == tail)
4356 abort ();
4357 if (prev == head)
4358 abort ();
4359 if (insn == next_tail)
4360 abort ();
4365 /* Save line number notes for each insn in bb. */
4367 static void
4368 save_line_notes (bb)
4369 int bb;
4371 rtx head, tail;
4372 rtx next_tail;
4374 /* We must use the true line number for the first insn in the block
4375 that was computed and saved at the start of this pass. We can't
4376 use the current line number, because scheduling of the previous
4377 block may have changed the current line number. */
4379 rtx line = line_note_head[BB_TO_BLOCK (bb)];
4380 rtx insn;
4382 get_block_head_tail (bb, &head, &tail);
4383 next_tail = NEXT_INSN (tail);
4385 for (insn = BLOCK_HEAD (BB_TO_BLOCK (bb));
4386 insn != next_tail;
4387 insn = NEXT_INSN (insn))
4388 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4389 line = insn;
4390 else
4391 LINE_NOTE (insn) = line;
4395 /* After bb was scheduled, insert line notes into the insns list. */
4397 static void
4398 restore_line_notes (bb)
4399 int bb;
4401 rtx line, note, prev, new;
4402 int added_notes = 0;
4403 int b;
4404 rtx head, next_tail, insn;
4406 b = BB_TO_BLOCK (bb);
4408 head = BLOCK_HEAD (b);
4409 next_tail = NEXT_INSN (BLOCK_END (b));
4411 /* Determine the current line-number. We want to know the current
4412 line number of the first insn of the block here, in case it is
4413 different from the true line number that was saved earlier. If
4414 different, then we need a line number note before the first insn
4415 of this block. If it happens to be the same, then we don't want to
4416 emit another line number note here. */
4417 for (line = head; line; line = PREV_INSN (line))
4418 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
4419 break;
4421 /* Walk the insns keeping track of the current line-number and inserting
4422 the line-number notes as needed. */
4423 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4424 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4425 line = insn;
4426 /* This used to emit line number notes before every non-deleted note.
4427 However, this confuses a debugger, because line notes not separated
4428 by real instructions all end up at the same address. I can find no
4429 use for line number notes before other notes, so none are emitted. */
4430 else if (GET_CODE (insn) != NOTE
4431 && (note = LINE_NOTE (insn)) != 0
4432 && note != line
4433 && (line == 0
4434 || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line)
4435 || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line)))
4437 line = note;
4438 prev = PREV_INSN (insn);
4439 if (LINE_NOTE (note))
4441 /* Re-use the original line-number note. */
4442 LINE_NOTE (note) = 0;
4443 PREV_INSN (note) = prev;
4444 NEXT_INSN (prev) = note;
4445 PREV_INSN (insn) = note;
4446 NEXT_INSN (note) = insn;
4448 else
4450 added_notes++;
4451 new = emit_note_after (NOTE_LINE_NUMBER (note), prev);
4452 NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note);
4453 RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note);
4456 if (sched_verbose && added_notes)
4457 fprintf (dump, ";; added %d line-number notes\n", added_notes);
4460 /* After scheduling the function, delete redundant line notes from the
4461 insns list. */
4463 static void
4464 rm_redundant_line_notes ()
4466 rtx line = 0;
4467 rtx insn = get_insns ();
4468 int active_insn = 0;
4469 int notes = 0;
4471 /* Walk the insns deleting redundant line-number notes. Many of these
4472 are already present. The remainder tend to occur at basic
4473 block boundaries. */
4474 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4475 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4477 /* If there are no active insns following, INSN is redundant. */
4478 if (active_insn == 0)
4480 notes++;
4481 NOTE_SOURCE_FILE (insn) = 0;
4482 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4484 /* If the line number is unchanged, LINE is redundant. */
4485 else if (line
4486 && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn)
4487 && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn))
4489 notes++;
4490 NOTE_SOURCE_FILE (line) = 0;
4491 NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED;
4492 line = insn;
4494 else
4495 line = insn;
4496 active_insn = 0;
4498 else if (!((GET_CODE (insn) == NOTE
4499 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED)
4500 || (GET_CODE (insn) == INSN
4501 && (GET_CODE (PATTERN (insn)) == USE
4502 || GET_CODE (PATTERN (insn)) == CLOBBER))))
4503 active_insn++;
4505 if (sched_verbose && notes)
4506 fprintf (dump, ";; deleted %d line-number notes\n", notes);
4509 /* Delete notes between head and tail and put them in the chain
4510 of notes ended by NOTE_LIST. */
4512 static void
4513 rm_other_notes (head, tail)
4514 rtx head;
4515 rtx tail;
4517 rtx next_tail;
4518 rtx insn;
4520 if (head == tail
4521 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4522 return;
4524 next_tail = NEXT_INSN (tail);
4525 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4527 rtx prev;
4529 /* Farm out notes, and maybe save them in NOTE_LIST.
4530 This is needed to keep the debugger from
4531 getting completely deranged. */
4532 if (GET_CODE (insn) == NOTE)
4534 prev = insn;
4536 insn = unlink_other_notes (insn, next_tail);
4538 if (prev == tail)
4539 abort ();
4540 if (prev == head)
4541 abort ();
4542 if (insn == next_tail)
4543 abort ();
4550 	/* Functions for computation of register live/usage info.  */
4550 /* Calculate INSN_REG_WEIGHT for all insns of a block. */
4552 static void
4553 find_insn_reg_weight (bb)
4554 int bb;
4556 rtx insn, next_tail, head, tail;
4558 get_block_head_tail (bb, &head, &tail);
4559 next_tail = NEXT_INSN (tail);
4561 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4563 int reg_weight = 0;
4564 rtx x;
4566 /* Handle register life information. */
4567 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4568 continue;
4570 /* Increment weight for each register born here. */
4571 x = PATTERN (insn);
4572 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4573 && register_operand (SET_DEST (x), VOIDmode))
4574 reg_weight++;
4575 else if (GET_CODE (x) == PARALLEL)
4577 int j;
4578 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
4580 x = XVECEXP (PATTERN (insn), 0, j);
4581 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4582 && register_operand (SET_DEST (x), VOIDmode))
4583 reg_weight++;
4587 /* Decrement weight for each register that dies here. */
4588 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
4590 if (REG_NOTE_KIND (x) == REG_DEAD
4591 || REG_NOTE_KIND (x) == REG_UNUSED)
4592 reg_weight--;
4595 INSN_REG_WEIGHT (insn) = reg_weight;
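	/* Worked example (illustrative): an insn (set (reg 7) (plus (reg 8)
	   (reg 9))) carrying REG_DEAD notes for regs 8 and 9 gets weight
	   1 - 2 = -1; insns with negative weight free more registers than
	   they define, so scheduling them early tends to lower pressure.  */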
4600 /* Scheduling clock, modified in schedule_block () and queue_to_ready ().  */
4600 static int clock_var;
4602 /* Move insns that became ready to fire from queue to ready list. */
4604 static int
4605 queue_to_ready (ready, n_ready)
4606 rtx ready[];
4607 int n_ready;
4609 rtx insn;
4610 rtx link;
4612 q_ptr = NEXT_Q (q_ptr);
4614 /* Add all pending insns that can be scheduled without stalls to the
4615 ready list. */
4616 for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1))
4619 insn = XEXP (link, 0);
4620 q_size -= 1;
4622 if (sched_verbose >= 2)
4623 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4625 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4626 fprintf (dump, "(b%d) ", INSN_BLOCK (insn));
4628 ready[n_ready++] = insn;
4629 if (sched_verbose >= 2)
4630 fprintf (dump, "moving to ready without stalls\n");
4632 insn_queue[q_ptr] = 0;
4634 /* If there are no ready insns, stall until one is ready and add all
4635 of the pending insns at that point to the ready list. */
4636 if (n_ready == 0)
4638 register int stalls;
4640 for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
4642 if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
4644 for (; link; link = XEXP (link, 1))
4646 insn = XEXP (link, 0);
4647 q_size -= 1;
4649 if (sched_verbose >= 2)
4650 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4652 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4653 fprintf (dump, "(b%d) ", INSN_BLOCK (insn));
4655 ready[n_ready++] = insn;
4656 if (sched_verbose >= 2)
4657 fprintf (dump, "moving to ready with %d stalls\n", stalls);
4659 insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
4661 if (n_ready)
4662 break;
4666 if (sched_verbose && stalls)
4667 visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls);
4668 q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
4669 clock_var += stalls;
4671 return n_ready;
4674 /* Print the ready list for debugging purposes. Callable from debugger. */
4676 static void
4677 debug_ready_list (ready, n_ready)
4678 rtx ready[];
4679 int n_ready;
4681 int i;
4683 for (i = 0; i < n_ready; i++)
4685 fprintf (dump, " %d", INSN_UID (ready[i]));
4686 if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb)
4687 fprintf (dump, "/b%d", INSN_BLOCK (ready[i]));
4689 fprintf (dump, "\n");
4692 /* Print names of units on which insn can/should execute, for debugging. */
4694 static void
4695 insn_print_units (insn)
4696 rtx insn;
4698 int i;
4699 int unit = insn_unit (insn);
4701 if (unit == -1)
4702 fprintf (dump, "none");
4703 else if (unit >= 0)
4704 fprintf (dump, "%s", function_units[unit].name);
4705 else
4707 fprintf (dump, "[");
4708 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
4709 if (unit & 1)
4711 fprintf (dump, "%s", function_units[i].name);
4712 if (unit != 1)
4713 fprintf (dump, " ");
4715 fprintf (dump, "]");
4719 /* MAX_VISUAL_LINES is the maximum number of lines in the visualization table
4720    of a basic block.  If more lines are needed, the table is split in two.
4721 n_visual_lines is the number of lines printed so far for a block.
4722 visual_tbl contains the block visualization info.
4723 vis_no_unit holds insns in a cycle that are not mapped to any unit. */
4724 #define MAX_VISUAL_LINES 100
4725 #define INSN_LEN 30
4726 int n_visual_lines;
4727 char *visual_tbl;
4728 int n_vis_no_unit;
4729 rtx vis_no_unit[10];
4731 /* Find the units that are in use in this function.  Required only
4732 for visualization. */
4734 static void
4735 init_target_units ()
4737 rtx insn;
4738 int unit;
4740 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4742 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4743 continue;
4745 unit = insn_unit (insn);
4747 if (unit < 0)
4748 target_units |= ~unit;
4749 else
4750 target_units |= (1 << unit);
4754 /* Return the length of the visualization table. */
4756 static int
4757 get_visual_tbl_length ()
4759 int unit, i;
4760 int n, n1;
4761 char *s;
4763 /* Compute length of one field in line. */
4764 s = (char *) alloca (INSN_LEN + 6);
4765 sprintf (s, " %33s", "uname");
4766 n1 = strlen (s);
4768 /* Compute length of one line. */
4769 n = strlen (";; ");
4770 n += n1;
4771 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
4772 if (function_units[unit].bitmask & target_units)
4773 for (i = 0; i < function_units[unit].multiplicity; i++)
4774 n += n1;
4775 n += n1;
4776 n += strlen ("\n") + 2;
4778 /* Compute length of visualization string. */
4779 return (MAX_VISUAL_LINES * n);
4782 /* Init block visualization debugging info. */
4784 static void
4785 init_block_visualization ()
4787 strcpy (visual_tbl, "");
4788 n_visual_lines = 0;
4789 n_vis_no_unit = 0;
4792 #define BUF_LEN 256
4794 static char *
4795 safe_concat (buf, cur, str)
4796 char *buf;
4797 char *cur;
4798 const char *str;
4800 char *end = buf + BUF_LEN - 2; /* Leave room for null. */
4801 int c;
4803 if (cur > end)
4805 *end = '\0';
4806 return end;
4809 while (cur < end && (c = *str++) != '\0')
4810 *cur++ = c;
4812 *cur = '\0';
4813 return cur;
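/* A minimal usage sketch for safe_concat (illustrative, not compiled;
   the strings are made up).  Each call appends STR at CUR and returns
   the new end of the string, silently truncating once the buffer is
   nearly full.  */
#if 0
  char buf[BUF_LEN];
  char *cur = buf;

  cur = safe_concat (buf, cur, "r3");
  cur = safe_concat (buf, cur, "+");
  cur = safe_concat (buf, cur, "r4");	/* buf now holds "r3+r4".  */
#endif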
4816 /* This recognizes rtxs which I have classified as expressions.  These
4817    always represent some action on values or on the results of other
4818    expressions, which may be stored in objects representing values.  */
4820 static void
4821 print_exp (buf, x, verbose)
4822 char *buf;
4823 rtx x;
4824 int verbose;
4826 char tmp[BUF_LEN];
4827 const char *st[4];
4828 char *cur = buf;
4829 const char *fun = (char *)0;
4830 const char *sep;
4831 rtx op[4];
4832 int i;
4834 for (i = 0; i < 4; i++)
4836 st[i] = (char *)0;
4837 op[i] = NULL_RTX;
4840 switch (GET_CODE (x))
4842 case PLUS:
4843 op[0] = XEXP (x, 0);
4844 if (GET_CODE (XEXP (x, 1)) == CONST_INT
4845 && INTVAL (XEXP (x, 1)) < 0)
4847 st[1] = "-";
4848 op[1] = GEN_INT (-INTVAL (XEXP (x, 1)));
4850 else
4852 st[1] = "+";
4853 op[1] = XEXP (x, 1);
4855 break;
4856 case LO_SUM:
4857 op[0] = XEXP (x, 0);
4858 st[1] = "+low(";
4859 op[1] = XEXP (x, 1);
4860 st[2] = ")";
4861 break;
4862 case MINUS:
4863 op[0] = XEXP (x, 0);
4864 st[1] = "-";
4865 op[1] = XEXP (x, 1);
4866 break;
4867 case COMPARE:
4868 fun = "cmp";
4869 op[0] = XEXP (x, 0);
4870 op[1] = XEXP (x, 1);
4871 break;
4872 case NEG:
4873 st[0] = "-";
4874 op[0] = XEXP (x, 0);
4875 break;
4876 case MULT:
4877 op[0] = XEXP (x, 0);
4878 st[1] = "*";
4879 op[1] = XEXP (x, 1);
4880 break;
4881 case DIV:
4882 op[0] = XEXP (x, 0);
4883 st[1] = "/";
4884 op[1] = XEXP (x, 1);
4885 break;
4886 case UDIV:
4887 fun = "udiv";
4888 op[0] = XEXP (x, 0);
4889 op[1] = XEXP (x, 1);
4890 break;
4891 case MOD:
4892 op[0] = XEXP (x, 0);
4893 st[1] = "%";
4894 op[1] = XEXP (x, 1);
4895 break;
4896 case UMOD:
4897 fun = "umod";
4898 op[0] = XEXP (x, 0);
4899 op[1] = XEXP (x, 1);
4900 break;
4901 case SMIN:
4902 fun = "smin";
4903 op[0] = XEXP (x, 0);
4904 op[1] = XEXP (x, 1);
4905 break;
4906 case SMAX:
4907 fun = "smax";
4908 op[0] = XEXP (x, 0);
4909 op[1] = XEXP (x, 1);
4910 break;
4911 case UMIN:
4912 fun = "umin";
4913 op[0] = XEXP (x, 0);
4914 op[1] = XEXP (x, 1);
4915 break;
4916 case UMAX:
4917 fun = "umax";
4918 op[0] = XEXP (x, 0);
4919 op[1] = XEXP (x, 1);
4920 break;
4921 case NOT:
4922 st[0] = "!";
4923 op[0] = XEXP (x, 0);
4924 break;
4925 case AND:
4926 op[0] = XEXP (x, 0);
4927 st[1] = "&";
4928 op[1] = XEXP (x, 1);
4929 break;
4930 case IOR:
4931 op[0] = XEXP (x, 0);
4932 st[1] = "|";
4933 op[1] = XEXP (x, 1);
4934 break;
4935 case XOR:
4936 op[0] = XEXP (x, 0);
4937 st[1] = "^";
4938 op[1] = XEXP (x, 1);
4939 break;
4940 case ASHIFT:
4941 op[0] = XEXP (x, 0);
4942 st[1] = "<<";
4943 op[1] = XEXP (x, 1);
4944 break;
4945 case LSHIFTRT:
4946 op[0] = XEXP (x, 0);
4947 st[1] = " 0>>";
4948 op[1] = XEXP (x, 1);
4949 break;
4950 case ASHIFTRT:
4951 op[0] = XEXP (x, 0);
4952 st[1] = ">>";
4953 op[1] = XEXP (x, 1);
4954 break;
4955 case ROTATE:
4956 op[0] = XEXP (x, 0);
4957 st[1] = "<-<";
4958 op[1] = XEXP (x, 1);
4959 break;
4960 case ROTATERT:
4961 op[0] = XEXP (x, 0);
4962 st[1] = ">->";
4963 op[1] = XEXP (x, 1);
4964 break;
4965 case ABS:
4966 fun = "abs";
4967 op[0] = XEXP (x, 0);
4968 break;
4969 case SQRT:
4970 fun = "sqrt";
4971 op[0] = XEXP (x, 0);
4972 break;
4973 case FFS:
4974 fun = "ffs";
4975 op[0] = XEXP (x, 0);
4976 break;
4977 case EQ:
4978 op[0] = XEXP (x, 0);
4979 st[1] = "==";
4980 op[1] = XEXP (x, 1);
4981 break;
4982 case NE:
4983 op[0] = XEXP (x, 0);
4984 st[1] = "!=";
4985 op[1] = XEXP (x, 1);
4986 break;
4987 case GT:
4988 op[0] = XEXP (x, 0);
4989 st[1] = ">";
4990 op[1] = XEXP (x, 1);
4991 break;
4992 case GTU:
4993 fun = "gtu";
4994 op[0] = XEXP (x, 0);
4995 op[1] = XEXP (x, 1);
4996 break;
4997 case LT:
4998 op[0] = XEXP (x, 0);
4999 st[1] = "<";
5000 op[1] = XEXP (x, 1);
5001 break;
5002 case LTU:
5003 fun = "ltu";
5004 op[0] = XEXP (x, 0);
5005 op[1] = XEXP (x, 1);
5006 break;
5007 case GE:
5008 op[0] = XEXP (x, 0);
5009 st[1] = ">=";
5010 op[1] = XEXP (x, 1);
5011 break;
5012 case GEU:
5013 fun = "geu";
5014 op[0] = XEXP (x, 0);
5015 op[1] = XEXP (x, 1);
5016 break;
5017 case LE:
5018 op[0] = XEXP (x, 0);
5019 st[1] = "<=";
5020 op[1] = XEXP (x, 1);
5021 break;
5022 case LEU:
5023 fun = "leu";
5024 op[0] = XEXP (x, 0);
5025 op[1] = XEXP (x, 1);
5026 break;
5027 case SIGN_EXTRACT:
5028 fun = (verbose) ? "sign_extract" : "sxt";
5029 op[0] = XEXP (x, 0);
5030 op[1] = XEXP (x, 1);
5031 op[2] = XEXP (x, 2);
5032 break;
5033 case ZERO_EXTRACT:
5034 fun = (verbose) ? "zero_extract" : "zxt";
5035 op[0] = XEXP (x, 0);
5036 op[1] = XEXP (x, 1);
5037 op[2] = XEXP (x, 2);
5038 break;
5039 case SIGN_EXTEND:
5040 fun = (verbose) ? "sign_extend" : "sxn";
5041 op[0] = XEXP (x, 0);
5042 break;
5043 case ZERO_EXTEND:
5044 fun = (verbose) ? "zero_extend" : "zxn";
5045 op[0] = XEXP (x, 0);
5046 break;
5047 case FLOAT_EXTEND:
5048 fun = (verbose) ? "float_extend" : "fxn";
5049 op[0] = XEXP (x, 0);
5050 break;
5051 case TRUNCATE:
5052 fun = (verbose) ? "trunc" : "trn";
5053 op[0] = XEXP (x, 0);
5054 break;
5055 case FLOAT_TRUNCATE:
5056 fun = (verbose) ? "float_trunc" : "ftr";
5057 op[0] = XEXP (x, 0);
5058 break;
5059 case FLOAT:
5060 fun = (verbose) ? "float" : "flt";
5061 op[0] = XEXP (x, 0);
5062 break;
5063 case UNSIGNED_FLOAT:
5064 fun = (verbose) ? "uns_float" : "ufl";
5065 op[0] = XEXP (x, 0);
5066 break;
5067 case FIX:
5068 fun = "fix";
5069 op[0] = XEXP (x, 0);
5070 break;
5071 case UNSIGNED_FIX:
5072 fun = (verbose) ? "uns_fix" : "ufx";
5073 op[0] = XEXP (x, 0);
5074 break;
5075 case PRE_DEC:
5076 st[0] = "--";
5077 op[0] = XEXP (x, 0);
5078 break;
5079 case PRE_INC:
5080 st[0] = "++";
5081 op[0] = XEXP (x, 0);
5082 break;
5083 case POST_DEC:
5084 op[0] = XEXP (x, 0);
5085 st[1] = "--";
5086 break;
5087 case POST_INC:
5088 op[0] = XEXP (x, 0);
5089 st[1] = "++";
5090 break;
5091 case CALL:
5092 st[0] = "call ";
5093 op[0] = XEXP (x, 0);
5094 if (verbose)
5096 st[1] = " argc:";
5097 op[1] = XEXP (x, 1);
5099 break;
5100 case IF_THEN_ELSE:
5101 st[0] = "{(";
5102 op[0] = XEXP (x, 0);
5103 st[1] = ")?";
5104 op[1] = XEXP (x, 1);
5105 st[2] = ":";
5106 op[2] = XEXP (x, 2);
5107 st[3] = "}";
5108 break;
5109 case TRAP_IF:
5110 fun = "trap_if";
5111 op[0] = TRAP_CONDITION (x);
5112 break;
5113 case UNSPEC:
5114 case UNSPEC_VOLATILE:
5116 cur = safe_concat (buf, cur, "unspec");
5117 if (GET_CODE (x) == UNSPEC_VOLATILE)
5118 cur = safe_concat (buf, cur, "/v");
5119 cur = safe_concat (buf, cur, "[");
5120 sep = "";
5121 for (i = 0; i < XVECLEN (x, 0); i++)
5123 print_pattern (tmp, XVECEXP (x, 0, i), verbose);
5124 cur = safe_concat (buf, cur, sep);
5125 cur = safe_concat (buf, cur, tmp);
5126 sep = ",";
5128 cur = safe_concat (buf, cur, "] ");
5129 sprintf (tmp, "%d", XINT (x, 1));
5130 cur = safe_concat (buf, cur, tmp);
5132 break;
5133 default:
5134 /* If (verbose) debug_rtx (x); */
5135 st[0] = GET_RTX_NAME (GET_CODE (x));
5136 break;
5139 /* Print this as a function? */
5140 if (fun)
5142 cur = safe_concat (buf, cur, fun);
5143 cur = safe_concat (buf, cur, "(");
5146 for (i = 0; i < 4; i++)
5148 if (st[i])
5149 cur = safe_concat (buf, cur, st[i]);
5151 if (op[i])
5153 if (fun && i != 0)
5154 cur = safe_concat (buf, cur, ",");
5156 print_value (tmp, op[i], verbose);
5157 cur = safe_concat (buf, cur, tmp);
5161 if (fun)
5162 cur = safe_concat (buf, cur, ")");
5163 } /* print_exp */
5165 /* Print rtxs which I have classified as values:  constants,
5166    registers, labels, symbols and memory accesses.  */
5168 static void
5169 print_value (buf, x, verbose)
5170 char *buf;
5171 rtx x;
5172 int verbose;
5174 char t[BUF_LEN];
5175 char *cur = buf;
5177 switch (GET_CODE (x))
5179 case CONST_INT:
5180 sprintf (t, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
5181 cur = safe_concat (buf, cur, t);
5182 break;
5183 case CONST_DOUBLE:
5184 sprintf (t, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
5185 cur = safe_concat (buf, cur, t);
5186 break;
5187 case CONST_STRING:
5188 cur = safe_concat (buf, cur, "\"");
5189 cur = safe_concat (buf, cur, XSTR (x, 0));
5190 cur = safe_concat (buf, cur, "\"");
5191 break;
5192 case SYMBOL_REF:
5193 cur = safe_concat (buf, cur, "`");
5194 cur = safe_concat (buf, cur, XSTR (x, 0));
5195 cur = safe_concat (buf, cur, "'");
5196 break;
5197 case LABEL_REF:
5198 sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
5199 cur = safe_concat (buf, cur, t);
5200 break;
5201 case CONST:
5202 print_value (t, XEXP (x, 0), verbose);
5203 cur = safe_concat (buf, cur, "const(");
5204 cur = safe_concat (buf, cur, t);
5205 cur = safe_concat (buf, cur, ")");
5206 break;
5207 case HIGH:
5208 print_value (t, XEXP (x, 0), verbose);
5209 cur = safe_concat (buf, cur, "high(");
5210 cur = safe_concat (buf, cur, t);
5211 cur = safe_concat (buf, cur, ")");
5212 break;
5213 case REG:
5214 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
5216 int c = reg_names[ REGNO (x) ][0];
5217 if (c >= '0' && c <= '9')
5218 cur = safe_concat (buf, cur, "%");
5220 cur = safe_concat (buf, cur, reg_names[ REGNO (x) ]);
5222 else
5224 sprintf (t, "r%d", REGNO (x));
5225 cur = safe_concat (buf, cur, t);
5227 break;
5228 case SUBREG:
5229 print_value (t, SUBREG_REG (x), verbose);
5230 cur = safe_concat (buf, cur, t);
5231 sprintf (t, "#%d", SUBREG_WORD (x));
5232 cur = safe_concat (buf, cur, t);
5233 break;
5234 case SCRATCH:
5235 cur = safe_concat (buf, cur, "scratch");
5236 break;
5237 case CC0:
5238 cur = safe_concat (buf, cur, "cc0");
5239 break;
5240 case PC:
5241 cur = safe_concat (buf, cur, "pc");
5242 break;
5243 case MEM:
5244 print_value (t, XEXP (x, 0), verbose);
5245 cur = safe_concat (buf, cur, "[");
5246 cur = safe_concat (buf, cur, t);
5247 cur = safe_concat (buf, cur, "]");
5248 break;
5249 default:
5250 print_exp (t, x, verbose);
5251 cur = safe_concat (buf, cur, t);
5252 break;
5254 } /* print_value */
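/* Illustrative example (not from this file): with the rules above,
   (mem (plus (reg 65) (const_int -4))) would be rendered roughly as
   "[r65-0x4]", assuming reg 65 is a pseudo; MEM becomes brackets and
   the negated constant prints in hex.  */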
5256 /* The next level of insn detail:  recognizing and printing the insn's pattern.  */
5258 static void
5259 print_pattern (buf, x, verbose)
5260 char *buf;
5261 rtx x;
5262 int verbose;
5264 char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
5266 switch (GET_CODE (x))
5268 case SET:
5269 print_value (t1, SET_DEST (x), verbose);
5270 print_value (t2, SET_SRC (x), verbose);
5271 sprintf (buf, "%s=%s", t1, t2);
5272 break;
5273 case RETURN:
5274 sprintf (buf, "return");
5275 break;
5276 case CALL:
5277 print_exp (buf, x, verbose);
5278 break;
5279 case CLOBBER:
5280 print_value (t1, XEXP (x, 0), verbose);
5281 sprintf (buf, "clobber %s", t1);
5282 break;
5283 case USE:
5284 print_value (t1, XEXP (x, 0), verbose);
5285 sprintf (buf, "use %s", t1);
5286 break;
5287 case PARALLEL:
5289 int i;
5291 sprintf (t1, "{");
5292 for (i = 0; i < XVECLEN (x, 0); i++)
5294 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5295 sprintf (t3, "%s%s;", t1, t2);
5296 strcpy (t1, t3);
5298 sprintf (buf, "%s}", t1);
5300 break;
5301 case SEQUENCE:
5303 int i;
5305 sprintf (t1, "%%{");
5306 for (i = 0; i < XVECLEN (x, 0); i++)
5308 print_insn (t2, XVECEXP (x, 0, i), verbose);
5309 sprintf (t3, "%s%s;", t1, t2);
5310 strcpy (t1, t3);
5312 sprintf (buf, "%s%%}", t1);
5314 break;
5315 case ASM_INPUT:
5316 sprintf (buf, "asm {%s}", XSTR (x, 0));
5317 break;
5318 case ADDR_VEC:
5319 break;
5320 case ADDR_DIFF_VEC:
5321 print_value (buf, XEXP (x, 0), verbose);
5322 break;
5323 case TRAP_IF:
5324 print_value (t1, TRAP_CONDITION (x), verbose);
5325 sprintf (buf, "trap_if %s", t1);
5326 break;
5327 case UNSPEC:
5329 int i;
5331 sprintf (t1, "unspec{");
5332 for (i = 0; i < XVECLEN (x, 0); i++)
5334 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5335 sprintf (t3, "%s%s;", t1, t2);
5336 strcpy (t1, t3);
5338 sprintf (buf, "%s}", t1);
5340 break;
5341 case UNSPEC_VOLATILE:
5343 int i;
5345 sprintf (t1, "unspec/v{");
5346 for (i = 0; i < XVECLEN (x, 0); i++)
5348 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5349 sprintf (t3, "%s%s;", t1, t2);
5350 strcpy (t1, t3);
5352 sprintf (buf, "%s}", t1);
5354 break;
5355 default:
5356 print_value (buf, x, verbose);
5358 } /* print_pattern */
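/* Illustrative example (not from this file): with the rules above,
   (set (reg 65) (plus (reg 65) (const_int 1))) would be printed
   roughly as "r65=r65+0x1", assuming reg 65 is a pseudo.  */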
5360 /* This is the main function in the rtl visualization mechanism.  It
5361 accepts an rtx and tries to recognize it as an insn, then prints it
5362 properly in human readable form, resembling assembler mnemonics.
5363    For every insn it prints the insn's UID and the BB the insn belongs to.
5364 (Probably the last "option" should be extended somehow, since it
5365 depends now on sched.c inner variables ...) */
5367 static void
5368 print_insn (buf, x, verbose)
5369 char *buf;
5370 rtx x;
5371 int verbose;
5373 char t[BUF_LEN];
5374 rtx insn = x;
5376 switch (GET_CODE (x))
5378 case INSN:
5379 print_pattern (t, PATTERN (x), verbose);
5380 if (verbose)
5381 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x),
5382 INSN_UID (x), t);
5383 else
5384 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5385 break;
5386 case JUMP_INSN:
5387 print_pattern (t, PATTERN (x), verbose);
5388 if (verbose)
5389 sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x),
5390 INSN_UID (x), t);
5391 else
5392 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5393 break;
5394 case CALL_INSN:
5395 x = PATTERN (insn);
5396 if (GET_CODE (x) == PARALLEL)
5398 x = XVECEXP (x, 0, 0);
5399 print_pattern (t, x, verbose);
5401 else
5402 strcpy (t, "call <...>");
5403 if (verbose)
5404 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (insn),
5405 INSN_UID (insn), t);
5406 else
5407 sprintf (buf, "%-4d %s", INSN_UID (insn), t);
5408 break;
5409 case CODE_LABEL:
5410 sprintf (buf, "L%d:", INSN_UID (x));
5411 break;
5412 case BARRIER:
5413 sprintf (buf, "i% 4d: barrier", INSN_UID (x));
5414 break;
5415 case NOTE:
5416 if (NOTE_LINE_NUMBER (x) > 0)
5417 sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x),
5418 NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x));
5419 else
5420 sprintf (buf, "%4d %s", INSN_UID (x),
5421 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x)));
5422 break;
5423 default:
5424 if (verbose)
5426 sprintf (buf, "Not an INSN at all\n");
5427 debug_rtx (x);
5429 else
5430 sprintf (buf, "i%-4d <What?>", INSN_UID (x));
5432 } /* print_insn */
5434 /* Print visualization debugging info. */
5436 static void
5437 print_block_visualization (b, s)
5438 int b;
5439 const char *s;
5441 int unit, i;
5443 /* Print header. */
5444 fprintf (dump, "\n;; ==================== scheduling visualization for block %d %s \n", b, s);
5446 /* Print names of units. */
5447 fprintf (dump, ";; %-8s", "clock");
5448 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5449 if (function_units[unit].bitmask & target_units)
5450 for (i = 0; i < function_units[unit].multiplicity; i++)
5451 fprintf (dump, " %-33s", function_units[unit].name);
5452 fprintf (dump, " %-8s\n", "no-unit");
5454 fprintf (dump, ";; %-8s", "=====");
5455 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5456 if (function_units[unit].bitmask & target_units)
5457 for (i = 0; i < function_units[unit].multiplicity; i++)
5458 fprintf (dump, " %-33s", "==============================");
5459 fprintf (dump, " %-8s\n", "=======");
5461 /* Print insns in each cycle. */
5462 fprintf (dump, "%s\n", visual_tbl);
5465 /* Print insns in the 'no_unit' column of visualization. */
5467 static void
5468 visualize_no_unit (insn)
5469 rtx insn;
5471 vis_no_unit[n_vis_no_unit] = insn;
5472 n_vis_no_unit++;
5475 /* Print insns scheduled in clock, for visualization. */
5477 static void
5478 visualize_scheduled_insns (b, clock)
5479 int b, clock;
5481 int i, unit;
5483 /* If no more room, split table into two. */
5484 if (n_visual_lines >= MAX_VISUAL_LINES)
5486 print_block_visualization (b, "(incomplete)");
5487 init_block_visualization ();
5490 n_visual_lines++;
5492 sprintf (visual_tbl + strlen (visual_tbl), ";; %-8d", clock);
5493 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5494 if (function_units[unit].bitmask & target_units)
5495 for (i = 0; i < function_units[unit].multiplicity; i++)
5497 int instance = unit + i * FUNCTION_UNITS_SIZE;
5498 rtx insn = unit_last_insn[instance];
5500 /* Print insns that still keep the unit busy. */
5501 if (insn &&
5502 actual_hazard_this_instance (unit, instance, insn, clock, 0))
5504 char str[BUF_LEN];
5505 print_insn (str, insn, 0);
5506 str[INSN_LEN] = '\0';
5507 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", str);
5509 else
5510 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", "------------------------------");
5513 /* Print insns that are not assigned to any unit. */
5514 for (i = 0; i < n_vis_no_unit; i++)
5515 sprintf (visual_tbl + strlen (visual_tbl), " %-8d",
5516 INSN_UID (vis_no_unit[i]));
5517 n_vis_no_unit = 0;
5519 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5522 /* Print stalled cycles. */
5524 static void
5525 visualize_stall_cycles (b, stalls)
5526 int b, stalls;
5528 int i;
5530 /* If no more room, split table into two. */
5531 if (n_visual_lines >= MAX_VISUAL_LINES)
5533 print_block_visualization (b, "(incomplete)");
5534 init_block_visualization ();
5537 n_visual_lines++;
5539 sprintf (visual_tbl + strlen (visual_tbl), ";; ");
5540 for (i = 0; i < stalls; i++)
5541 sprintf (visual_tbl + strlen (visual_tbl), ".");
5542 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5545 /* move_insn1: Remove INSN from insn chain, and link it after LAST insn. */
5547 static rtx
5548 move_insn1 (insn, last)
5549 rtx insn, last;
5551 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
5552 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
5554 NEXT_INSN (insn) = NEXT_INSN (last);
5555 PREV_INSN (NEXT_INSN (last)) = insn;
5557 NEXT_INSN (last) = insn;
5558 PREV_INSN (insn) = last;
5560 return insn;
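/* Descriptive note: this is a plain doubly-linked-list splice.  Given
   the chains ... A <-> INSN <-> B ... and ... LAST <-> N ..., the
   result is ... A <-> B ... and ... LAST <-> INSN <-> N ...; the
   chain is maintained via NEXT_INSN/PREV_INSN.  */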
5563 /* Search INSN for REG_SAVE_NOTE note pairs for NOTE_INSN_SETJMP,
5564 NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
5565    NOTEs.  The REG_SAVE_NOTE note following the first one contains the
5566    saved value of NOTE_BLOCK_NUMBER, which is useful for
5567 NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction
5568 output by the instruction scheduler. Return the new value of LAST. */
5570 static rtx
5571 reemit_notes (insn, last)
5572 rtx insn;
5573 rtx last;
5575 rtx note, retval;
5577 retval = last;
5578 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
5580 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5582 int note_type = INTVAL (XEXP (note, 0));
5583 if (note_type == NOTE_INSN_SETJMP)
5585 retval = emit_note_after (NOTE_INSN_SETJMP, insn);
5586 CONST_CALL_P (retval) = CONST_CALL_P (note);
5587 remove_note (insn, note);
5588 note = XEXP (note, 1);
5590 else if (note_type == NOTE_INSN_RANGE_START
5591 || note_type == NOTE_INSN_RANGE_END)
5593 last = emit_note_before (note_type, last);
5594 remove_note (insn, note);
5595 note = XEXP (note, 1);
5596 NOTE_RANGE_INFO (last) = XEXP (note, 0);
5598 else
5600 last = emit_note_before (note_type, last);
5601 remove_note (insn, note);
5602 note = XEXP (note, 1);
5603 if (note_type == NOTE_INSN_EH_REGION_BEG
5604 || note_type == NOTE_INSN_EH_REGION_END)
5605 NOTE_EH_HANDLER (last) = INTVAL (XEXP (note, 0));
5607 remove_note (insn, note);
5610 return retval;
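/* Descriptive note on the pair layout consumed above (created in
   sched_analyze): the first REG_SAVE_NOTE holds the note type as a
   CONST_INT, and the REG_SAVE_NOTE immediately following it holds the
   auxiliary datum (EH region number, range info, or 0).  */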
5613 /* Move INSN, and all insns which should be issued before it,
5614 due to SCHED_GROUP_P flag. Reemit notes if needed.
5616 Return the last insn emitted by the scheduler, which is the
5617 return value from the first call to reemit_notes. */
5619 static rtx
5620 move_insn (insn, last)
5621 rtx insn, last;
5623 rtx retval = NULL;
5625 /* If INSN has SCHED_GROUP_P set, then issue it and any other
5626 insns with SCHED_GROUP_P set first. */
5627 while (SCHED_GROUP_P (insn))
5629 rtx prev = PREV_INSN (insn);
5631 /* Move a SCHED_GROUP_P insn. */
5632 move_insn1 (insn, last);
5633 /* If this is the first call to reemit_notes, then record
5634 its return value. */
5635 if (retval == NULL_RTX)
5636 retval = reemit_notes (insn, insn);
5637 else
5638 reemit_notes (insn, insn);
5639 insn = prev;
5642 /* Now move the first non SCHED_GROUP_P insn. */
5643 move_insn1 (insn, last);
5645 /* If this is the first call to reemit_notes, then record
5646 its return value. */
5647 if (retval == NULL_RTX)
5648 retval = reemit_notes (insn, insn);
5649 else
5650 reemit_notes (insn, insn);
5652 return retval;
5655 /* Return an insn which represents a SCHED_GROUP, which is
5656 the last insn in the group. */
5658 static rtx
5659 group_leader (insn)
5660 rtx insn;
5662 rtx prev;
5664   do
5666       prev = insn;
5667 insn = next_nonnote_insn (insn);
5669 while (insn && SCHED_GROUP_P (insn) && (GET_CODE (insn) != CODE_LABEL));
5671 return prev;
5674 /* Use forward list scheduling to rearrange insns of block BB in region RGN,
5675 possibly bringing insns from subsequent blocks in the same region.
5676 Return number of insns scheduled. */
5678 static int
5679 schedule_block (bb, rgn_n_insns)
5680 int bb;
5681 int rgn_n_insns;
5683 /* Local variables. */
5684 rtx insn, last;
5685 rtx *ready;
5686 int n_ready = 0;
5687 int can_issue_more;
5689 /* Flow block of this bb. */
5690 int b = BB_TO_BLOCK (bb);
5692 /* target_n_insns == number of insns in b before scheduling starts.
5693 sched_target_n_insns == how many of b's insns were scheduled.
5694 sched_n_insns == how many insns were scheduled in b. */
5695 int target_n_insns = 0;
5696 int sched_target_n_insns = 0;
5697 int sched_n_insns = 0;
5699 #define NEED_NOTHING 0
5700 #define NEED_HEAD 1
5701 #define NEED_TAIL 2
5702 int new_needs;
5704 /* Head/tail info for this block. */
5705 rtx prev_head;
5706 rtx next_tail;
5707 rtx head;
5708 rtx tail;
5709 int bb_src;
5711 /* We used to have code to avoid getting parameters moved from hard
5712 argument registers into pseudos.
5714 However, it was removed when it proved to be of marginal benefit
5715 and caused problems because schedule_block and compute_forward_dependences
5716 had different notions of what the "head" insn was. */
5717 get_block_head_tail (bb, &head, &tail);
5719 /* Interblock scheduling could have moved the original head insn from this
5720    block into a preceding block.  This may also cause schedule_block and
5721 compute_forward_dependences to have different notions of what the
5722 "head" insn was.
5724 If the interblock movement happened to make this block start with
5725 some notes (LOOP, EH or SETJMP) before the first real insn, then
5726 HEAD will have various special notes attached to it which must be
5727 removed so that we don't end up with extra copies of the notes. */
5728 if (GET_RTX_CLASS (GET_CODE (head)) == 'i')
5730 rtx note;
5732 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
5733 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5734 remove_note (head, note);
5737 next_tail = NEXT_INSN (tail);
5738 prev_head = PREV_INSN (head);
5740 /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
5741 to schedule this block. */
5742 if (head == tail
5743 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5744 return (sched_n_insns);
5746 /* Debug info. */
5747 if (sched_verbose)
5749 fprintf (dump, ";; ======================================================\n");
5750 fprintf (dump,
5751 ";; -- basic block %d from %d to %d -- %s reload\n",
5752 b, INSN_UID (BLOCK_HEAD (b)), INSN_UID (BLOCK_END (b)),
5753 (reload_completed ? "after" : "before"));
5754 fprintf (dump, ";; ======================================================\n");
5755 fprintf (dump, "\n");
5757 visual_tbl = (char *) alloca (get_visual_tbl_length ());
5758 init_block_visualization ();
5761 /* Remove remaining note insns from the block, save them in
5762 note_list. These notes are restored at the end of
5763 schedule_block (). */
5764 note_list = 0;
5765 rm_other_notes (head, tail);
5767 target_bb = bb;
5769 /* Prepare current target block info. */
5770 if (current_nr_blocks > 1)
5772 candidate_table = (candidate *) alloca (current_nr_blocks
5773 * sizeof (candidate));
5775 bblst_last = 0;
5776 /* ??? It is not clear why bblst_size is computed this way. The original
5777 number was clearly too small as it resulted in compiler failures.
5778    Multiplying the original number by 2 (to account for update_bbs
5779 members) seems to be a reasonable solution. */
5780 /* ??? Or perhaps there is a bug somewhere else in this file? */
5781 bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2;
5782 bblst_table = (int *) alloca (bblst_size * sizeof (int));
5784 bitlst_table_last = 0;
5785 bitlst_table_size = rgn_nr_edges;
5786 bitlst_table = (int *) alloca (rgn_nr_edges * sizeof (int));
5788 compute_trg_info (bb);
5791 clear_units ();
5793 /* Allocate the ready list. */
5794 ready = (rtx *) alloca ((rgn_n_insns + 1) * sizeof (rtx));
5796 /* Print debugging information. */
5797 if (sched_verbose >= 5)
5798 debug_dependencies ();
5801 /* Initialize ready list with all 'ready' insns in target block.
5802 Count number of insns in the target block being scheduled. */
5803 n_ready = 0;
5804 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5806 rtx next;
5808 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5809 continue;
5810 next = NEXT_INSN (insn);
5812 if (INSN_DEP_COUNT (insn) == 0
5813 && (SCHED_GROUP_P (next) == 0 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5814 ready[n_ready++] = insn;
5815 if (!(SCHED_GROUP_P (insn)))
5816 target_n_insns++;
5819 /* Add to ready list all 'ready' insns in valid source blocks.
5820 For speculative insns, check-live, exception-free, and
5821 issue-delay. */
5822 for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++)
5823 if (IS_VALID (bb_src))
5825 rtx src_head;
5826 rtx src_next_tail;
5827 rtx tail, head;
5829 get_block_head_tail (bb_src, &head, &tail);
5830 src_next_tail = NEXT_INSN (tail);
5831 src_head = head;
5833 if (head == tail
5834 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5835 continue;
5837 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
5839 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5840 continue;
5842 if (!CANT_MOVE (insn)
5843 && (!IS_SPECULATIVE_INSN (insn)
5844 || (insn_issue_delay (insn) <= 3
5845 && check_live (insn, bb_src)
5846 && is_exception_free (insn, bb_src, target_bb))))
5849 rtx next;
5851 	      /* Note that we haven't squirreled away the notes for
5852 	         blocks other than the current one.  So if this is a
5853 speculative insn, NEXT might otherwise be a note. */
5854 next = next_nonnote_insn (insn);
5855 if (INSN_DEP_COUNT (insn) == 0
5856 && (SCHED_GROUP_P (next) == 0
5857 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5858 ready[n_ready++] = insn;
5863 #ifdef MD_SCHED_INIT
5864 MD_SCHED_INIT (dump, sched_verbose);
5865 #endif
5867 /* No insns scheduled in this block yet. */
5868 last_scheduled_insn = 0;
5870 /* Q_SIZE is the total number of insns in the queue. */
5871 q_ptr = 0;
5872 q_size = 0;
5873 last_clock_var = 0;
5874 bzero ((char *) insn_queue, sizeof (insn_queue));
5876 /* Start just before the beginning of time. */
5877 clock_var = -1;
5879 /* We start inserting insns after PREV_HEAD. */
5880 last = prev_head;
5882 /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */
5883 new_needs = (NEXT_INSN (prev_head) == BLOCK_HEAD (b)
5884 ? NEED_HEAD : NEED_NOTHING);
5885 if (PREV_INSN (next_tail) == BLOCK_END (b))
5886 new_needs |= NEED_TAIL;
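/* NEW_NEEDS records whether the scheduled sequence will supply a new
   first and/or last insn for block B, so that BLOCK_HEAD (b) and
   BLOCK_END (b) can be repointed once the insns have been rechained
   (see "Update target block boundaries" below).  */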
5888 /* Loop until all the insns in BB are scheduled. */
5889 while (sched_target_n_insns < target_n_insns)
5891 int b1;
5893 clock_var++;
5895 /* Add to the ready list all pending insns that can be issued now.
5896 If there are no ready insns, increment clock until one
5897 is ready and add all pending insns at that point to the ready
5898 list. */
5899 n_ready = queue_to_ready (ready, n_ready);
5901 if (n_ready == 0)
5902 abort ();
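/* INSN_QUEUE is the stall buffer feeding this loop: queue_insn files
   an insn under the future clock tick at which its hazard clears, and
   queue_to_ready, rotating Q_PTR as CLOCK_VAR advances, drains the
   current tick's bucket back onto the ready list.  */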
5904 if (sched_verbose >= 2)
5906 fprintf (dump, ";;\t\tReady list after queue_to_ready: ");
5907 debug_ready_list (ready, n_ready);
5910 /* Sort the ready list based on priority. */
5911 SCHED_SORT (ready, n_ready);
5913 /* Allow the target to reorder the list, typically for
5914 better instruction bundling. */
5915 #ifdef MD_SCHED_REORDER
5916 MD_SCHED_REORDER (dump, sched_verbose, ready, n_ready, clock_var,
5917 can_issue_more);
5918 #else
5919 can_issue_more = issue_rate;
5920 #endif
5922 if (sched_verbose)
5924 fprintf (dump, "\n;;\tReady list (t =%3d): ", clock_var);
5925 debug_ready_list (ready, n_ready);
5928 /* Issue insns from ready list. */
5929 while (n_ready != 0 && can_issue_more)
5931 /* Select and remove the insn from the ready list. */
5932 rtx insn = ready[--n_ready];
5933 int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
5935 if (cost >= 1)
5937 queue_insn (insn, cost);
5938 continue;
5941 /* An interblock motion? */
5942 if (INSN_BB (insn) != target_bb)
5944 rtx temp;
5946 if (IS_SPECULATIVE_INSN (insn))
5948 if (!check_live (insn, INSN_BB (insn)))
5949 continue;
5950 update_live (insn, INSN_BB (insn));
5952 /* For speculative load, mark insns fed by it. */
5953 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
5954 set_spec_fed (insn);
5956 nr_spec++;
5958 nr_inter++;
5960 temp = insn;
5961 while (SCHED_GROUP_P (temp))
5962 temp = PREV_INSN (temp);
5964 /* Update source block boundaries. */
5965 b1 = INSN_BLOCK (temp);
5966 if (temp == BLOCK_HEAD (b1)
5967 && insn == BLOCK_END (b1))
5969 /* We moved all the insns in the basic block.
5970 Emit a note after the last insn and update the
5971 begin/end boundaries to point to the note. */
5972 emit_note_after (NOTE_INSN_DELETED, insn);
5973 BLOCK_END (b1) = NEXT_INSN (insn);
5974 BLOCK_HEAD (b1) = NEXT_INSN (insn);
5976 else if (insn == BLOCK_END (b1))
5978 /* We took insns from the end of the basic block,
5979 so update the end of block boundary so that it
5980 points to the first insn we did not move. */
5981 BLOCK_END (b1) = PREV_INSN (temp);
5983 else if (temp == BLOCK_HEAD (b1))
5985 /* We took insns from the start of the basic block,
5986 so update the start of block boundary so that
5987 it points to the first insn we did not move. */
5988 BLOCK_HEAD (b1) = NEXT_INSN (insn);
5991 else
5993 /* In-block motion. */
5994 sched_target_n_insns++;
5997 last_scheduled_insn = insn;
5998 last = move_insn (insn, last);
5999 sched_n_insns++;
6001 #ifdef MD_SCHED_VARIABLE_ISSUE
6002 MD_SCHED_VARIABLE_ISSUE (dump, sched_verbose, insn,
6003 can_issue_more);
6004 #else
6005 can_issue_more--;
6006 #endif
6008 n_ready = schedule_insn (insn, ready, n_ready, clock_var);
6010 /* Close this block after scheduling its jump. */
6011 if (GET_CODE (last_scheduled_insn) == JUMP_INSN)
6012 break;
6015 /* Debug info. */
6016 if (sched_verbose)
6017 visualize_scheduled_insns (b, clock_var);
6020 /* Debug info. */
6021 if (sched_verbose)
6023 fprintf (dump, ";;\tReady list (final): ");
6024 debug_ready_list (ready, n_ready);
6025 print_block_visualization (b, "");
6028 /* Sanity check -- queue must be empty now. Meaningless if region has
6029 multiple bbs. */
6030 if (current_nr_blocks > 1)
6031 if (!flag_schedule_interblock && q_size != 0)
6032 abort ();
6034 /* Update head/tail boundaries. */
6035 head = NEXT_INSN (prev_head);
6036 tail = last;
6038 /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
6039 previously found among the insns. Insert them at the beginning
6040 of the insns. */
6041 if (note_list != 0)
6043 rtx note_head = note_list;
6045 while (PREV_INSN (note_head))
6047 note_head = PREV_INSN (note_head);
6050 PREV_INSN (note_head) = PREV_INSN (head);
6051 NEXT_INSN (PREV_INSN (head)) = note_head;
6052 PREV_INSN (head) = note_list;
6053 NEXT_INSN (note_list) = head;
6054 head = note_head;
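/* After the splice above, the chain around the block head reads, in
   NEXT_INSN order:

	PREV_INSN (old head) -> note_head -> ... -> note_list -> old head

   i.e. the saved notes sit between the insn preceding the block and its
   first scheduled insn, and HEAD now names the first note.  */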
6057 /* Update target block boundaries. */
6058 if (new_needs & NEED_HEAD)
6059 BLOCK_HEAD (b) = head;
6061 if (new_needs & NEED_TAIL)
6062 BLOCK_END (b) = tail;
6064 /* Debugging. */
6065 if (sched_verbose)
6067 fprintf (dump, ";; total time = %d\n;; new basic block head = %d\n",
6068 clock_var, INSN_UID (BLOCK_HEAD (b)));
6069 fprintf (dump, ";; new basic block end = %d\n\n",
6070 INSN_UID (BLOCK_END (b)));
6073 return (sched_n_insns);
6074 } /* schedule_block () */
6077 /* Print the bit-set of registers, S, callable from debugger. */
6079 extern void
6080 debug_reg_vector (s)
6081 regset s;
6083 int regno;
6085 EXECUTE_IF_SET_IN_REG_SET (s, 0, regno,
6087 fprintf (dump, " %d", regno);
6090 fprintf (dump, "\n");
6093 /* Use the backward dependences from LOG_LINKS to build
6094 forward dependences in INSN_DEPEND. */
6096 static void
6097 compute_block_forward_dependences (bb)
6098 int bb;
6100 rtx insn, link;
6101 rtx tail, head;
6102 rtx next_tail;
6103 enum reg_note dep_type;
6105 get_block_head_tail (bb, &head, &tail);
6106 next_tail = NEXT_INSN (tail);
6107 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6109 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6110 continue;
6112 insn = group_leader (insn);
6114 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6116 rtx x = group_leader (XEXP (link, 0));
6117 rtx new_link;
6119 if (x != XEXP (link, 0))
6120 continue;
6122 #ifdef ENABLE_CHECKING
6123 /* If add_dependence is working properly there should never
6124 be notes, deleted insns or duplicates in the backward
6125 links. Thus we need not check for them here.
6127 However, if we have enabled checking we might as well go
6128 ahead and verify that add_dependence worked properly. */
6129 if (GET_CODE (x) == NOTE
6130 || INSN_DELETED_P (x)
6131 || find_insn_list (insn, INSN_DEPEND (x)))
6132 abort ();
6133 #endif
6135 new_link = alloc_INSN_LIST (insn, INSN_DEPEND (x));
6137 dep_type = REG_NOTE_KIND (link);
6138 PUT_REG_NOTE_KIND (new_link, dep_type);
6140 INSN_DEPEND (x) = new_link;
6141 INSN_DEP_COUNT (insn) += 1;
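/* A minimal example of the two representations built above: if I2 reads
   a value that I1 sets, the backward list LOG_LINKS (I2) holds I1; the
   loop mirrors that link into the forward list INSN_DEPEND (I1), which
   gains I2, and INSN_DEP_COUNT (I2) grows by one for each such backward
   dependence still to be resolved during scheduling.  */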
6146 /* Initialize variables for region data dependence analysis.
6147 n_bbs is the number of region blocks. */
6149 __inline static void
6150 init_rgn_data_dependences (n_bbs)
6151 int n_bbs;
6153 int bb;
6155 /* Variables for which one copy exists for each block. */
6156 bzero ((char *) bb_pending_read_insns, n_bbs * sizeof (rtx));
6157 bzero ((char *) bb_pending_read_mems, n_bbs * sizeof (rtx));
6158 bzero ((char *) bb_pending_write_insns, n_bbs * sizeof (rtx));
6159 bzero ((char *) bb_pending_write_mems, n_bbs * sizeof (rtx));
6160 bzero ((char *) bb_pending_lists_length, n_bbs * sizeof (rtx));
6161 bzero ((char *) bb_last_pending_memory_flush, n_bbs * sizeof (rtx));
6162 bzero ((char *) bb_last_function_call, n_bbs * sizeof (rtx));
6163 bzero ((char *) bb_sched_before_next_call, n_bbs * sizeof (rtx));
6165 /* Create an insn here so that we can hang dependencies off of it later. */
6166 for (bb = 0; bb < n_bbs; bb++)
6168 bb_sched_before_next_call[bb] =
6169 gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6170 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6171 LOG_LINKS (bb_sched_before_next_call[bb]) = 0;
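/* The insn built above is an empty placeholder that never enters the
   insn stream: dependences which must be scheduled before the next
   call are hung off its LOG_LINKS and later inherited by successor
   blocks (see the sched_before_next_call handling in
   compute_block_backward_dependences).  */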
6175 /* Add dependences so that branches are scheduled to run last in their
6176 block. */
6178 static void
6179 add_branch_dependences (head, tail)
6180 rtx head, tail;
6183 rtx insn, last;
6185 /* For all branches, calls, uses, and cc0 setters, force them to remain
6186 in order at the end of the block by adding dependencies and giving
6187 the last a high priority. There may be notes present, and prev_head
6188 may also be a note.
6190 Branches must obviously remain at the end. Calls should remain at the
6191 end since moving them results in worse register allocation. Uses remain
6192 at the end to ensure proper register allocation. cc0 setters remain
6193 at the end because they can't be moved away from their cc0 user. */
6194 insn = tail;
6195 last = 0;
6196 while (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN
6197 || (GET_CODE (insn) == INSN
6198 && (GET_CODE (PATTERN (insn)) == USE
6199 #ifdef HAVE_cc0
6200 || sets_cc0_p (PATTERN (insn))
6201 #endif
6203 || GET_CODE (insn) == NOTE)
6205 if (GET_CODE (insn) != NOTE)
6207 if (last != 0
6208 && !find_insn_list (insn, LOG_LINKS (last)))
6210 add_dependence (last, insn, REG_DEP_ANTI);
6211 INSN_REF_COUNT (insn)++;
6214 CANT_MOVE (insn) = 1;
6216 last = insn;
6217 /* Skip over insns that are part of a group.
6218 Make each insn explicitly depend on the previous insn.
6219 This ensures that only the group header will ever enter
6220 the ready queue (and, when scheduled, will automatically
6221 schedule the SCHED_GROUP_P block). */
6222 while (SCHED_GROUP_P (insn))
6224 rtx temp = prev_nonnote_insn (insn);
6225 add_dependence (insn, temp, REG_DEP_ANTI);
6226 insn = temp;
6230 /* Don't overrun the bounds of the basic block. */
6231 if (insn == head)
6232 break;
6234 insn = PREV_INSN (insn);
6237 /* Make sure these insns are scheduled last in their block. */
6238 insn = last;
6239 if (insn != 0)
6240 while (insn != head)
6242 insn = prev_nonnote_insn (insn);
6244 if (INSN_REF_COUNT (insn) != 0)
6245 continue;
6247 add_dependence (last, insn, REG_DEP_ANTI);
6248 INSN_REF_COUNT (insn) = 1;
6250 /* Skip over insns that are part of a group. */
6251 while (SCHED_GROUP_P (insn))
6252 insn = prev_nonnote_insn (insn);
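/* Net effect, for a block tail of the form  insn_A; use_1; use_2; jump:
   the first loop chains  jump -> use_2 -> use_1  with anti-dependences
   to keep the tail group in order, and the second loop makes LAST (here
   use_1, the earliest member of the group) anti-depend on every earlier
   insn not already referenced, so the group issues after the rest of
   the block.  */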
6256 /* Compute backward dependences inside bb. In a multiple blocks region:
6257 (1) a bb is analyzed after its predecessors, and (2) the lists in
6258 effect at the end of bb (after analyzing for bb) are inherited by
6259 bb's successors.
6261 Specifically for reg-reg data dependences, the block insns are
6262 scanned by sched_analyze () top-to-bottom. Two lists are
6263 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
6264 and reg_last_uses[] for register USEs.
6266 When analysis is completed for bb, we update for its successors:
6267 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
6268 ; - USES[succ] = Union (USES [succ], USES [bb])
6270 The mechanism for computing mem-mem data dependence is very
6271 similar, and the result is interblock dependences in the region. */
6273 static void
6274 compute_block_backward_dependences (bb)
6275 int bb;
6277 int b;
6278 rtx x;
6279 rtx head, tail;
6280 int max_reg = max_reg_num ();
6282 b = BB_TO_BLOCK (bb);
6284 if (current_nr_blocks == 1)
6286 reg_last_uses = (rtx *) alloca (max_reg * sizeof (rtx));
6287 reg_last_sets = (rtx *) alloca (max_reg * sizeof (rtx));
6288 reg_last_clobbers = (rtx *) alloca (max_reg * sizeof (rtx));
6290 bzero ((char *) reg_last_uses, max_reg * sizeof (rtx));
6291 bzero ((char *) reg_last_sets, max_reg * sizeof (rtx));
6292 bzero ((char *) reg_last_clobbers, max_reg * sizeof (rtx));
6294 pending_read_insns = 0;
6295 pending_read_mems = 0;
6296 pending_write_insns = 0;
6297 pending_write_mems = 0;
6298 pending_lists_length = 0;
6299 last_function_call = 0;
6300 last_pending_memory_flush = 0;
6301 sched_before_next_call
6302 = gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6303 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6304 LOG_LINKS (sched_before_next_call) = 0;
6306 else
6308 reg_last_uses = bb_reg_last_uses[bb];
6309 reg_last_sets = bb_reg_last_sets[bb];
6310 reg_last_clobbers = bb_reg_last_clobbers[bb];
6312 pending_read_insns = bb_pending_read_insns[bb];
6313 pending_read_mems = bb_pending_read_mems[bb];
6314 pending_write_insns = bb_pending_write_insns[bb];
6315 pending_write_mems = bb_pending_write_mems[bb];
6316 pending_lists_length = bb_pending_lists_length[bb];
6317 last_function_call = bb_last_function_call[bb];
6318 last_pending_memory_flush = bb_last_pending_memory_flush[bb];
6320 sched_before_next_call = bb_sched_before_next_call[bb];
6323 /* Do the analysis for this block. */
6324 get_block_head_tail (bb, &head, &tail);
6325 sched_analyze (head, tail);
6326 add_branch_dependences (head, tail);
6328 if (current_nr_blocks > 1)
6330 int e, first_edge;
6331 int b_succ, bb_succ;
6332 int reg;
6333 rtx link_insn, link_mem;
6334 rtx u;
6336 /* These lists should point to the right place, for correct
6337 freeing later. */
6338 bb_pending_read_insns[bb] = pending_read_insns;
6339 bb_pending_read_mems[bb] = pending_read_mems;
6340 bb_pending_write_insns[bb] = pending_write_insns;
6341 bb_pending_write_mems[bb] = pending_write_mems;
6343 /* bb's structures are inherited by its successors. */
6344 first_edge = e = OUT_EDGES (b);
6345 if (e > 0)
6348 b_succ = TO_BLOCK (e);
6349 bb_succ = BLOCK_TO_BB (b_succ);
6351 /* Only bbs "below" bb, in the same region, are interesting. */
6352 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
6353 || bb_succ <= bb)
6355 e = NEXT_OUT (e);
6356 continue;
6359 for (reg = 0; reg < max_reg; reg++)
6362 /* reg-last-uses lists are inherited by bb_succ. */
6363 for (u = reg_last_uses[reg]; u; u = XEXP (u, 1))
6365 if (find_insn_list (XEXP (u, 0),
6366 (bb_reg_last_uses[bb_succ])[reg]))
6367 continue;
6369 (bb_reg_last_uses[bb_succ])[reg]
6370 = alloc_INSN_LIST (XEXP (u, 0),
6371 (bb_reg_last_uses[bb_succ])[reg]);
6374 /* reg-last-defs lists are inherited by bb_succ. */
6375 for (u = reg_last_sets[reg]; u; u = XEXP (u, 1))
6377 if (find_insn_list (XEXP (u, 0),
6378 (bb_reg_last_sets[bb_succ])[reg]))
6379 continue;
6381 (bb_reg_last_sets[bb_succ])[reg]
6382 = alloc_INSN_LIST (XEXP (u, 0),
6383 (bb_reg_last_sets[bb_succ])[reg]);
6386 for (u = reg_last_clobbers[reg]; u; u = XEXP (u, 1))
6388 if (find_insn_list (XEXP (u, 0),
6389 (bb_reg_last_clobbers[bb_succ])[reg]))
6390 continue;
6392 (bb_reg_last_clobbers[bb_succ])[reg]
6393 = alloc_INSN_LIST (XEXP (u, 0),
6394 (bb_reg_last_clobbers[bb_succ])[reg]);
6398 /* Mem read/write lists are inherited by bb_succ. */
6399 link_insn = pending_read_insns;
6400 link_mem = pending_read_mems;
6401 while (link_insn)
6403 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6404 XEXP (link_mem, 0),
6405 bb_pending_read_insns[bb_succ],
6406 bb_pending_read_mems[bb_succ])))
6407 add_insn_mem_dependence (&bb_pending_read_insns[bb_succ],
6408 &bb_pending_read_mems[bb_succ],
6409 XEXP (link_insn, 0), XEXP (link_mem, 0));
6410 link_insn = XEXP (link_insn, 1);
6411 link_mem = XEXP (link_mem, 1);
6414 link_insn = pending_write_insns;
6415 link_mem = pending_write_mems;
6416 while (link_insn)
6418 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6419 XEXP (link_mem, 0),
6420 bb_pending_write_insns[bb_succ],
6421 bb_pending_write_mems[bb_succ])))
6422 add_insn_mem_dependence (&bb_pending_write_insns[bb_succ],
6423 &bb_pending_write_mems[bb_succ],
6424 XEXP (link_insn, 0), XEXP (link_mem, 0));
6426 link_insn = XEXP (link_insn, 1);
6427 link_mem = XEXP (link_mem, 1);
6430 /* last_function_call is inherited by bb_succ. */
6431 for (u = last_function_call; u; u = XEXP (u, 1))
6433 if (find_insn_list (XEXP (u, 0),
6434 bb_last_function_call[bb_succ]))
6435 continue;
6437 bb_last_function_call[bb_succ]
6438 = alloc_INSN_LIST (XEXP (u, 0),
6439 bb_last_function_call[bb_succ]);
6442 /* last_pending_memory_flush is inherited by bb_succ. */
6443 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
6445 if (find_insn_list (XEXP (u, 0),
6446 bb_last_pending_memory_flush[bb_succ]))
6447 continue;
6449 bb_last_pending_memory_flush[bb_succ]
6450 = alloc_INSN_LIST (XEXP (u, 0),
6451 bb_last_pending_memory_flush[bb_succ]);
6454 /* sched_before_next_call is inherited by bb_succ. */
6455 x = LOG_LINKS (sched_before_next_call);
6456 for (; x; x = XEXP (x, 1))
6457 add_dependence (bb_sched_before_next_call[bb_succ],
6458 XEXP (x, 0), REG_DEP_ANTI);
6460 e = NEXT_OUT (e);
6462 while (e != first_edge);
6465 /* Free up the INSN_LISTs.
6467 Note this loop is executed max_reg * nr_regions times. Its first
6468 implementation accounted for over 90% of the calls to free_INSN_LIST_list.
6469 The list was empty for the vast majority of those calls. On the PA, not
6470 calling free_INSN_LIST_list in those cases improves -O2 compile times by
6471 3-5% on average. */
6472 for (b = 0; b < max_reg; ++b)
6474 if (reg_last_clobbers[b])
6475 free_INSN_LIST_list (&reg_last_clobbers[b]);
6476 if (reg_last_sets[b])
6477 free_INSN_LIST_list (&reg_last_sets[b]);
6478 if (reg_last_uses[b])
6479 free_INSN_LIST_list (&reg_last_uses[b]);
6482 /* Assert that we won't need bb_reg_last_* for this block anymore. */
6483 if (current_nr_blocks > 1)
6485 bb_reg_last_uses[bb] = (rtx *) NULL_RTX;
6486 bb_reg_last_sets[bb] = (rtx *) NULL_RTX;
6487 bb_reg_last_clobbers[bb] = (rtx *) NULL_RTX;
6491 /* Print dependences for debugging, callable from debugger. */
6493 void
6494 debug_dependencies ()
6496 int bb;
6498 fprintf (dump, ";; --------------- forward dependences: ------------ \n");
6499 for (bb = 0; bb < current_nr_blocks; bb++)
6501 if (1)
6503 rtx head, tail;
6504 rtx next_tail;
6505 rtx insn;
6507 get_block_head_tail (bb, &head, &tail);
6508 next_tail = NEXT_INSN (tail);
6509 fprintf (dump, "\n;; --- Region Dependences --- b %d bb %d \n",
6510 BB_TO_BLOCK (bb), bb);
6512 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6513 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
6514 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6515 "----", "----", "--", "---", "----", "----", "--------", "-----");
6516 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6518 rtx link;
6519 int unit, range;
6521 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6523 int n;
6524 fprintf (dump, ";; %6d ", INSN_UID (insn));
6525 if (GET_CODE (insn) == NOTE)
6527 n = NOTE_LINE_NUMBER (insn);
6528 if (n < 0)
6529 fprintf (dump, "%s\n", GET_NOTE_INSN_NAME (n));
6530 else
6531 fprintf (dump, "line %d, file %s\n", n,
6532 NOTE_SOURCE_FILE (insn));
6534 else
6535 fprintf (dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
6536 continue;
6539 unit = insn_unit (insn);
6540 range = (unit < 0
6541 || function_units[unit].blockage_range_function == 0) ? 0 :
6542 function_units[unit].blockage_range_function (insn);
6543 fprintf (dump,
6544 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
6545 (SCHED_GROUP_P (insn) ? "+" : " "),
6546 INSN_UID (insn),
6547 INSN_CODE (insn),
6548 INSN_BB (insn),
6549 INSN_DEP_COUNT (insn),
6550 INSN_PRIORITY (insn),
6551 insn_cost (insn, 0, 0),
6552 (int) MIN_BLOCKAGE_COST (range),
6553 (int) MAX_BLOCKAGE_COST (range));
6554 insn_print_units (insn);
6555 fprintf (dump, "\t: ");
6556 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
6557 fprintf (dump, "%d ", INSN_UID (XEXP (link, 0)));
6558 fprintf (dump, "\n");
6562 fprintf (dump, "\n");
6565 /* Set_priorities: compute priority of each insn in the block. */
6567 static int
6568 set_priorities (bb)
6569 int bb;
6571 rtx insn;
6572 int n_insn;
6574 rtx tail;
6575 rtx prev_head;
6576 rtx head;
6578 get_block_head_tail (bb, &head, &tail);
6579 prev_head = PREV_INSN (head);
6581 if (head == tail
6582 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
6583 return 0;
6585 n_insn = 0;
6586 for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
6589 if (GET_CODE (insn) == NOTE)
6590 continue;
6592 if (!(SCHED_GROUP_P (insn)))
6593 n_insn++;
6594 (void) priority (insn);
6597 return n_insn;
6600 /* Make each element of VECTOR point at an rtx-vector,
6601 taking the space for all those rtx-vectors from SPACE.
6602 SPACE is of type (rtx *), but it is really as long as NELTS rtx-vectors.
6603 BYTES_PER_ELT is the number of bytes in one rtx-vector.
6604 (this is the same as init_regset_vector () in flow.c) */
6606 static void
6607 init_rtx_vector (vector, space, nelts, bytes_per_elt)
6608 rtx **vector;
6609 rtx *space;
6610 int nelts;
6611 int bytes_per_elt;
6613 register int i;
6614 register rtx *p = space;
6616 for (i = 0; i < nelts; i++)
6618 vector[i] = p;
6619 p += bytes_per_elt / sizeof (*p);
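/* For example, the call in schedule_region (),

       init_rtx_vector (bb_reg_last_uses, space, current_nr_blocks,
			maxreg * sizeof (rtx *));

   carves SPACE into CURRENT_NR_BLOCKS consecutive sub-arrays of MAXREG
   rtx pointers and leaves BB_REG_LAST_USES[i] pointing at the i-th.  */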
6623 /* Schedule a region. A region is either an inner loop, a loop-free
6624 subroutine, or a single basic block. Each bb in the region is
6625 scheduled after its flow predecessors. */
6627 static void
6628 schedule_region (rgn)
6629 int rgn;
6631 int bb;
6632 int rgn_n_insns = 0;
6633 int sched_rgn_n_insns = 0;
6634 int initial_deaths;
6635 sbitmap blocks;
6637 /* Set variables for the current region. */
6638 current_nr_blocks = RGN_NR_BLOCKS (rgn);
6639 current_blocks = RGN_BLOCKS (rgn);
6641 reg_pending_sets = ALLOCA_REG_SET ();
6642 reg_pending_clobbers = ALLOCA_REG_SET ();
6643 reg_pending_sets_all = 0;
6645 /* Create a bitmap of the blocks in this region. */
6646 blocks = sbitmap_alloc (n_basic_blocks);
6647 sbitmap_zero (blocks);
6649 for (bb = current_nr_blocks - 1; bb >= 0; --bb)
6650 SET_BIT (blocks, BB_TO_BLOCK (bb));
6652 /* Initializations for region data dependence analysis. */
6653 if (current_nr_blocks > 1)
6655 rtx *space;
6656 int maxreg = max_reg_num ();
6658 bb_reg_last_uses = (rtx **) alloca (current_nr_blocks * sizeof (rtx *));
6659 space = (rtx *) alloca (current_nr_blocks * maxreg * sizeof (rtx));
6660 bzero ((char *) space, current_nr_blocks * maxreg * sizeof (rtx));
6661 init_rtx_vector (bb_reg_last_uses, space, current_nr_blocks,
6662 maxreg * sizeof (rtx *));
6664 bb_reg_last_sets = (rtx **) alloca (current_nr_blocks * sizeof (rtx *));
6665 space = (rtx *) alloca (current_nr_blocks * maxreg * sizeof (rtx));
6666 bzero ((char *) space, current_nr_blocks * maxreg * sizeof (rtx));
6667 init_rtx_vector (bb_reg_last_sets, space, current_nr_blocks,
6668 maxreg * sizeof (rtx *));
6670 bb_reg_last_clobbers =
6671 (rtx **) alloca (current_nr_blocks * sizeof (rtx *));
6672 space = (rtx *) alloca (current_nr_blocks * maxreg * sizeof (rtx));
6673 bzero ((char *) space, current_nr_blocks * maxreg * sizeof (rtx));
6674 init_rtx_vector (bb_reg_last_clobbers, space, current_nr_blocks,
6675 maxreg * sizeof (rtx *));
6677 bb_pending_read_insns = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
6678 bb_pending_read_mems = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
6679 bb_pending_write_insns =
6680 (rtx *) alloca (current_nr_blocks * sizeof (rtx));
6681 bb_pending_write_mems = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
6682 bb_pending_lists_length =
6683 (int *) alloca (current_nr_blocks * sizeof (int));
6684 bb_last_pending_memory_flush =
6685 (rtx *) alloca (current_nr_blocks * sizeof (rtx));
6686 bb_last_function_call = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
6687 bb_sched_before_next_call =
6688 (rtx *) alloca (current_nr_blocks * sizeof (rtx));
6690 init_rgn_data_dependences (current_nr_blocks);
6693 /* Compute LOG_LINKS. */
6694 for (bb = 0; bb < current_nr_blocks; bb++)
6695 compute_block_backward_dependences (bb);
6697 /* Compute INSN_DEPEND. */
6698 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
6699 compute_block_forward_dependences (bb);
6701 /* Compute INSN_REG_WEIGHT. */
6702 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
6703 find_insn_reg_weight (bb);
6705 /* Remove death notes. */
6706 initial_deaths = count_or_remove_death_notes (blocks, 1);
6708 /* Delete line notes and set priorities. */
6709 for (bb = 0; bb < current_nr_blocks; bb++)
6711 if (write_symbols != NO_DEBUG)
6713 save_line_notes (bb);
6714 rm_line_notes (bb);
6717 rgn_n_insns += set_priorities (bb);
6720 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
6721 if (current_nr_blocks > 1)
6723 int i;
6725 prob = (float *) alloca ((current_nr_blocks) * sizeof (float));
6727 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
6728 dom = (bbset *) alloca (current_nr_blocks * sizeof (bbset));
6729 for (i = 0; i < current_nr_blocks; i++)
6731 dom[i] = (bbset) alloca (bbset_size * sizeof (HOST_WIDE_INT));
6732 bzero ((char *) dom[i], bbset_size * sizeof (HOST_WIDE_INT));
6735 /* Edge to bit. */
6736 rgn_nr_edges = 0;
6737 edge_to_bit = (int *) alloca (nr_edges * sizeof (int));
6738 for (i = 1; i < nr_edges; i++)
6739 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
6740 EDGE_TO_BIT (i) = rgn_nr_edges++;
6741 rgn_edges = (int *) alloca (rgn_nr_edges * sizeof (int));
6743 rgn_nr_edges = 0;
6744 for (i = 1; i < nr_edges; i++)
6745 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
6746 rgn_edges[rgn_nr_edges++] = i;
6748 /* Split edges. */
6749 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
6750 pot_split = (edgeset *) alloca (current_nr_blocks * sizeof (edgeset));
6751 ancestor_edges = (edgeset *) alloca (current_nr_blocks
6752 * sizeof (edgeset));
6753 for (i = 0; i < current_nr_blocks; i++)
6755 pot_split[i] =
6756 (edgeset) alloca (edgeset_size * sizeof (HOST_WIDE_INT));
6757 bzero ((char *) pot_split[i],
6758 edgeset_size * sizeof (HOST_WIDE_INT));
6759 ancestor_edges[i] =
6760 (edgeset) alloca (edgeset_size * sizeof (HOST_WIDE_INT));
6761 bzero ((char *) ancestor_edges[i],
6762 edgeset_size * sizeof (HOST_WIDE_INT));
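/* Sizing note: each DOM[i] carries one bit per block of the region,
   while POT_SPLIT[i] and ANCESTOR_EDGES[i] carry one bit per region
   edge, rounded up to whole HOST_WIDE_INTs by the bbset_size and
   edgeset_size computations above.  */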
6765 /* Compute probabilities, dominators, split_edges. */
6766 for (bb = 0; bb < current_nr_blocks; bb++)
6767 compute_dom_prob_ps (bb);
6770 /* Now we can schedule all blocks. */
6771 for (bb = 0; bb < current_nr_blocks; bb++)
6773 sched_rgn_n_insns += schedule_block (bb, rgn_n_insns);
6775 #ifdef USE_C_ALLOCA
6776 alloca (0);
6777 #endif
6780 /* Sanity check: verify that all region insns were scheduled. */
6781 if (sched_rgn_n_insns != rgn_n_insns)
6782 abort ();
6784 /* Update register life and usage information. Scheduling a multi-block
6785 region requires a global update. */
6786 if (current_nr_blocks > 1)
6787 update_life_info (blocks, UPDATE_LIFE_GLOBAL);
6788 else
6790 update_life_info (blocks, UPDATE_LIFE_LOCAL);
6792 /* In the single block case, the count of registers that died should
6793 not have changed during the schedule. */
6794 if (count_or_remove_death_notes (blocks, 0) != initial_deaths)
6795 abort ();
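/* The bookkeeping behind this check: count_or_remove_death_notes
   stripped the death notes before scheduling (INITIAL_DEATHS above),
   and update_life_info has just recreated them; within a single block,
   scheduling must not change which registers die.  */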
6798 /* Restore line notes. */
6799 if (write_symbols != NO_DEBUG)
6801 for (bb = 0; bb < current_nr_blocks; bb++)
6802 restore_line_notes (bb);
6805 /* Done with this region. */
6806 free_pending_lists ();
6808 FREE_REG_SET (reg_pending_sets);
6809 FREE_REG_SET (reg_pending_clobbers);
6810 sbitmap_free (blocks);
6813 /* The one entry point in this file. DUMP_FILE is the dump file for
6814 this pass. */
6816 void
6817 schedule_insns (dump_file)
6818 FILE *dump_file;
6821 int max_uid;
6822 int b;
6823 rtx insn;
6824 int rgn;
6826 int luid;
6828 /* Disable speculative loads in the presence of cc0. */
6829 #ifdef HAVE_cc0
6830 flag_schedule_speculative_load = 0;
6831 #endif
6833 /* Taking care of this degenerate case makes the rest of
6834 this code simpler. */
6835 if (n_basic_blocks == 0)
6836 return;
6838 /* Set dump and sched_verbose for the desired debugging output. If no
6839 dump-file was specified, but -fsched-verbose-N (any N), print to stderr.
6840 For -fsched-verbose-N, N>=10, print everything to stderr. */
6841 sched_verbose = sched_verbose_param;
6842 if (sched_verbose_param == 0 && dump_file)
6843 sched_verbose = 1;
6844 dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file);
6846 nr_inter = 0;
6847 nr_spec = 0;
6849 /* Initialize issue_rate. */
6850 issue_rate = ISSUE_RATE;
6852 split_all_insns (1);
6854 max_uid = (get_max_uid () + 1);
6856 cant_move = xcalloc (max_uid, sizeof (char));
6857 fed_by_spec_load = xcalloc (max_uid, sizeof (char));
6858 is_load_insn = xcalloc (max_uid, sizeof (char));
6860 insn_orig_block = (int *) xmalloc (max_uid * sizeof (int));
6861 insn_luid = (int *) xmalloc (max_uid * sizeof (int));
6863 /* We use LUID 0 for the fake insn (UID 0) which holds dependencies for
6864 pseudos which do not cross calls. */
6865 insn_luid[0] = 0;
6866 luid = 1;
6867 for (b = 0; b < n_basic_blocks; b++)
6868 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
6870 INSN_BLOCK (insn) = b;
6871 INSN_LUID (insn) = luid++;
6873 if (insn == BLOCK_END (b))
6874 break;
6877 /* ?!? We could save some memory by computing a per-region luid mapping
6878 which could reduce both the number of vectors in the cache and the size
6879 of each vector. */
6880 true_dependency_cache = sbitmap_vector_alloc (luid, luid);
6881 sbitmap_vector_zero (true_dependency_cache, luid);
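/* TRUE_DEPENDENCY_CACHE is a luid-by-luid bit matrix (one sbitmap of
   LUID bits per insn luid); a set bit records an already-noted true
   dependence, presumably letting duplicate checks be answered with a
   bit test instead of a walk of the LOG_LINKS lists.  */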
6883 nr_regions = 0;
6884 rgn_table = (region *) alloca ((n_basic_blocks) * sizeof (region));
6885 rgn_bb_table = (int *) alloca ((n_basic_blocks) * sizeof (int));
6886 block_to_bb = (int *) alloca ((n_basic_blocks) * sizeof (int));
6887 containing_rgn = (int *) alloca ((n_basic_blocks) * sizeof (int));
6889 /* Compute regions for scheduling. */
6890 if (reload_completed
6891 || n_basic_blocks == 1
6892 || !flag_schedule_interblock)
6894 find_single_block_region ();
6896 else
6898 /* Verify that a 'good' control flow graph can be built. */
6899 if (is_cfg_nonregular ())
6901 find_single_block_region ();
6903 else
6905 int_list_ptr *s_preds, *s_succs;
6906 int *num_preds, *num_succs;
6907 sbitmap *dom, *pdom;
6909 s_preds = (int_list_ptr *) alloca (n_basic_blocks
6910 * sizeof (int_list_ptr));
6911 s_succs = (int_list_ptr *) alloca (n_basic_blocks
6912 * sizeof (int_list_ptr));
6913 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
6914 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
6915 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
6916 pdom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
6918 /* The scheduler runs after flow; therefore, we can't blindly call
6919 back into find_basic_blocks since doing so could invalidate the
6920 info in global_live_at_start.
6922 Consider a block consisting entirely of dead stores; after life
6923 analysis it would be a block of NOTE_INSN_DELETED notes. If
6924 we call find_basic_blocks again, then the block would be removed
6925 entirely and invalidate our register live information.
6927 We could (should?) recompute register live information. Doing
6928 so may even be beneficial. */
6930 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
6932 /* Compute the dominators and post dominators. We don't
6933 currently use post dominators, but we should for
6934 speculative motion analysis. */
6935 compute_dominators (dom, pdom, s_preds, s_succs);
6937 /* build_control_flow will return nonzero if it detects unreachable
6938 blocks or any other irregularity with the cfg which prevents
6939 cross block scheduling. */
6940 if (build_control_flow (s_preds, s_succs, num_preds, num_succs) != 0)
6941 find_single_block_region ();
6942 else
6943 find_rgns (s_preds, s_succs, num_preds, num_succs, dom);
6945 if (sched_verbose >= 3)
6946 debug_regions ();
6948 /* For now. This will move as more and more of haifa is converted
6949 to using the cfg code in flow.c. */
6950 free_bb_mem ();
6951 free (dom);
6952 free (pdom);
6956 /* Allocate data for this pass. See comments, above,
6957 for what these vectors do.
6959 We use xmalloc instead of alloca, because max_uid can be very large
6960 when there is a lot of function inlining. If we used alloca, we could
6961 exceed stack limits on some hosts for some inputs. */
6962 insn_priority = (int *) xcalloc (max_uid, sizeof (int));
6963 insn_reg_weight = (int *) xcalloc (max_uid, sizeof (int));
6964 insn_tick = (int *) xcalloc (max_uid, sizeof (int));
6965 insn_costs = (short *) xcalloc (max_uid, sizeof (short));
6966 insn_units = (short *) xcalloc (max_uid, sizeof (short));
6967 insn_blockage = (unsigned int *) xcalloc (max_uid, sizeof (unsigned int));
6968 insn_ref_count = (int *) xcalloc (max_uid, sizeof (int));
6970 /* Allocate for forward dependencies. */
6971 insn_dep_count = (int *) xcalloc (max_uid, sizeof (int));
6972 insn_depend = (rtx *) xcalloc (max_uid, sizeof (rtx));
6974 init_alias_analysis ();
6976 if (write_symbols != NO_DEBUG)
6978 rtx line;
6980 line_note = (rtx *) xcalloc (max_uid, sizeof (rtx));
6981 line_note_head = (rtx *) alloca (n_basic_blocks * sizeof (rtx));
6982 bzero ((char *) line_note_head, n_basic_blocks * sizeof (rtx));
6984 /* Save-line-note-head:
6985 Determine the line-number at the start of each basic block.
6986 This must be computed and saved now, because after a basic block's
6987 predecessor has been scheduled, it is impossible to accurately
6988 determine the correct line number for the first insn of the block. */
6990 for (b = 0; b < n_basic_blocks; b++)
6991 for (line = BLOCK_HEAD (b); line; line = PREV_INSN (line))
6992 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
6994 line_note_head[b] = line;
6995 break;
6999 /* Find units used in this function, for visualization. */
7000 if (sched_verbose)
7001 init_target_units ();
7003 /* ??? Add a NOTE after the last insn of the last basic block. It is not
7004 known why this is done. */
7006 insn = BLOCK_END (n_basic_blocks - 1);
7007 if (NEXT_INSN (insn) == 0
7008 || (GET_CODE (insn) != NOTE
7009 && GET_CODE (insn) != CODE_LABEL
7010 /* Don't emit a NOTE if it would end up between an unconditional
7011 jump and a BARRIER. */
7012 && !(GET_CODE (insn) == JUMP_INSN
7013 && GET_CODE (NEXT_INSN (insn)) == BARRIER)))
7014 emit_note_after (NOTE_INSN_DELETED, BLOCK_END (n_basic_blocks - 1));
7016 /* Schedule every region in the subroutine. */
7017 for (rgn = 0; rgn < nr_regions; rgn++)
7019 schedule_region (rgn);
7021 #ifdef USE_C_ALLOCA
7022 alloca (0);
7023 #endif
7026 /* Reposition the prologue and epilogue notes in case we moved the
7027 prologue/epilogue insns. */
7028 if (reload_completed)
7029 reposition_prologue_and_epilogue_notes (get_insns ());
7031 /* Delete redundant line notes. */
7032 if (write_symbols != NO_DEBUG)
7033 rm_redundant_line_notes ();
7035 if (sched_verbose)
7037 if (reload_completed == 0 && flag_schedule_interblock)
7039 fprintf (dump, "\n;; Procedure interblock/speculative motions == %d/%d \n",
7040 nr_inter, nr_spec);
7042 else
7044 if (nr_inter > 0)
7045 abort ();
7047 fprintf (dump, "\n\n");
7050 free (true_dependency_cache);
7051 free (cant_move);
7052 free (fed_by_spec_load);
7053 free (is_load_insn);
7054 free (insn_orig_block);
7055 free (insn_luid);
7057 free (insn_priority);
7058 free (insn_reg_weight);
7059 free (insn_tick);
7060 free (insn_costs);
7061 free (insn_units);
7062 free (insn_blockage);
7063 free (insn_ref_count);
7065 free (insn_dep_count);
7066 free (insn_depend);
7068 if (write_symbols != NO_DEBUG)
7069 free (line_note);
7071 if (edge_table)
7073 free (edge_table);
7074 edge_table = NULL;
7077 if (in_edges)
7079 free (in_edges);
7080 in_edges = NULL;
7082 if (out_edges)
7084 free (out_edges);
7085 out_edges = NULL;
7088 #endif /* INSN_SCHEDULING */