1 /* Post reload partially redundant load elimination
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
24 #include "diagnostic-core.h"
32 #include "hard-reg-set.h"
34 #include "insn-config.h"
38 #include "dominance.h"
41 #include "basic-block.h"
56 #include "tree-pass.h"
59 #include "gcse-common.h"
61 /* The following code implements gcse after reload, the purpose of this
62 pass is to cleanup redundant loads generated by reload and other
63 optimizations that come after gcse. It searches for simple inter-block
64 redundancies and tries to eliminate them by adding moves and loads
67 Perform partially redundant load elimination, try to eliminate redundant
68 loads created by the reload pass. We try to look for full or partial
69 redundant loads fed by one or more loads/stores in predecessor BBs,
70 and try adding loads to make them fully redundant. We also check if
71 it's worth adding loads to be able to delete the redundant load.
74 1. Build available expressions hash table:
75 For each load/store instruction, if the loaded/stored memory didn't
76 change until the end of the basic block add this memory expression to
78 2. Perform Redundancy elimination:
79 For each load instruction do the following:
80 perform partial redundancy elimination, check if it's worth adding
81 loads to make the load fully redundant. If so add loads and
82 register copies and delete the load.
83 3. Delete instructions made redundant in step 2.
86 If the loaded register is used/defined between load and some store,
87 look for some other free register between load and all its stores,
88 and replace the load with a copy from this register to the loaded
93 /* Keep statistics of this pass. */
101 /* We need to keep a hash table of expressions. The table entries are of
102 type 'struct expr', and for each expression there is a single linked
103 list of occurrences. */
105 /* Expression elements in the hash table. */
108 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
111 /* The cached hash value for this entry. */
114 /* Index in the transparent bitmaps. */
115 unsigned int bitmap_index
;
117 /* List of available occurrences in basic blocks in the function. */
118 struct occr
*avail_occr
;
121 /* Hashtable helpers. */
123 struct expr_hasher
: nofree_ptr_hash
<expr
>
125 static inline hashval_t
hash (const expr
*);
126 static inline bool equal (const expr
*, const expr
*);
130 /* Hash expression X.
131 DO_NOT_RECORD_P is a boolean indicating if a volatile operand is found
132 or if the expression contains something we don't want to insert in the
136 hash_expr (rtx x
, int *do_not_record_p
)
138 *do_not_record_p
= 0;
139 return hash_rtx (x
, GET_MODE (x
), do_not_record_p
,
140 NULL
, /*have_reg_qty=*/false);
143 /* Callback for hashtab.
144 Return the hash value for expression EXP. We don't actually hash
145 here, we just return the cached hash value. */
148 expr_hasher::hash (const expr
*exp
)
153 /* Callback for hashtab.
154 Return nonzero if exp1 is equivalent to exp2. */
157 expr_hasher::equal (const expr
*exp1
, const expr
*exp2
)
159 int equiv_p
= exp_equiv_p (exp1
->expr
, exp2
->expr
, 0, true);
161 gcc_assert (!equiv_p
|| exp1
->hash
== exp2
->hash
);
165 /* The table itself. */
166 static hash_table
<expr_hasher
> *expr_table
;
169 static struct obstack expr_obstack
;
171 /* Occurrence of an expression.
172 There is at most one occurrence per basic block. If a pattern appears
173 more than once, the last appearance is used. */
177 /* Next occurrence of this expression. */
179 /* The insn that computes the expression. */
181 /* Nonzero if this [anticipatable] occurrence has been deleted. */
185 static struct obstack occr_obstack
;
187 /* The following structure holds the information about the occurrences of
188 the redundant instructions. */
196 static struct obstack unoccr_obstack
;
198 /* Array where each element is the CUID of the insn that last set the hard
199 register with the number of the element, since the start of the current
202 This array is used during the building of the hash table (step 1) to
203 determine if a reg is killed before the end of a basic block.
205 It is also used when eliminating partial redundancies (step 2) to see
206 if a reg was modified since the start of a basic block. */
207 static int *reg_avail_info
;
209 /* A list of insns that may modify memory within the current basic block. */
213 struct modifies_mem
*next
;
215 static struct modifies_mem
*modifies_mem_list
;
217 /* The modifies_mem structs also go on an obstack, only this obstack is
218 freed each time after completing the analysis or transformations on
219 a basic block. So we allocate a dummy modifies_mem_obstack_bottom
220 object on the obstack to keep track of the bottom of the obstack. */
221 static struct obstack modifies_mem_obstack
;
222 static struct modifies_mem
*modifies_mem_obstack_bottom
;
224 /* Mapping of insn UIDs to CUIDs.
225 CUIDs are like UIDs except they increase monotonically in each basic
226 block, have no gaps, and only apply to real insns. */
227 static int *uid_cuid
;
228 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
230 /* Bitmap of blocks which have memory stores. */
231 static bitmap modify_mem_list_set
;
233 /* Bitmap of blocks which have calls. */
234 static bitmap blocks_with_calls
;
236 /* Vector indexed by block # with a list of all the insns that
237 modify memory within the block. */
238 static vec
<rtx_insn
*> *modify_mem_list
;
240 /* Vector indexed by block # with a canonicalized list of insns
241 that modify memory in the block. */
242 static vec
<modify_pair
> *canon_modify_mem_list
;
244 /* Vector of simple bitmaps indexed by block number. Each component sbitmap
245 indicates which expressions are transparent through the block. */
246 static sbitmap
*transp
;
249 /* Helpers for memory allocation/freeing. */
250 static void alloc_mem (void);
251 static void free_mem (void);
253 /* Support for hash table construction and transformations. */
254 static bool oprs_unchanged_p (rtx
, rtx_insn
*, bool);
255 static void record_last_reg_set_info (rtx_insn
*, rtx
);
256 static void record_last_reg_set_info_regno (rtx_insn
*, int);
257 static void record_last_mem_set_info (rtx_insn
*);
258 static void record_last_set_info (rtx
, const_rtx
, void *);
259 static void record_opr_changes (rtx_insn
*);
261 static void find_mem_conflicts (rtx
, const_rtx
, void *);
262 static int load_killed_in_block_p (int, rtx
, bool);
263 static void reset_opr_set_tables (void);
265 /* Hash table support. */
266 static hashval_t
hash_expr (rtx
, int *);
267 static void insert_expr_in_table (rtx
, rtx_insn
*);
268 static struct expr
*lookup_expr_in_table (rtx
);
269 static void dump_hash_table (FILE *);
271 /* Helpers for eliminate_partially_redundant_load. */
272 static bool reg_killed_on_edge (rtx
, edge
);
273 static bool reg_used_on_edge (rtx
, edge
);
275 static rtx
get_avail_load_store_reg (rtx_insn
*);
277 static bool bb_has_well_behaved_predecessors (basic_block
);
278 static struct occr
* get_bb_avail_insn (basic_block
, struct occr
*, int);
279 static void hash_scan_set (rtx_insn
*);
280 static void compute_hash_table (void);
282 /* The work horses of this pass. */
283 static void eliminate_partially_redundant_load (basic_block
,
286 static void eliminate_partially_redundant_loads (void);
289 /* Allocate memory for the CUID mapping array and register/memory
299 /* Find the largest UID and create a mapping from UIDs to CUIDs. */
300 uid_cuid
= XCNEWVEC (int, get_max_uid () + 1);
302 FOR_EACH_BB_FN (bb
, cfun
)
303 FOR_BB_INSNS (bb
, insn
)
306 uid_cuid
[INSN_UID (insn
)] = i
++;
308 uid_cuid
[INSN_UID (insn
)] = i
;
311 /* Allocate the available expressions hash table. We don't want to
312 make the hash table too small, but unnecessarily making it too large
313 also doesn't help. The i/4 is a gcse.c relic, and seems like a
314 reasonable choice. */
315 expr_table
= new hash_table
<expr_hasher
> (MAX (i
/ 4, 13));
317 /* We allocate everything on obstacks because we often can roll back
318 the whole obstack to some point. Freeing obstacks is very fast. */
319 gcc_obstack_init (&expr_obstack
);
320 gcc_obstack_init (&occr_obstack
);
321 gcc_obstack_init (&unoccr_obstack
);
322 gcc_obstack_init (&modifies_mem_obstack
);
324 /* Working array used to track the last set for each register
325 in the current block. */
326 reg_avail_info
= (int *) xmalloc (FIRST_PSEUDO_REGISTER
* sizeof (int));
328 /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we
329 can roll it back in reset_opr_set_tables. */
330 modifies_mem_obstack_bottom
=
331 (struct modifies_mem
*) obstack_alloc (&modifies_mem_obstack
,
332 sizeof (struct modifies_mem
));
334 blocks_with_calls
= BITMAP_ALLOC (NULL
);
335 modify_mem_list_set
= BITMAP_ALLOC (NULL
);
337 modify_mem_list
= (vec_rtx_heap
*) xcalloc (last_basic_block_for_fn (cfun
),
338 sizeof (vec_rtx_heap
));
339 canon_modify_mem_list
340 = (vec_modify_pair_heap
*) xcalloc (last_basic_block_for_fn (cfun
),
341 sizeof (vec_modify_pair_heap
));
344 /* Free memory allocated by alloc_mem. */
354 obstack_free (&expr_obstack
, NULL
);
355 obstack_free (&occr_obstack
, NULL
);
356 obstack_free (&unoccr_obstack
, NULL
);
357 obstack_free (&modifies_mem_obstack
, NULL
);
361 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set
, 0, i
, bi
)
363 modify_mem_list
[i
].release ();
364 canon_modify_mem_list
[i
].release ();
367 BITMAP_FREE (blocks_with_calls
);
368 BITMAP_FREE (modify_mem_list_set
);
369 free (reg_avail_info
);
373 /* Insert expression X in INSN in the hash TABLE.
374 If it is already present, record it as the last occurrence in INSN's
378 insert_expr_in_table (rtx x
, rtx_insn
*insn
)
382 struct expr
*cur_expr
, **slot
;
383 struct occr
*avail_occr
, *last_occr
= NULL
;
385 hash
= hash_expr (x
, &do_not_record_p
);
387 /* Do not insert expression in the table if it contains volatile operands,
388 or if hash_expr determines the expression is something we don't want
389 to or can't handle. */
393 /* We anticipate that redundant expressions are rare, so for convenience
394 allocate a new hash table element here already and set its fields.
395 If we don't do this, we need a hack with a static struct expr. Anyway,
396 obstack_free is really fast and one more obstack_alloc doesn't hurt if
397 we're going to see more expressions later on. */
398 cur_expr
= (struct expr
*) obstack_alloc (&expr_obstack
,
399 sizeof (struct expr
));
401 cur_expr
->hash
= hash
;
402 cur_expr
->avail_occr
= NULL
;
404 slot
= expr_table
->find_slot_with_hash (cur_expr
, hash
, INSERT
);
408 /* The expression isn't found, so insert it. */
411 /* Anytime we add an entry to the table, record the index
412 of the new entry. The bitmap index starts counting
414 cur_expr
->bitmap_index
= expr_table
->elements () - 1;
418 /* The expression is already in the table, so roll back the
419 obstack and use the existing table entry. */
420 obstack_free (&expr_obstack
, cur_expr
);
424 /* Search for another occurrence in the same basic block. */
425 avail_occr
= cur_expr
->avail_occr
;
427 && BLOCK_FOR_INSN (avail_occr
->insn
) != BLOCK_FOR_INSN (insn
))
429 /* If an occurrence isn't found, save a pointer to the end of
431 last_occr
= avail_occr
;
432 avail_occr
= avail_occr
->next
;
436 /* Found another instance of the expression in the same basic block.
437 Prefer this occurrence to the currently recorded one. We want
438 the last one in the block and the block is scanned from start
440 avail_occr
->insn
= insn
;
443 /* First occurrence of this expression in this basic block. */
444 avail_occr
= (struct occr
*) obstack_alloc (&occr_obstack
,
445 sizeof (struct occr
));
447 /* First occurrence of this expression in any block? */
448 if (cur_expr
->avail_occr
== NULL
)
449 cur_expr
->avail_occr
= avail_occr
;
451 last_occr
->next
= avail_occr
;
453 avail_occr
->insn
= insn
;
454 avail_occr
->next
= NULL
;
455 avail_occr
->deleted_p
= 0;
460 /* Lookup pattern PAT in the expression hash table.
461 The result is a pointer to the table entry, or NULL if not found. */
464 lookup_expr_in_table (rtx pat
)
467 struct expr
**slot
, *tmp_expr
;
468 hashval_t hash
= hash_expr (pat
, &do_not_record_p
);
473 tmp_expr
= (struct expr
*) obstack_alloc (&expr_obstack
,
474 sizeof (struct expr
));
475 tmp_expr
->expr
= pat
;
476 tmp_expr
->hash
= hash
;
477 tmp_expr
->avail_occr
= NULL
;
479 slot
= expr_table
->find_slot_with_hash (tmp_expr
, hash
, INSERT
);
480 obstack_free (&expr_obstack
, tmp_expr
);
489 /* Dump all expressions and occurrences that are currently in the
490 expression hash table to FILE. */
492 /* This helper is called via htab_traverse. */
494 dump_expr_hash_table_entry (expr
**slot
, FILE *file
)
496 struct expr
*exprs
= *slot
;
499 fprintf (file
, "expr: ");
500 print_rtl (file
, exprs
->expr
);
501 fprintf (file
,"\nhashcode: %u\n", exprs
->hash
);
502 fprintf (file
,"list of occurrences:\n");
503 occr
= exprs
->avail_occr
;
506 rtx_insn
*insn
= occr
->insn
;
507 print_rtl_single (file
, insn
);
508 fprintf (file
, "\n");
511 fprintf (file
, "\n");
516 dump_hash_table (FILE *file
)
518 fprintf (file
, "\n\nexpression hash table\n");
519 fprintf (file
, "size %ld, %ld elements, %f collision/search ratio\n",
520 (long) expr_table
->size (),
521 (long) expr_table
->elements (),
522 expr_table
->collisions ());
523 if (expr_table
->elements () > 0)
525 fprintf (file
, "\n\ntable entries:\n");
526 expr_table
->traverse
<FILE *, dump_expr_hash_table_entry
> (file
);
528 fprintf (file
, "\n");
531 /* Return true if register X is recorded as being set by an instruction
532 whose CUID is greater than the one given. */
535 reg_changed_after_insn_p (rtx x
, int cuid
)
537 unsigned int regno
, end_regno
;
540 end_regno
= END_REGNO (x
);
542 if (reg_avail_info
[regno
] > cuid
)
544 while (++regno
< end_regno
);
548 /* Return nonzero if the operands of expression X are unchanged
549 1) from the start of INSN's basic block up to but not including INSN
550 if AFTER_INSN is false, or
551 2) from INSN to the end of INSN's basic block if AFTER_INSN is true. */
554 oprs_unchanged_p (rtx x
, rtx_insn
*insn
, bool after_insn
)
567 /* We are called after register allocation. */
568 gcc_assert (REGNO (x
) < FIRST_PSEUDO_REGISTER
);
570 return !reg_changed_after_insn_p (x
, INSN_CUID (insn
) - 1);
572 return !reg_changed_after_insn_p (x
, 0);
575 if (load_killed_in_block_p (INSN_CUID (insn
), x
, after_insn
))
578 return oprs_unchanged_p (XEXP (x
, 0), insn
, after_insn
);
604 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
608 if (! oprs_unchanged_p (XEXP (x
, i
), insn
, after_insn
))
611 else if (fmt
[i
] == 'E')
612 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
613 if (! oprs_unchanged_p (XVECEXP (x
, i
, j
), insn
, after_insn
))
621 /* Used for communication between find_mem_conflicts and
622 load_killed_in_block_p. Nonzero if find_mem_conflicts finds a
623 conflict between two memory references.
624 This is a bit of a hack to work around the limitations of note_stores. */
625 static int mems_conflict_p
;
627 /* DEST is the output of an instruction. If it is a memory reference, and
628 possibly conflicts with the load found in DATA, then set mems_conflict_p
629 to a nonzero value. */
632 find_mem_conflicts (rtx dest
, const_rtx setter ATTRIBUTE_UNUSED
,
635 rtx mem_op
= (rtx
) data
;
637 while (GET_CODE (dest
) == SUBREG
638 || GET_CODE (dest
) == ZERO_EXTRACT
639 || GET_CODE (dest
) == STRICT_LOW_PART
)
640 dest
= XEXP (dest
, 0);
642 /* If DEST is not a MEM, then it will not conflict with the load. Note
643 that function calls are assumed to clobber memory, but are handled
648 if (true_dependence (dest
, GET_MODE (dest
), mem_op
))
653 /* Return nonzero if the expression in X (a memory reference) is killed
654 in the current basic block before (if AFTER_INSN is false) or after
655 (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT.
657 This function assumes that the modifies_mem table is flushed when
658 the hash table construction or redundancy elimination phases start
659 processing a new basic block. */
662 load_killed_in_block_p (int uid_limit
, rtx x
, bool after_insn
)
664 struct modifies_mem
*list_entry
= modifies_mem_list
;
668 rtx_insn
*setter
= list_entry
->insn
;
670 /* Ignore entries in the list that do not apply. */
672 && INSN_CUID (setter
) < uid_limit
)
674 && INSN_CUID (setter
) > uid_limit
))
676 list_entry
= list_entry
->next
;
680 /* If SETTER is a call everything is clobbered. Note that calls
681 to pure functions are never put on the list, so we need not
686 /* SETTER must be an insn of some kind that sets memory. Call
687 note_stores to examine each hunk of memory that is modified.
688 It will set mems_conflict_p to nonzero if there may be a
689 conflict between X and SETTER. */
691 note_stores (PATTERN (setter
), find_mem_conflicts
, x
);
695 list_entry
= list_entry
->next
;
701 /* Record INSN's CUID as the last set of each hard register covered by REG. */
704 record_last_reg_set_info (rtx_insn
*insn
, rtx reg
)
706 unsigned int regno
, end_regno
;
709 end_regno
= END_REGNO (reg
);
711 reg_avail_info
[regno
] = INSN_CUID (insn
);
712 while (++regno
< end_regno
);
716 record_last_reg_set_info_regno (rtx_insn
*insn
, int regno
)
718 reg_avail_info
[regno
] = INSN_CUID (insn
);
722 /* Record memory modification information for INSN. We do not actually care
723 about the memory location(s) that are set, or even how they are set (consider
724 a CALL_INSN). We merely need to record which insns modify memory. */
727 record_last_mem_set_info (rtx_insn
*insn
)
729 struct modifies_mem
*list_entry
;
731 list_entry
= (struct modifies_mem
*) obstack_alloc (&modifies_mem_obstack
,
732 sizeof (struct modifies_mem
));
733 list_entry
->insn
= insn
;
734 list_entry
->next
= modifies_mem_list
;
735 modifies_mem_list
= list_entry
;
737 record_last_mem_set_info_common (insn
, modify_mem_list
,
738 canon_modify_mem_list
,
743 /* Called from compute_hash_table via note_stores to handle one
744 SET or CLOBBER in an insn. DATA is really the instruction in which
745 the SET is taking place. */
748 record_last_set_info (rtx dest
, const_rtx setter ATTRIBUTE_UNUSED
, void *data
)
750 rtx_insn
*last_set_insn
= (rtx_insn
*) data
;
752 if (GET_CODE (dest
) == SUBREG
)
753 dest
= SUBREG_REG (dest
);
756 record_last_reg_set_info (last_set_insn
, dest
);
757 else if (MEM_P (dest
))
759 /* Ignore pushes, they don't clobber memory. They may still
760 clobber the stack pointer though. Some targets do argument
761 pushes without adding REG_INC notes. See e.g. PR25196,
762 where a pushsi2 on i386 doesn't have REG_INC notes. Note
763 such changes here too. */
764 if (! push_operand (dest
, GET_MODE (dest
)))
765 record_last_mem_set_info (last_set_insn
);
767 record_last_reg_set_info_regno (last_set_insn
, STACK_POINTER_REGNUM
);
772 /* Reset tables used to keep track of what's still available since the
773 start of the block. */
776 reset_opr_set_tables (void)
778 memset (reg_avail_info
, 0, FIRST_PSEUDO_REGISTER
* sizeof (int));
779 obstack_free (&modifies_mem_obstack
, modifies_mem_obstack_bottom
);
780 modifies_mem_list
= NULL
;
784 /* Record things set by INSN.
785 This data is used by oprs_unchanged_p. */
788 record_opr_changes (rtx_insn
*insn
)
792 /* Find all stores and record them. */
793 note_stores (PATTERN (insn
), record_last_set_info
, insn
);
795 /* Also record autoincremented REGs for this insn as changed. */
796 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
797 if (REG_NOTE_KIND (note
) == REG_INC
)
798 record_last_reg_set_info (insn
, XEXP (note
, 0));
800 /* Finally, if this is a call, record all call clobbers. */
805 hard_reg_set_iterator hrsi
;
806 EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call
, 0, regno
, hrsi
)
807 record_last_reg_set_info_regno (insn
, regno
);
809 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
810 if (GET_CODE (XEXP (link
, 0)) == CLOBBER
)
812 x
= XEXP (XEXP (link
, 0), 0);
815 gcc_assert (HARD_REGISTER_P (x
));
816 record_last_reg_set_info (insn
, x
);
820 if (! RTL_CONST_OR_PURE_CALL_P (insn
))
821 record_last_mem_set_info (insn
);
826 /* Scan the pattern of INSN and add an entry to the hash TABLE.
827 After reload we are interested in loads/stores only. */
830 hash_scan_set (rtx_insn
*insn
)
832 rtx pat
= PATTERN (insn
);
833 rtx src
= SET_SRC (pat
);
834 rtx dest
= SET_DEST (pat
);
836 /* We are only interested in loads and stores. */
837 if (! MEM_P (src
) && ! MEM_P (dest
))
840 /* Don't mess with jumps and nops. */
841 if (JUMP_P (insn
) || set_noop_p (pat
))
846 if (/* Don't CSE something if we can't do a reg/reg copy. */
847 can_copy_p (GET_MODE (dest
))
848 /* Is SET_SRC something we want to gcse? */
849 && general_operand (src
, GET_MODE (src
))
851 /* Never consider insns touching the register stack. It may
852 create situations that reg-stack cannot handle (e.g. a stack
853 register live across an abnormal edge). */
854 && (REGNO (dest
) < FIRST_STACK_REG
|| REGNO (dest
) > LAST_STACK_REG
)
856 /* An expression is not available if its operands are
857 subsequently modified, including this insn. */
858 && oprs_unchanged_p (src
, insn
, true))
860 insert_expr_in_table (src
, insn
);
863 else if (REG_P (src
))
865 /* Only record sets of pseudo-regs in the hash table. */
866 if (/* Don't CSE something if we can't do a reg/reg copy. */
867 can_copy_p (GET_MODE (src
))
868 /* Is SET_DEST something we want to gcse? */
869 && general_operand (dest
, GET_MODE (dest
))
871 /* As above for STACK_REGS. */
872 && (REGNO (src
) < FIRST_STACK_REG
|| REGNO (src
) > LAST_STACK_REG
)
874 && ! (flag_float_store
&& FLOAT_MODE_P (GET_MODE (dest
)))
875 /* Check if the memory expression is killed after insn. */
876 && ! load_killed_in_block_p (INSN_CUID (insn
) + 1, dest
, true)
877 && oprs_unchanged_p (XEXP (dest
, 0), insn
, true))
879 insert_expr_in_table (dest
, insn
);
885 /* Create hash table of memory expressions available at end of basic
886 blocks. Basically you should think of this hash table as the
887 representation of AVAIL_OUT. This is the set of expressions that
888 is generated in a basic block and not killed before the end of the
889 same basic block. Notice that this is really a local computation. */
892 compute_hash_table (void)
896 FOR_EACH_BB_FN (bb
, cfun
)
900 /* First pass over the instructions records information used to
901 determine when registers and memory are last set.
902 Since we compute a "local" AVAIL_OUT, reset the tables that
903 help us keep track of what has been modified since the start
905 reset_opr_set_tables ();
906 FOR_BB_INSNS (bb
, insn
)
909 record_opr_changes (insn
);
912 /* The next pass actually builds the hash table. */
913 FOR_BB_INSNS (bb
, insn
)
914 if (INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == SET
)
915 hash_scan_set (insn
);
920 /* Check if register REG is killed in any insn waiting to be inserted on
921 edge E. This function is required to check that our data flow analysis
922 is still valid prior to commit_edge_insertions. */
925 reg_killed_on_edge (rtx reg
, edge e
)
929 for (insn
= e
->insns
.r
; insn
; insn
= NEXT_INSN (insn
))
930 if (INSN_P (insn
) && reg_set_p (reg
, insn
))
936 /* Similar to above - check if register REG is used in any insn waiting
937 to be inserted on edge E.
938 Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
939 with PREV(insn),NEXT(insn) instead of calling reg_overlap_mentioned_p. */
942 reg_used_on_edge (rtx reg
, edge e
)
946 for (insn
= e
->insns
.r
; insn
; insn
= NEXT_INSN (insn
))
947 if (INSN_P (insn
) && reg_overlap_mentioned_p (reg
, PATTERN (insn
)))
953 /* Return the loaded/stored register of a load/store instruction. */
956 get_avail_load_store_reg (rtx_insn
*insn
)
958 if (REG_P (SET_DEST (PATTERN (insn
))))
960 return SET_DEST (PATTERN (insn
));
964 gcc_assert (REG_P (SET_SRC (PATTERN (insn
))));
965 return SET_SRC (PATTERN (insn
));
969 /* Return nonzero if the predecessors of BB are "well behaved". */
972 bb_has_well_behaved_predecessors (basic_block bb
)
977 if (EDGE_COUNT (bb
->preds
) == 0)
980 FOR_EACH_EDGE (pred
, ei
, bb
->preds
)
982 if ((pred
->flags
& EDGE_ABNORMAL
) && EDGE_CRITICAL_P (pred
))
985 if ((pred
->flags
& EDGE_ABNORMAL_CALL
) && cfun
->has_nonlocal_label
)
988 if (tablejump_p (BB_END (pred
->src
), NULL
, NULL
))
995 /* Search for the occurrences of expression in BB. */
998 get_bb_avail_insn (basic_block bb
, struct occr
*orig_occr
, int bitmap_index
)
1000 struct occr
*occr
= orig_occr
;
1002 for (; occr
!= NULL
; occr
= occr
->next
)
1003 if (BLOCK_FOR_INSN (occr
->insn
) == bb
)
1006 /* If we could not find an occurrence in BB, see if BB
1007 has a single predecessor with an occurrence that is
1008 transparent through BB. */
1009 if (single_pred_p (bb
)
1010 && bitmap_bit_p (transp
[bb
->index
], bitmap_index
)
1011 && (occr
= get_bb_avail_insn (single_pred (bb
), orig_occr
, bitmap_index
)))
1013 rtx avail_reg
= get_avail_load_store_reg (occr
->insn
);
1014 if (!reg_set_between_p (avail_reg
,
1015 PREV_INSN (BB_HEAD (bb
)),
1016 NEXT_INSN (BB_END (bb
)))
1017 && !reg_killed_on_edge (avail_reg
, single_pred_edge (bb
)))
1025 /* This helper is called via htab_traverse. */
1027 compute_expr_transp (expr
**slot
, FILE *dump_file ATTRIBUTE_UNUSED
)
1029 struct expr
*expr
= *slot
;
1031 compute_transp (expr
->expr
, expr
->bitmap_index
, transp
,
1032 blocks_with_calls
, modify_mem_list_set
,
1033 canon_modify_mem_list
);
1037 /* This handles the case where several stores feed a partially redundant
1038 load. It checks if the redundancy elimination is possible and if it's
1041 Redundancy elimination is possible if,
1042 1) None of the operands of an insn have been modified since the start
1043 of the current basic block.
1044 2) In any predecessor of the current basic block, the same expression
1047 See the function body for the heuristics that determine if eliminating
1048 a redundancy is also worth doing, assuming it is possible. */
1051 eliminate_partially_redundant_load (basic_block bb
, rtx_insn
*insn
,
1055 rtx_insn
*avail_insn
= NULL
;
1058 struct occr
*a_occr
;
1059 struct unoccr
*occr
, *avail_occrs
= NULL
;
1060 struct unoccr
*unoccr
, *unavail_occrs
= NULL
, *rollback_unoccr
= NULL
;
1062 gcov_type ok_count
= 0; /* Redundant load execution count. */
1063 gcov_type critical_count
= 0; /* Execution count of critical edges. */
1065 bool critical_edge_split
= false;
1067 /* The execution count of the loads to be added to make the
1068 load fully redundant. */
1069 gcov_type not_ok_count
= 0;
1070 basic_block pred_bb
;
1072 pat
= PATTERN (insn
);
1073 dest
= SET_DEST (pat
);
1075 /* Check that the loaded register is not used, set, or killed from the
1076 beginning of the block. */
1077 if (reg_changed_after_insn_p (dest
, 0)
1078 || reg_used_between_p (dest
, PREV_INSN (BB_HEAD (bb
)), insn
))
1081 /* Check potential for replacing load with copy for predecessors. */
1082 FOR_EACH_EDGE (pred
, ei
, bb
->preds
)
1084 rtx_insn
*next_pred_bb_end
;
1087 avail_reg
= NULL_RTX
;
1088 pred_bb
= pred
->src
;
1089 for (a_occr
= get_bb_avail_insn (pred_bb
,
1091 expr
->bitmap_index
);
1093 a_occr
= get_bb_avail_insn (pred_bb
,
1095 expr
->bitmap_index
))
1097 /* Check if the loaded register is not used. */
1098 avail_insn
= a_occr
->insn
;
1099 avail_reg
= get_avail_load_store_reg (avail_insn
);
1100 gcc_assert (avail_reg
);
1102 /* Make sure we can generate a move from register avail_reg to
1104 rtx_insn
*move
= gen_move_insn (copy_rtx (dest
),
1105 copy_rtx (avail_reg
));
1106 extract_insn (move
);
1107 if (! constrain_operands (1, get_preferred_alternatives (insn
,
1109 || reg_killed_on_edge (avail_reg
, pred
)
1110 || reg_used_on_edge (dest
, pred
))
1115 next_pred_bb_end
= NEXT_INSN (BB_END (BLOCK_FOR_INSN (avail_insn
)));
1116 if (!reg_set_between_p (avail_reg
, avail_insn
, next_pred_bb_end
))
1117 /* AVAIL_INSN remains non-null. */
1123 if (EDGE_CRITICAL_P (pred
))
1124 critical_count
+= pred
->count
;
1126 if (avail_insn
!= NULL_RTX
)
1129 ok_count
+= pred
->count
;
1130 if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest
),
1131 copy_rtx (avail_reg
)))))
1133 /* Check if there is going to be a split. */
1134 if (EDGE_CRITICAL_P (pred
))
1135 critical_edge_split
= true;
1137 else /* Its a dead move no need to generate. */
1139 occr
= (struct unoccr
*) obstack_alloc (&unoccr_obstack
,
1140 sizeof (struct unoccr
));
1141 occr
->insn
= avail_insn
;
1143 occr
->next
= avail_occrs
;
1145 if (! rollback_unoccr
)
1146 rollback_unoccr
= occr
;
1150 /* Adding a load on a critical edge will cause a split. */
1151 if (EDGE_CRITICAL_P (pred
))
1152 critical_edge_split
= true;
1153 not_ok_count
+= pred
->count
;
1154 unoccr
= (struct unoccr
*) obstack_alloc (&unoccr_obstack
,
1155 sizeof (struct unoccr
));
1156 unoccr
->insn
= NULL
;
1157 unoccr
->pred
= pred
;
1158 unoccr
->next
= unavail_occrs
;
1159 unavail_occrs
= unoccr
;
1160 if (! rollback_unoccr
)
1161 rollback_unoccr
= unoccr
;
1165 if (/* No load can be replaced by copy. */
1167 /* Prevent exploding the code. */
1168 || (optimize_bb_for_size_p (bb
) && npred_ok
> 1)
1169 /* If we don't have profile information we cannot tell if splitting
1170 a critical edge is profitable or not so don't do it. */
1171 || ((! profile_info
|| ! flag_branch_probabilities
1172 || targetm
.cannot_modify_jumps_p ())
1173 && critical_edge_split
))
1176 /* Check if it's worth applying the partial redundancy elimination. */
1177 if (ok_count
< GCSE_AFTER_RELOAD_PARTIAL_FRACTION
* not_ok_count
)
1179 if (ok_count
< GCSE_AFTER_RELOAD_CRITICAL_FRACTION
* critical_count
)
1182 /* Generate moves to the loaded register from where
1183 the memory is available. */
1184 for (occr
= avail_occrs
; occr
; occr
= occr
->next
)
1186 avail_insn
= occr
->insn
;
1188 /* Set avail_reg to be the register having the value of the
1190 avail_reg
= get_avail_load_store_reg (avail_insn
);
1191 gcc_assert (avail_reg
);
1193 insert_insn_on_edge (gen_move_insn (copy_rtx (dest
),
1194 copy_rtx (avail_reg
)),
1196 stats
.moves_inserted
++;
1200 "generating move from %d to %d on edge from %d to %d\n",
1207 /* Regenerate loads where the memory is unavailable. */
1208 for (unoccr
= unavail_occrs
; unoccr
; unoccr
= unoccr
->next
)
1210 pred
= unoccr
->pred
;
1211 insert_insn_on_edge (copy_insn (PATTERN (insn
)), pred
);
1212 stats
.copies_inserted
++;
1217 "generating on edge from %d to %d a copy of load: ",
1220 print_rtl (dump_file
, PATTERN (insn
));
1221 fprintf (dump_file
, "\n");
1225 /* Delete the insn if it is not available in this block and mark it
1226 for deletion if it is available. If insn is available it may help
1227 discover additional redundancies, so mark it for later deletion. */
1228 for (a_occr
= get_bb_avail_insn (bb
, expr
->avail_occr
, expr
->bitmap_index
);
1229 a_occr
&& (a_occr
->insn
!= insn
);
1230 a_occr
= get_bb_avail_insn (bb
, a_occr
->next
, expr
->bitmap_index
))
1235 stats
.insns_deleted
++;
1239 fprintf (dump_file
, "deleting insn:\n");
1240 print_rtl_single (dump_file
, insn
);
1241 fprintf (dump_file
, "\n");
1246 a_occr
->deleted_p
= 1;
1249 if (rollback_unoccr
)
1250 obstack_free (&unoccr_obstack
, rollback_unoccr
);
1253 /* Perform the redundancy elimination as described before. */
1256 eliminate_partially_redundant_loads (void)
1261 /* Note we start at block 1. */
1263 if (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1267 ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
->next_bb
,
1268 EXIT_BLOCK_PTR_FOR_FN (cfun
),
1271 /* Don't try anything on basic blocks with strange predecessors. */
1272 if (! bb_has_well_behaved_predecessors (bb
))
1275 /* Do not try anything on cold basic blocks. */
1276 if (optimize_bb_for_size_p (bb
))
1279 /* Reset the table of things changed since the start of the current
1281 reset_opr_set_tables ();
1283 /* Look at all insns in the current basic block and see if there are
1284 any loads in it that we can record. */
1285 FOR_BB_INSNS (bb
, insn
)
1287 /* Is it a load - of the form (set (reg) (mem))? */
1288 if (NONJUMP_INSN_P (insn
)
1289 && GET_CODE (PATTERN (insn
)) == SET
1290 && REG_P (SET_DEST (PATTERN (insn
)))
1291 && MEM_P (SET_SRC (PATTERN (insn
))))
1293 rtx pat
= PATTERN (insn
);
1294 rtx src
= SET_SRC (pat
);
1297 if (!MEM_VOLATILE_P (src
)
1298 && GET_MODE (src
) != BLKmode
1299 && general_operand (src
, GET_MODE (src
))
1300 /* Are the operands unchanged since the start of the
1302 && oprs_unchanged_p (src
, insn
, false)
1303 && !(cfun
->can_throw_non_call_exceptions
&& may_trap_p (src
))
1304 && !side_effects_p (src
)
1305 /* Is the expression recorded? */
1306 && (expr
= lookup_expr_in_table (src
)) != NULL
)
1308 /* We now have a load (insn) and an available memory at
1309 its BB start (expr). Try to remove the loads if it is
1311 eliminate_partially_redundant_load (bb
, insn
, expr
);
1315 /* Keep track of everything modified by this insn, so that we
1316 know what has been modified since the start of the current
1319 record_opr_changes (insn
);
1323 commit_edge_insertions ();
1326 /* Go over the expression hash table and delete insns that were
1327 marked for later deletion. */
1329 /* This helper is called via htab_traverse. */
1331 delete_redundant_insns_1 (expr
**slot
, void *data ATTRIBUTE_UNUSED
)
1333 struct expr
*exprs
= *slot
;
1336 for (occr
= exprs
->avail_occr
; occr
!= NULL
; occr
= occr
->next
)
1338 if (occr
->deleted_p
&& dbg_cnt (gcse2_delete
))
1340 delete_insn (occr
->insn
);
1341 stats
.insns_deleted
++;
1345 fprintf (dump_file
, "deleting insn:\n");
1346 print_rtl_single (dump_file
, occr
->insn
);
1347 fprintf (dump_file
, "\n");
1356 delete_redundant_insns (void)
1358 expr_table
->traverse
<void *, delete_redundant_insns_1
> (NULL
);
1360 fprintf (dump_file
, "\n");
1363 /* Main entry point of the GCSE after reload - clean some redundant loads
1367 gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED
)
1370 memset (&stats
, 0, sizeof (stats
));
1372 /* Allocate memory for this pass.
1373 Also computes and initializes the insns' CUIDs. */
1376 /* We need alias analysis. */
1377 init_alias_analysis ();
1379 compute_hash_table ();
1382 dump_hash_table (dump_file
);
1384 if (expr_table
->elements () > 0)
1386 /* Knowing which MEMs are transparent through a block can signifiantly
1387 increase the number of redundant loads found. So compute transparency
1388 information for each memory expression in the hash table. */
1390 /* This can not be part of the normal allocation routine because
1391 we have to know the number of elements in the hash table. */
1392 transp
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
1393 expr_table
->elements ());
1394 bitmap_vector_ones (transp
, last_basic_block_for_fn (cfun
));
1395 expr_table
->traverse
<FILE *, compute_expr_transp
> (dump_file
);
1396 eliminate_partially_redundant_loads ();
1397 delete_redundant_insns ();
1398 sbitmap_vector_free (transp
);
1402 fprintf (dump_file
, "GCSE AFTER RELOAD stats:\n");
1403 fprintf (dump_file
, "copies inserted: %d\n", stats
.copies_inserted
);
1404 fprintf (dump_file
, "moves inserted: %d\n", stats
.moves_inserted
);
1405 fprintf (dump_file
, "insns deleted: %d\n", stats
.insns_deleted
);
1406 fprintf (dump_file
, "\n\n");
1409 statistics_counter_event (cfun
, "copies inserted",
1410 stats
.copies_inserted
);
1411 statistics_counter_event (cfun
, "moves inserted",
1412 stats
.moves_inserted
);
1413 statistics_counter_event (cfun
, "insns deleted",
1414 stats
.insns_deleted
);
1417 /* We are finished with alias. */
1418 end_alias_analysis ();
1426 rest_of_handle_gcse2 (void)
1428 gcse_after_reload_main (get_insns ());
1429 rebuild_jump_labels (get_insns ());
1435 const pass_data pass_data_gcse2
=
1437 RTL_PASS
, /* type */
1439 OPTGROUP_NONE
, /* optinfo_flags */
1440 TV_GCSE_AFTER_RELOAD
, /* tv_id */
1441 0, /* properties_required */
1442 0, /* properties_provided */
1443 0, /* properties_destroyed */
1444 0, /* todo_flags_start */
1445 0, /* todo_flags_finish */
1448 class pass_gcse2
: public rtl_opt_pass
1451 pass_gcse2 (gcc::context
*ctxt
)
1452 : rtl_opt_pass (pass_data_gcse2
, ctxt
)
1455 /* opt_pass methods: */
1456 virtual bool gate (function
*fun
)
1458 return (optimize
> 0 && flag_gcse_after_reload
1459 && optimize_function_for_speed_p (fun
));
1462 virtual unsigned int execute (function
*) { return rest_of_handle_gcse2 (); }
1464 }; // class pass_gcse2
1469 make_pass_gcse2 (gcc::context
*ctxt
)
1471 return new pass_gcse2 (ctxt
);