Daily bump.
[official-gcc.git] / gcc / postreload-gcse.c
bloba1ffdb4d632c78bf1d2f535ec42f265458d2ee18
1 /* Post reload partially redundant load elimination
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "diagnostic-core.h"
26 #include "rtl.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "tm_p.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "recog.h"
36 #include "predict.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "cfgrtl.h"
41 #include "basic-block.h"
42 #include "profile.h"
43 #include "expmed.h"
44 #include "dojump.h"
45 #include "explow.h"
46 #include "calls.h"
47 #include "emit-rtl.h"
48 #include "varasm.h"
49 #include "stmt.h"
50 #include "expr.h"
51 #include "except.h"
52 #include "intl.h"
53 #include "obstack.h"
54 #include "params.h"
55 #include "target.h"
56 #include "tree-pass.h"
57 #include "dbgcnt.h"
58 #include "df.h"
59 #include "gcse-common.h"
61 /* The following code implements gcse after reload, the purpose of this
62 pass is to cleanup redundant loads generated by reload and other
63 optimizations that come after gcse. It searches for simple inter-block
64 redundancies and tries to eliminate them by adding moves and loads
65 in cold places.
67 Perform partially redundant load elimination, try to eliminate redundant
68 loads created by the reload pass. We try to look for full or partial
69 redundant loads fed by one or more loads/stores in predecessor BBs,
70 and try adding loads to make them fully redundant. We also check if
71 it's worth adding loads to be able to delete the redundant load.
73 Algorithm:
74 1. Build available expressions hash table:
75 For each load/store instruction, if the loaded/stored memory didn't
76 change until the end of the basic block add this memory expression to
77 the hash table.
78 2. Perform Redundancy elimination:
79 For each load instruction do the following:
80 perform partial redundancy elimination, check if it's worth adding
81 loads to make the load fully redundant. If so add loads and
82 register copies and delete the load.
83 3. Delete instructions made redundant in step 2.
85 Future enhancement:
86 If the loaded register is used/defined between load and some store,
87 look for some other free register between load and all its stores,
88 and replace the load with a copy from this register to the loaded
89 register.
/* Counters kept by this pass: moves inserted, copies inserted and
   insns deleted.  */
static struct
{
  int moves_inserted;
  int copies_inserted;
  int insns_deleted;
} stats;
101 /* We need to keep a hash table of expressions. The table entries are of
102 type 'struct expr', and for each expression there is a single linked
103 list of occurrences. */
105 /* Expression elements in the hash table. */
106 struct expr
108 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
109 rtx expr;
111 /* The same hash for this entry. */
112 hashval_t hash;
114 /* Index in the transparent bitmaps. */
115 unsigned int bitmap_index;
117 /* List of available occurrence in basic blocks in the function. */
118 struct occr *avail_occr;
121 /* Hashtable helpers. */
123 struct expr_hasher : nofree_ptr_hash <expr>
125 static inline hashval_t hash (const expr *);
126 static inline bool equal (const expr *, const expr *);
130 /* Hash expression X.
131 DO_NOT_RECORD_P is a boolean indicating if a volatile operand is found
132 or if the expression contains something we don't want to insert in the
133 table. */
135 static hashval_t
136 hash_expr (rtx x, int *do_not_record_p)
138 *do_not_record_p = 0;
139 return hash_rtx (x, GET_MODE (x), do_not_record_p,
140 NULL, /*have_reg_qty=*/false);
143 /* Callback for hashtab.
144 Return the hash value for expression EXP. We don't actually hash
145 here, we just return the cached hash value. */
147 inline hashval_t
148 expr_hasher::hash (const expr *exp)
150 return exp->hash;
153 /* Callback for hashtab.
154 Return nonzero if exp1 is equivalent to exp2. */
156 inline bool
157 expr_hasher::equal (const expr *exp1, const expr *exp2)
159 int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true);
161 gcc_assert (!equiv_p || exp1->hash == exp2->hash);
162 return equiv_p;
165 /* The table itself. */
166 static hash_table<expr_hasher> *expr_table;
169 static struct obstack expr_obstack;
171 /* Occurrence of an expression.
172 There is at most one occurrence per basic block. If a pattern appears
173 more than once, the last appearance is used. */
175 struct occr
177 /* Next occurrence of this expression. */
178 struct occr *next;
179 /* The insn that computes the expression. */
180 rtx_insn *insn;
181 /* Nonzero if this [anticipatable] occurrence has been deleted. */
182 char deleted_p;
185 static struct obstack occr_obstack;
187 /* The following structure holds the information about the occurrences of
188 the redundant instructions. */
189 struct unoccr
191 struct unoccr *next;
192 edge pred;
193 rtx_insn *insn;
196 static struct obstack unoccr_obstack;
/* Array where each element is the CUID of the insn that last set the hard
   register with the number of the element, since the start of the current
   basic block.

   This array is used during the building of the hash table (step 1) to
   determine if a reg is killed before the end of a basic block.

   It is also used when eliminating partial redundancies (step 2) to see
   if a reg was modified since the start of a basic block.  */
207 static int *reg_avail_info;
209 /* A list of insns that may modify memory within the current basic block. */
210 struct modifies_mem
212 rtx_insn *insn;
213 struct modifies_mem *next;
215 static struct modifies_mem *modifies_mem_list;
217 /* The modifies_mem structs also go on an obstack, only this obstack is
218 freed each time after completing the analysis or transformations on
219 a basic block. So we allocate a dummy modifies_mem_obstack_bottom
220 object on the obstack to keep track of the bottom of the obstack. */
221 static struct obstack modifies_mem_obstack;
222 static struct modifies_mem *modifies_mem_obstack_bottom;
224 /* Mapping of insn UIDs to CUIDs.
225 CUIDs are like UIDs except they increase monotonically in each basic
226 block, have no gaps, and only apply to real insns. */
227 static int *uid_cuid;
228 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
230 /* Bitmap of blocks which have memory stores. */
231 static bitmap modify_mem_list_set;
233 /* Bitmap of blocks which have calls. */
234 static bitmap blocks_with_calls;
236 /* Vector indexed by block # with a list of all the insns that
237 modify memory within the block. */
238 static vec<rtx_insn *> *modify_mem_list;
240 /* Vector indexed by block # with a canonicalized list of insns
241 that modify memory in the block. */
242 static vec<modify_pair> *canon_modify_mem_list;
244 /* Vector of simple bitmaps indexed by block number. Each component sbitmap
245 indicates which expressions are transparent through the block. */
246 static sbitmap *transp;
249 /* Helpers for memory allocation/freeing. */
250 static void alloc_mem (void);
251 static void free_mem (void);
253 /* Support for hash table construction and transformations. */
254 static bool oprs_unchanged_p (rtx, rtx_insn *, bool);
255 static void record_last_reg_set_info (rtx_insn *, rtx);
256 static void record_last_reg_set_info_regno (rtx_insn *, int);
257 static void record_last_mem_set_info (rtx_insn *);
258 static void record_last_set_info (rtx, const_rtx, void *);
259 static void record_opr_changes (rtx_insn *);
261 static void find_mem_conflicts (rtx, const_rtx, void *);
262 static int load_killed_in_block_p (int, rtx, bool);
263 static void reset_opr_set_tables (void);
265 /* Hash table support. */
266 static hashval_t hash_expr (rtx, int *);
267 static void insert_expr_in_table (rtx, rtx_insn *);
268 static struct expr *lookup_expr_in_table (rtx);
269 static void dump_hash_table (FILE *);
271 /* Helpers for eliminate_partially_redundant_load. */
272 static bool reg_killed_on_edge (rtx, edge);
273 static bool reg_used_on_edge (rtx, edge);
275 static rtx get_avail_load_store_reg (rtx_insn *);
277 static bool bb_has_well_behaved_predecessors (basic_block);
278 static struct occr* get_bb_avail_insn (basic_block, struct occr *, int);
279 static void hash_scan_set (rtx_insn *);
280 static void compute_hash_table (void);
282 /* The work horses of this pass. */
283 static void eliminate_partially_redundant_load (basic_block,
284 rtx_insn *,
285 struct expr *);
286 static void eliminate_partially_redundant_loads (void);
289 /* Allocate memory for the CUID mapping array and register/memory
290 tracking tables. */
292 static void
293 alloc_mem (void)
295 int i;
296 basic_block bb;
297 rtx_insn *insn;
299 /* Find the largest UID and create a mapping from UIDs to CUIDs. */
300 uid_cuid = XCNEWVEC (int, get_max_uid () + 1);
301 i = 1;
302 FOR_EACH_BB_FN (bb, cfun)
303 FOR_BB_INSNS (bb, insn)
305 if (INSN_P (insn))
306 uid_cuid[INSN_UID (insn)] = i++;
307 else
308 uid_cuid[INSN_UID (insn)] = i;
311 /* Allocate the available expressions hash table. We don't want to
312 make the hash table too small, but unnecessarily making it too large
313 also doesn't help. The i/4 is a gcse.c relic, and seems like a
314 reasonable choice. */
315 expr_table = new hash_table<expr_hasher> (MAX (i / 4, 13));
317 /* We allocate everything on obstacks because we often can roll back
318 the whole obstack to some point. Freeing obstacks is very fast. */
319 gcc_obstack_init (&expr_obstack);
320 gcc_obstack_init (&occr_obstack);
321 gcc_obstack_init (&unoccr_obstack);
322 gcc_obstack_init (&modifies_mem_obstack);
324 /* Working array used to track the last set for each register
325 in the current block. */
326 reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int));
328 /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we
329 can roll it back in reset_opr_set_tables. */
330 modifies_mem_obstack_bottom =
331 (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
332 sizeof (struct modifies_mem));
334 blocks_with_calls = BITMAP_ALLOC (NULL);
335 modify_mem_list_set = BITMAP_ALLOC (NULL);
337 modify_mem_list = (vec_rtx_heap *) xcalloc (last_basic_block_for_fn (cfun),
338 sizeof (vec_rtx_heap));
339 canon_modify_mem_list
340 = (vec_modify_pair_heap *) xcalloc (last_basic_block_for_fn (cfun),
341 sizeof (vec_modify_pair_heap));
344 /* Free memory allocated by alloc_mem. */
346 static void
347 free_mem (void)
349 free (uid_cuid);
351 delete expr_table;
352 expr_table = NULL;
354 obstack_free (&expr_obstack, NULL);
355 obstack_free (&occr_obstack, NULL);
356 obstack_free (&unoccr_obstack, NULL);
357 obstack_free (&modifies_mem_obstack, NULL);
359 unsigned i;
360 bitmap_iterator bi;
361 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
363 modify_mem_list[i].release ();
364 canon_modify_mem_list[i].release ();
367 BITMAP_FREE (blocks_with_calls);
368 BITMAP_FREE (modify_mem_list_set);
369 free (reg_avail_info);
373 /* Insert expression X in INSN in the hash TABLE.
374 If it is already present, record it as the last occurrence in INSN's
375 basic block. */
377 static void
378 insert_expr_in_table (rtx x, rtx_insn *insn)
380 int do_not_record_p;
381 hashval_t hash;
382 struct expr *cur_expr, **slot;
383 struct occr *avail_occr, *last_occr = NULL;
385 hash = hash_expr (x, &do_not_record_p);
387 /* Do not insert expression in the table if it contains volatile operands,
388 or if hash_expr determines the expression is something we don't want
389 to or can't handle. */
390 if (do_not_record_p)
391 return;
393 /* We anticipate that redundant expressions are rare, so for convenience
394 allocate a new hash table element here already and set its fields.
395 If we don't do this, we need a hack with a static struct expr. Anyway,
396 obstack_free is really fast and one more obstack_alloc doesn't hurt if
397 we're going to see more expressions later on. */
398 cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
399 sizeof (struct expr));
400 cur_expr->expr = x;
401 cur_expr->hash = hash;
402 cur_expr->avail_occr = NULL;
404 slot = expr_table->find_slot_with_hash (cur_expr, hash, INSERT);
406 if (! (*slot))
408 /* The expression isn't found, so insert it. */
409 *slot = cur_expr;
411 /* Anytime we add an entry to the table, record the index
412 of the new entry. The bitmap index starts counting
413 at zero. */
414 cur_expr->bitmap_index = expr_table->elements () - 1;
416 else
418 /* The expression is already in the table, so roll back the
419 obstack and use the existing table entry. */
420 obstack_free (&expr_obstack, cur_expr);
421 cur_expr = *slot;
424 /* Search for another occurrence in the same basic block. */
425 avail_occr = cur_expr->avail_occr;
426 while (avail_occr
427 && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
429 /* If an occurrence isn't found, save a pointer to the end of
430 the list. */
431 last_occr = avail_occr;
432 avail_occr = avail_occr->next;
435 if (avail_occr)
436 /* Found another instance of the expression in the same basic block.
437 Prefer this occurrence to the currently recorded one. We want
438 the last one in the block and the block is scanned from start
439 to end. */
440 avail_occr->insn = insn;
441 else
443 /* First occurrence of this expression in this basic block. */
444 avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
445 sizeof (struct occr));
447 /* First occurrence of this expression in any block? */
448 if (cur_expr->avail_occr == NULL)
449 cur_expr->avail_occr = avail_occr;
450 else
451 last_occr->next = avail_occr;
453 avail_occr->insn = insn;
454 avail_occr->next = NULL;
455 avail_occr->deleted_p = 0;
460 /* Lookup pattern PAT in the expression hash table.
461 The result is a pointer to the table entry, or NULL if not found. */
463 static struct expr *
464 lookup_expr_in_table (rtx pat)
466 int do_not_record_p;
467 struct expr **slot, *tmp_expr;
468 hashval_t hash = hash_expr (pat, &do_not_record_p);
470 if (do_not_record_p)
471 return NULL;
473 tmp_expr = (struct expr *) obstack_alloc (&expr_obstack,
474 sizeof (struct expr));
475 tmp_expr->expr = pat;
476 tmp_expr->hash = hash;
477 tmp_expr->avail_occr = NULL;
479 slot = expr_table->find_slot_with_hash (tmp_expr, hash, INSERT);
480 obstack_free (&expr_obstack, tmp_expr);
482 if (!slot)
483 return NULL;
484 else
485 return (*slot);
489 /* Dump all expressions and occurrences that are currently in the
490 expression hash table to FILE. */
492 /* This helper is called via htab_traverse. */
494 dump_expr_hash_table_entry (expr **slot, FILE *file)
496 struct expr *exprs = *slot;
497 struct occr *occr;
499 fprintf (file, "expr: ");
500 print_rtl (file, exprs->expr);
501 fprintf (file,"\nhashcode: %u\n", exprs->hash);
502 fprintf (file,"list of occurrences:\n");
503 occr = exprs->avail_occr;
504 while (occr)
506 rtx_insn *insn = occr->insn;
507 print_rtl_single (file, insn);
508 fprintf (file, "\n");
509 occr = occr->next;
511 fprintf (file, "\n");
512 return 1;
515 static void
516 dump_hash_table (FILE *file)
518 fprintf (file, "\n\nexpression hash table\n");
519 fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n",
520 (long) expr_table->size (),
521 (long) expr_table->elements (),
522 expr_table->collisions ());
523 if (expr_table->elements () > 0)
525 fprintf (file, "\n\ntable entries:\n");
526 expr_table->traverse <FILE *, dump_expr_hash_table_entry> (file);
528 fprintf (file, "\n");
531 /* Return true if register X is recorded as being set by an instruction
532 whose CUID is greater than the one given. */
534 static bool
535 reg_changed_after_insn_p (rtx x, int cuid)
537 unsigned int regno, end_regno;
539 regno = REGNO (x);
540 end_regno = END_REGNO (x);
542 if (reg_avail_info[regno] > cuid)
543 return true;
544 while (++regno < end_regno);
545 return false;
548 /* Return nonzero if the operands of expression X are unchanged
549 1) from the start of INSN's basic block up to but not including INSN
550 if AFTER_INSN is false, or
551 2) from INSN to the end of INSN's basic block if AFTER_INSN is true. */
553 static bool
554 oprs_unchanged_p (rtx x, rtx_insn *insn, bool after_insn)
556 int i, j;
557 enum rtx_code code;
558 const char *fmt;
560 if (x == 0)
561 return 1;
563 code = GET_CODE (x);
564 switch (code)
566 case REG:
567 /* We are called after register allocation. */
568 gcc_assert (REGNO (x) < FIRST_PSEUDO_REGISTER);
569 if (after_insn)
570 return !reg_changed_after_insn_p (x, INSN_CUID (insn) - 1);
571 else
572 return !reg_changed_after_insn_p (x, 0);
574 case MEM:
575 if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn))
576 return 0;
577 else
578 return oprs_unchanged_p (XEXP (x, 0), insn, after_insn);
580 case PC:
581 case CC0: /*FIXME*/
582 case CONST:
583 CASE_CONST_ANY:
584 case SYMBOL_REF:
585 case LABEL_REF:
586 case ADDR_VEC:
587 case ADDR_DIFF_VEC:
588 return 1;
590 case PRE_DEC:
591 case PRE_INC:
592 case POST_DEC:
593 case POST_INC:
594 case PRE_MODIFY:
595 case POST_MODIFY:
596 if (after_insn)
597 return 0;
598 break;
600 default:
601 break;
604 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
606 if (fmt[i] == 'e')
608 if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn))
609 return 0;
611 else if (fmt[i] == 'E')
612 for (j = 0; j < XVECLEN (x, i); j++)
613 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn))
614 return 0;
617 return 1;
621 /* Used for communication between find_mem_conflicts and
622 load_killed_in_block_p. Nonzero if find_mem_conflicts finds a
623 conflict between two memory references.
624 This is a bit of a hack to work around the limitations of note_stores. */
625 static int mems_conflict_p;
627 /* DEST is the output of an instruction. If it is a memory reference, and
628 possibly conflicts with the load found in DATA, then set mems_conflict_p
629 to a nonzero value. */
631 static void
632 find_mem_conflicts (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
633 void *data)
635 rtx mem_op = (rtx) data;
637 while (GET_CODE (dest) == SUBREG
638 || GET_CODE (dest) == ZERO_EXTRACT
639 || GET_CODE (dest) == STRICT_LOW_PART)
640 dest = XEXP (dest, 0);
642 /* If DEST is not a MEM, then it will not conflict with the load. Note
643 that function calls are assumed to clobber memory, but are handled
644 elsewhere. */
645 if (! MEM_P (dest))
646 return;
648 if (true_dependence (dest, GET_MODE (dest), mem_op))
649 mems_conflict_p = 1;
653 /* Return nonzero if the expression in X (a memory reference) is killed
654 in the current basic block before (if AFTER_INSN is false) or after
655 (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT.
657 This function assumes that the modifies_mem table is flushed when
658 the hash table construction or redundancy elimination phases start
659 processing a new basic block. */
661 static int
662 load_killed_in_block_p (int uid_limit, rtx x, bool after_insn)
664 struct modifies_mem *list_entry = modifies_mem_list;
666 while (list_entry)
668 rtx_insn *setter = list_entry->insn;
670 /* Ignore entries in the list that do not apply. */
671 if ((after_insn
672 && INSN_CUID (setter) < uid_limit)
673 || (! after_insn
674 && INSN_CUID (setter) > uid_limit))
676 list_entry = list_entry->next;
677 continue;
680 /* If SETTER is a call everything is clobbered. Note that calls
681 to pure functions are never put on the list, so we need not
682 worry about them. */
683 if (CALL_P (setter))
684 return 1;
686 /* SETTER must be an insn of some kind that sets memory. Call
687 note_stores to examine each hunk of memory that is modified.
688 It will set mems_conflict_p to nonzero if there may be a
689 conflict between X and SETTER. */
690 mems_conflict_p = 0;
691 note_stores (PATTERN (setter), find_mem_conflicts, x);
692 if (mems_conflict_p)
693 return 1;
695 list_entry = list_entry->next;
697 return 0;
701 /* Record register first/last/block set information for REGNO in INSN. */
703 static inline void
704 record_last_reg_set_info (rtx_insn *insn, rtx reg)
706 unsigned int regno, end_regno;
708 regno = REGNO (reg);
709 end_regno = END_REGNO (reg);
711 reg_avail_info[regno] = INSN_CUID (insn);
712 while (++regno < end_regno);
715 static inline void
716 record_last_reg_set_info_regno (rtx_insn *insn, int regno)
718 reg_avail_info[regno] = INSN_CUID (insn);
722 /* Record memory modification information for INSN. We do not actually care
723 about the memory location(s) that are set, or even how they are set (consider
724 a CALL_INSN). We merely need to record which insns modify memory. */
726 static void
727 record_last_mem_set_info (rtx_insn *insn)
729 struct modifies_mem *list_entry;
731 list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
732 sizeof (struct modifies_mem));
733 list_entry->insn = insn;
734 list_entry->next = modifies_mem_list;
735 modifies_mem_list = list_entry;
737 record_last_mem_set_info_common (insn, modify_mem_list,
738 canon_modify_mem_list,
739 modify_mem_list_set,
740 blocks_with_calls);
743 /* Called from compute_hash_table via note_stores to handle one
744 SET or CLOBBER in an insn. DATA is really the instruction in which
745 the SET is taking place. */
747 static void
748 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
750 rtx_insn *last_set_insn = (rtx_insn *) data;
752 if (GET_CODE (dest) == SUBREG)
753 dest = SUBREG_REG (dest);
755 if (REG_P (dest))
756 record_last_reg_set_info (last_set_insn, dest);
757 else if (MEM_P (dest))
759 /* Ignore pushes, they don't clobber memory. They may still
760 clobber the stack pointer though. Some targets do argument
761 pushes without adding REG_INC notes. See e.g. PR25196,
762 where a pushsi2 on i386 doesn't have REG_INC notes. Note
763 such changes here too. */
764 if (! push_operand (dest, GET_MODE (dest)))
765 record_last_mem_set_info (last_set_insn);
766 else
767 record_last_reg_set_info_regno (last_set_insn, STACK_POINTER_REGNUM);
772 /* Reset tables used to keep track of what's still available since the
773 start of the block. */
775 static void
776 reset_opr_set_tables (void)
778 memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int));
779 obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom);
780 modifies_mem_list = NULL;
784 /* Record things set by INSN.
785 This data is used by oprs_unchanged_p. */
787 static void
788 record_opr_changes (rtx_insn *insn)
790 rtx note;
792 /* Find all stores and record them. */
793 note_stores (PATTERN (insn), record_last_set_info, insn);
795 /* Also record autoincremented REGs for this insn as changed. */
796 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
797 if (REG_NOTE_KIND (note) == REG_INC)
798 record_last_reg_set_info (insn, XEXP (note, 0));
800 /* Finally, if this is a call, record all call clobbers. */
801 if (CALL_P (insn))
803 unsigned int regno;
804 rtx link, x;
805 hard_reg_set_iterator hrsi;
806 EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi)
807 record_last_reg_set_info_regno (insn, regno);
809 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
810 if (GET_CODE (XEXP (link, 0)) == CLOBBER)
812 x = XEXP (XEXP (link, 0), 0);
813 if (REG_P (x))
815 gcc_assert (HARD_REGISTER_P (x));
816 record_last_reg_set_info (insn, x);
820 if (! RTL_CONST_OR_PURE_CALL_P (insn))
821 record_last_mem_set_info (insn);
826 /* Scan the pattern of INSN and add an entry to the hash TABLE.
827 After reload we are interested in loads/stores only. */
829 static void
830 hash_scan_set (rtx_insn *insn)
832 rtx pat = PATTERN (insn);
833 rtx src = SET_SRC (pat);
834 rtx dest = SET_DEST (pat);
836 /* We are only interested in loads and stores. */
837 if (! MEM_P (src) && ! MEM_P (dest))
838 return;
840 /* Don't mess with jumps and nops. */
841 if (JUMP_P (insn) || set_noop_p (pat))
842 return;
844 if (REG_P (dest))
846 if (/* Don't CSE something if we can't do a reg/reg copy. */
847 can_copy_p (GET_MODE (dest))
848 /* Is SET_SRC something we want to gcse? */
849 && general_operand (src, GET_MODE (src))
850 #ifdef STACK_REGS
851 /* Never consider insns touching the register stack. It may
852 create situations that reg-stack cannot handle (e.g. a stack
853 register live across an abnormal edge). */
854 && (REGNO (dest) < FIRST_STACK_REG || REGNO (dest) > LAST_STACK_REG)
855 #endif
856 /* An expression is not available if its operands are
857 subsequently modified, including this insn. */
858 && oprs_unchanged_p (src, insn, true))
860 insert_expr_in_table (src, insn);
863 else if (REG_P (src))
865 /* Only record sets of pseudo-regs in the hash table. */
866 if (/* Don't CSE something if we can't do a reg/reg copy. */
867 can_copy_p (GET_MODE (src))
868 /* Is SET_DEST something we want to gcse? */
869 && general_operand (dest, GET_MODE (dest))
870 #ifdef STACK_REGS
871 /* As above for STACK_REGS. */
872 && (REGNO (src) < FIRST_STACK_REG || REGNO (src) > LAST_STACK_REG)
873 #endif
874 && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
875 /* Check if the memory expression is killed after insn. */
876 && ! load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true)
877 && oprs_unchanged_p (XEXP (dest, 0), insn, true))
879 insert_expr_in_table (dest, insn);
885 /* Create hash table of memory expressions available at end of basic
886 blocks. Basically you should think of this hash table as the
887 representation of AVAIL_OUT. This is the set of expressions that
888 is generated in a basic block and not killed before the end of the
889 same basic block. Notice that this is really a local computation. */
891 static void
892 compute_hash_table (void)
894 basic_block bb;
896 FOR_EACH_BB_FN (bb, cfun)
898 rtx_insn *insn;
900 /* First pass over the instructions records information used to
901 determine when registers and memory are last set.
902 Since we compute a "local" AVAIL_OUT, reset the tables that
903 help us keep track of what has been modified since the start
904 of the block. */
905 reset_opr_set_tables ();
906 FOR_BB_INSNS (bb, insn)
908 if (INSN_P (insn))
909 record_opr_changes (insn);
912 /* The next pass actually builds the hash table. */
913 FOR_BB_INSNS (bb, insn)
914 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
915 hash_scan_set (insn);
920 /* Check if register REG is killed in any insn waiting to be inserted on
921 edge E. This function is required to check that our data flow analysis
922 is still valid prior to commit_edge_insertions. */
924 static bool
925 reg_killed_on_edge (rtx reg, edge e)
927 rtx_insn *insn;
929 for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
930 if (INSN_P (insn) && reg_set_p (reg, insn))
931 return true;
933 return false;
936 /* Similar to above - check if register REG is used in any insn waiting
937 to be inserted on edge E.
938 Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
939 with PREV(insn),NEXT(insn) instead of calling reg_overlap_mentioned_p. */
941 static bool
942 reg_used_on_edge (rtx reg, edge e)
944 rtx_insn *insn;
946 for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
947 if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
948 return true;
950 return false;
953 /* Return the loaded/stored register of a load/store instruction. */
955 static rtx
956 get_avail_load_store_reg (rtx_insn *insn)
958 if (REG_P (SET_DEST (PATTERN (insn))))
959 /* A load. */
960 return SET_DEST (PATTERN (insn));
961 else
963 /* A store. */
964 gcc_assert (REG_P (SET_SRC (PATTERN (insn))));
965 return SET_SRC (PATTERN (insn));
969 /* Return nonzero if the predecessors of BB are "well behaved". */
971 static bool
972 bb_has_well_behaved_predecessors (basic_block bb)
974 edge pred;
975 edge_iterator ei;
977 if (EDGE_COUNT (bb->preds) == 0)
978 return false;
980 FOR_EACH_EDGE (pred, ei, bb->preds)
982 if ((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
983 return false;
985 if ((pred->flags & EDGE_ABNORMAL_CALL) && cfun->has_nonlocal_label)
986 return false;
988 if (tablejump_p (BB_END (pred->src), NULL, NULL))
989 return false;
991 return true;
995 /* Search for the occurrences of expression in BB. */
997 static struct occr*
998 get_bb_avail_insn (basic_block bb, struct occr *orig_occr, int bitmap_index)
1000 struct occr *occr = orig_occr;
1002 for (; occr != NULL; occr = occr->next)
1003 if (BLOCK_FOR_INSN (occr->insn) == bb)
1004 return occr;
1006 /* If we could not find an occurrence in BB, see if BB
1007 has a single predecessor with an occurrence that is
1008 transparent through BB. */
1009 if (single_pred_p (bb)
1010 && bitmap_bit_p (transp[bb->index], bitmap_index)
1011 && (occr = get_bb_avail_insn (single_pred (bb), orig_occr, bitmap_index)))
1013 rtx avail_reg = get_avail_load_store_reg (occr->insn);
1014 if (!reg_set_between_p (avail_reg,
1015 PREV_INSN (BB_HEAD (bb)),
1016 NEXT_INSN (BB_END (bb)))
1017 && !reg_killed_on_edge (avail_reg, single_pred_edge (bb)))
1018 return occr;
1021 return NULL;
1025 /* This helper is called via htab_traverse. */
1027 compute_expr_transp (expr **slot, FILE *dump_file ATTRIBUTE_UNUSED)
1029 struct expr *expr = *slot;
1031 compute_transp (expr->expr, expr->bitmap_index, transp,
1032 blocks_with_calls, modify_mem_list_set,
1033 canon_modify_mem_list);
1034 return 1;
1037 /* This handles the case where several stores feed a partially redundant
1038 load. It checks if the redundancy elimination is possible and if it's
1039 worth it.
1041 Redundancy elimination is possible if,
1042 1) None of the operands of an insn have been modified since the start
1043 of the current basic block.
1044 2) In any predecessor of the current basic block, the same expression
1045 is generated.
1047 See the function body for the heuristics that determine if eliminating
1048 a redundancy is also worth doing, assuming it is possible. */
1050 static void
1051 eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
1052 struct expr *expr)
1054 edge pred;
1055 rtx_insn *avail_insn = NULL;
1056 rtx avail_reg;
1057 rtx dest, pat;
1058 struct occr *a_occr;
1059 struct unoccr *occr, *avail_occrs = NULL;
1060 struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL;
1061 int npred_ok = 0;
1062 gcov_type ok_count = 0; /* Redundant load execution count. */
1063 gcov_type critical_count = 0; /* Execution count of critical edges. */
1064 edge_iterator ei;
1065 bool critical_edge_split = false;
1067 /* The execution count of the loads to be added to make the
1068 load fully redundant. */
1069 gcov_type not_ok_count = 0;
1070 basic_block pred_bb;
1072 pat = PATTERN (insn);
1073 dest = SET_DEST (pat);
1075 /* Check that the loaded register is not used, set, or killed from the
1076 beginning of the block. */
1077 if (reg_changed_after_insn_p (dest, 0)
1078 || reg_used_between_p (dest, PREV_INSN (BB_HEAD (bb)), insn))
1079 return;
1081 /* Check potential for replacing load with copy for predecessors. */
1082 FOR_EACH_EDGE (pred, ei, bb->preds)
1084 rtx_insn *next_pred_bb_end;
1086 avail_insn = NULL;
1087 avail_reg = NULL_RTX;
1088 pred_bb = pred->src;
1089 for (a_occr = get_bb_avail_insn (pred_bb,
1090 expr->avail_occr,
1091 expr->bitmap_index);
1092 a_occr;
1093 a_occr = get_bb_avail_insn (pred_bb,
1094 a_occr->next,
1095 expr->bitmap_index))
1097 /* Check if the loaded register is not used. */
1098 avail_insn = a_occr->insn;
1099 avail_reg = get_avail_load_store_reg (avail_insn);
1100 gcc_assert (avail_reg);
1102 /* Make sure we can generate a move from register avail_reg to
1103 dest. */
1104 rtx_insn *move = gen_move_insn (copy_rtx (dest),
1105 copy_rtx (avail_reg));
1106 extract_insn (move);
1107 if (! constrain_operands (1, get_preferred_alternatives (insn,
1108 pred_bb))
1109 || reg_killed_on_edge (avail_reg, pred)
1110 || reg_used_on_edge (dest, pred))
1112 avail_insn = NULL;
1113 continue;
1115 next_pred_bb_end = NEXT_INSN (BB_END (BLOCK_FOR_INSN (avail_insn)));
1116 if (!reg_set_between_p (avail_reg, avail_insn, next_pred_bb_end))
1117 /* AVAIL_INSN remains non-null. */
1118 break;
1119 else
1120 avail_insn = NULL;
1123 if (EDGE_CRITICAL_P (pred))
1124 critical_count += pred->count;
1126 if (avail_insn != NULL_RTX)
1128 npred_ok++;
1129 ok_count += pred->count;
1130 if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest),
1131 copy_rtx (avail_reg)))))
1133 /* Check if there is going to be a split. */
1134 if (EDGE_CRITICAL_P (pred))
1135 critical_edge_split = true;
1137 else /* Its a dead move no need to generate. */
1138 continue;
1139 occr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1140 sizeof (struct unoccr));
1141 occr->insn = avail_insn;
1142 occr->pred = pred;
1143 occr->next = avail_occrs;
1144 avail_occrs = occr;
1145 if (! rollback_unoccr)
1146 rollback_unoccr = occr;
1148 else
1150 /* Adding a load on a critical edge will cause a split. */
1151 if (EDGE_CRITICAL_P (pred))
1152 critical_edge_split = true;
1153 not_ok_count += pred->count;
1154 unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1155 sizeof (struct unoccr));
1156 unoccr->insn = NULL;
1157 unoccr->pred = pred;
1158 unoccr->next = unavail_occrs;
1159 unavail_occrs = unoccr;
1160 if (! rollback_unoccr)
1161 rollback_unoccr = unoccr;
1165 if (/* No load can be replaced by copy. */
1166 npred_ok == 0
1167 /* Prevent exploding the code. */
1168 || (optimize_bb_for_size_p (bb) && npred_ok > 1)
1169 /* If we don't have profile information we cannot tell if splitting
1170 a critical edge is profitable or not so don't do it. */
1171 || ((! profile_info || ! flag_branch_probabilities
1172 || targetm.cannot_modify_jumps_p ())
1173 && critical_edge_split))
1174 goto cleanup;
1176 /* Check if it's worth applying the partial redundancy elimination. */
1177 if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
1178 goto cleanup;
1179 if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
1180 goto cleanup;
1182 /* Generate moves to the loaded register from where
1183 the memory is available. */
1184 for (occr = avail_occrs; occr; occr = occr->next)
1186 avail_insn = occr->insn;
1187 pred = occr->pred;
1188 /* Set avail_reg to be the register having the value of the
1189 memory. */
1190 avail_reg = get_avail_load_store_reg (avail_insn);
1191 gcc_assert (avail_reg);
1193 insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
1194 copy_rtx (avail_reg)),
1195 pred);
1196 stats.moves_inserted++;
1198 if (dump_file)
1199 fprintf (dump_file,
1200 "generating move from %d to %d on edge from %d to %d\n",
1201 REGNO (avail_reg),
1202 REGNO (dest),
1203 pred->src->index,
1204 pred->dest->index);
1207 /* Regenerate loads where the memory is unavailable. */
1208 for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
1210 pred = unoccr->pred;
1211 insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
1212 stats.copies_inserted++;
1214 if (dump_file)
1216 fprintf (dump_file,
1217 "generating on edge from %d to %d a copy of load: ",
1218 pred->src->index,
1219 pred->dest->index);
1220 print_rtl (dump_file, PATTERN (insn));
1221 fprintf (dump_file, "\n");
1225 /* Delete the insn if it is not available in this block and mark it
1226 for deletion if it is available. If insn is available it may help
1227 discover additional redundancies, so mark it for later deletion. */
1228 for (a_occr = get_bb_avail_insn (bb, expr->avail_occr, expr->bitmap_index);
1229 a_occr && (a_occr->insn != insn);
1230 a_occr = get_bb_avail_insn (bb, a_occr->next, expr->bitmap_index))
1233 if (!a_occr)
1235 stats.insns_deleted++;
1237 if (dump_file)
1239 fprintf (dump_file, "deleting insn:\n");
1240 print_rtl_single (dump_file, insn);
1241 fprintf (dump_file, "\n");
1243 delete_insn (insn);
1245 else
1246 a_occr->deleted_p = 1;
1248 cleanup:
1249 if (rollback_unoccr)
1250 obstack_free (&unoccr_obstack, rollback_unoccr);
1253 /* Performing the redundancy elimination as described before. */
1255 static void
1256 eliminate_partially_redundant_loads (void)
1258 rtx_insn *insn;
1259 basic_block bb;
1261 /* Note we start at block 1. */
1263 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1264 return;
1266 FOR_BB_BETWEEN (bb,
1267 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->next_bb,
1268 EXIT_BLOCK_PTR_FOR_FN (cfun),
1269 next_bb)
1271 /* Don't try anything on basic blocks with strange predecessors. */
1272 if (! bb_has_well_behaved_predecessors (bb))
1273 continue;
1275 /* Do not try anything on cold basic blocks. */
1276 if (optimize_bb_for_size_p (bb))
1277 continue;
1279 /* Reset the table of things changed since the start of the current
1280 basic block. */
1281 reset_opr_set_tables ();
1283 /* Look at all insns in the current basic block and see if there are
1284 any loads in it that we can record. */
1285 FOR_BB_INSNS (bb, insn)
1287 /* Is it a load - of the form (set (reg) (mem))? */
1288 if (NONJUMP_INSN_P (insn)
1289 && GET_CODE (PATTERN (insn)) == SET
1290 && REG_P (SET_DEST (PATTERN (insn)))
1291 && MEM_P (SET_SRC (PATTERN (insn))))
1293 rtx pat = PATTERN (insn);
1294 rtx src = SET_SRC (pat);
1295 struct expr *expr;
1297 if (!MEM_VOLATILE_P (src)
1298 && GET_MODE (src) != BLKmode
1299 && general_operand (src, GET_MODE (src))
1300 /* Are the operands unchanged since the start of the
1301 block? */
1302 && oprs_unchanged_p (src, insn, false)
1303 && !(cfun->can_throw_non_call_exceptions && may_trap_p (src))
1304 && !side_effects_p (src)
1305 /* Is the expression recorded? */
1306 && (expr = lookup_expr_in_table (src)) != NULL)
1308 /* We now have a load (insn) and an available memory at
1309 its BB start (expr). Try to remove the loads if it is
1310 redundant. */
1311 eliminate_partially_redundant_load (bb, insn, expr);
1315 /* Keep track of everything modified by this insn, so that we
1316 know what has been modified since the start of the current
1317 basic block. */
1318 if (INSN_P (insn))
1319 record_opr_changes (insn);
1323 commit_edge_insertions ();
1326 /* Go over the expression hash table and delete insns that were
1327 marked for later deletion. */
1329 /* This helper is called via htab_traverse. */
1331 delete_redundant_insns_1 (expr **slot, void *data ATTRIBUTE_UNUSED)
1333 struct expr *exprs = *slot;
1334 struct occr *occr;
1336 for (occr = exprs->avail_occr; occr != NULL; occr = occr->next)
1338 if (occr->deleted_p && dbg_cnt (gcse2_delete))
1340 delete_insn (occr->insn);
1341 stats.insns_deleted++;
1343 if (dump_file)
1345 fprintf (dump_file, "deleting insn:\n");
1346 print_rtl_single (dump_file, occr->insn);
1347 fprintf (dump_file, "\n");
1352 return 1;
1355 static void
1356 delete_redundant_insns (void)
1358 expr_table->traverse <void *, delete_redundant_insns_1> (NULL);
1359 if (dump_file)
1360 fprintf (dump_file, "\n");
1363 /* Main entry point of the GCSE after reload - clean some redundant loads
1364 due to spilling. */
1366 static void
1367 gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
1370 memset (&stats, 0, sizeof (stats));
1372 /* Allocate memory for this pass.
1373 Also computes and initializes the insns' CUIDs. */
1374 alloc_mem ();
1376 /* We need alias analysis. */
1377 init_alias_analysis ();
1379 compute_hash_table ();
1381 if (dump_file)
1382 dump_hash_table (dump_file);
1384 if (expr_table->elements () > 0)
1386 /* Knowing which MEMs are transparent through a block can signifiantly
1387 increase the number of redundant loads found. So compute transparency
1388 information for each memory expression in the hash table. */
1389 df_analyze ();
1390 /* This can not be part of the normal allocation routine because
1391 we have to know the number of elements in the hash table. */
1392 transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
1393 expr_table->elements ());
1394 bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
1395 expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
1396 eliminate_partially_redundant_loads ();
1397 delete_redundant_insns ();
1398 sbitmap_vector_free (transp);
1400 if (dump_file)
1402 fprintf (dump_file, "GCSE AFTER RELOAD stats:\n");
1403 fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted);
1404 fprintf (dump_file, "moves inserted: %d\n", stats.moves_inserted);
1405 fprintf (dump_file, "insns deleted: %d\n", stats.insns_deleted);
1406 fprintf (dump_file, "\n\n");
1409 statistics_counter_event (cfun, "copies inserted",
1410 stats.copies_inserted);
1411 statistics_counter_event (cfun, "moves inserted",
1412 stats.moves_inserted);
1413 statistics_counter_event (cfun, "insns deleted",
1414 stats.insns_deleted);
1417 /* We are finished with alias. */
1418 end_alias_analysis ();
1420 free_mem ();
1425 static unsigned int
1426 rest_of_handle_gcse2 (void)
1428 gcse_after_reload_main (get_insns ());
1429 rebuild_jump_labels (get_insns ());
1430 return 0;
1433 namespace {
1435 const pass_data pass_data_gcse2 =
1437 RTL_PASS, /* type */
1438 "gcse2", /* name */
1439 OPTGROUP_NONE, /* optinfo_flags */
1440 TV_GCSE_AFTER_RELOAD, /* tv_id */
1441 0, /* properties_required */
1442 0, /* properties_provided */
1443 0, /* properties_destroyed */
1444 0, /* todo_flags_start */
1445 0, /* todo_flags_finish */
1448 class pass_gcse2 : public rtl_opt_pass
1450 public:
1451 pass_gcse2 (gcc::context *ctxt)
1452 : rtl_opt_pass (pass_data_gcse2, ctxt)
1455 /* opt_pass methods: */
1456 virtual bool gate (function *fun)
1458 return (optimize > 0 && flag_gcse_after_reload
1459 && optimize_function_for_speed_p (fun));
1462 virtual unsigned int execute (function *) { return rest_of_handle_gcse2 (); }
1464 }; // class pass_gcse2
1466 } // anon namespace
1468 rtl_opt_pass *
1469 make_pass_gcse2 (gcc::context *ctxt)
1471 return new pass_gcse2 (ctxt);