/* Post reload partially redundant load elimination
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"

#include "hash-table.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "insn-config.h"
#include "recog.h"
#include "predict.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "profile.h"
#include "expr.h"
#include "except.h"
#include "intl.h"
#include "obstack.h"
#include "params.h"
#include "target.h"
#include "tree-pass.h"
#include "dbgcnt.h"
/* The following code implements GCSE after reload.  The purpose of this
   pass is to clean up redundant loads generated by reload and other
   optimizations that come after GCSE.  It searches for simple inter-block
   redundancies and tries to eliminate them by adding moves and loads
   in cold places.

   Perform partially redundant load elimination, i.e. try to eliminate
   redundant loads created by the reload pass.  We look for loads that are
   fully or partially redundant with loads/stores in predecessor basic
   blocks, and try adding loads to make them fully redundant.  We also
   check whether it's worth adding those loads to be able to delete the
   redundant load.

   Algorithm:
   1. Build an available-expressions hash table:
      For each load/store instruction, if the loaded/stored memory didn't
      change until the end of the basic block, add this memory expression
      to the hash table.
   2. Perform redundancy elimination:
      For each load instruction, perform partial redundancy elimination:
      check whether it's worth adding loads to make the load fully
      redundant.  If so, add the loads and register copies and delete
      the load.
   3. Delete the instructions made redundant in step 2.

   Future enhancement:
   If the loaded register is used/defined between the load and some store,
   look for some other free register between the load and all its stores,
   and replace the load with a copy from this register to the loaded
   register.
*/
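
/* As an illustration (hypothetical RTL; register numbers made up),
   consider a block BB3 with two predecessors BB1 and BB2:

     BB1: ... (set (mem:SI (reg:SI 0)) (reg:SI 1)) ...  ;; store, value in r1
     BB2: ...                                           ;; memory unavailable
     BB3: (set (reg:SI 2) (mem:SI (reg:SI 0)))          ;; partially redundant

   The load in BB3 is redundant only on the path through BB1.  The pass
   makes it fully redundant by inserting a copy of the load on the edge
   from BB2 and a register move on the edge from BB1, then deletes it:

     edge BB1->BB3: (set (reg:SI 2) (reg:SI 1))
     edge BB2->BB3: (set (reg:SI 2) (mem:SI (reg:SI 0)))  */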
/* Keep statistics of this pass.  */
static struct
{
  int moves_inserted;
  int copies_inserted;
  int insns_deleted;
} stats;

/* We need to keep a hash table of expressions.  The table entries are of
   type 'struct expr', and for each expression there is a single linked
   list of occurrences.  */

/* Expression elements in the hash table.  */
struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;

  /* The cached hash value for this entry.  */
  hashval_t hash;

  /* List of available occurrences in basic blocks in the function.  */
  struct occr *avail_occr;
};
/* Hashtable helpers.  */

struct expr_hasher : typed_noop_remove <expr>
{
  typedef expr value_type;
  typedef expr compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash expression X.
   DO_NOT_RECORD_P is a boolean indicating if a volatile operand is found
   or if the expression contains something we don't want to insert in the
   table.  */

static hashval_t
hash_expr (rtx x, int *do_not_record_p)
{
  *do_not_record_p = 0;
  return hash_rtx (x, GET_MODE (x), do_not_record_p,
                   NULL, /*have_reg_qty=*/false);
}

/* Callback for hashtab.
   Return the hash value for expression EXP.  We don't actually hash
   here, we just return the cached hash value.  */

inline hashval_t
expr_hasher::hash (const value_type *exp)
{
  return exp->hash;
}

/* Callback for hashtab.
   Return nonzero if EXP1 is equivalent to EXP2.  */

inline bool
expr_hasher::equal (const value_type *exp1, const compare_type *exp2)
{
  int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true);

  gcc_assert (!equiv_p || exp1->hash == exp2->hash);
  return equiv_p;
}

/* The table itself.  */
static hash_table<expr_hasher> *expr_table;

static struct obstack expr_obstack;
/* Occurrence of an expression.
   There is at most one occurrence per basic block.  If a pattern appears
   more than once, the last appearance is used.  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this occurrence has been marked for deletion.  */
  char deleted_p;
};

static struct obstack occr_obstack;

/* The following structure holds the information about the occurrences of
   the redundant instructions, one entry per predecessor edge.  */
struct unoccr
{
  struct unoccr *next;
  edge pred;
  rtx_insn *insn;
};

static struct obstack unoccr_obstack;
/* Array where each element is the CUID of the insn that last set the hard
   register with the number of the element, since the start of the current
   basic block.

   This array is used during the building of the hash table (step 1) to
   determine if a reg is killed before the end of a basic block.

   It is also used when eliminating partial redundancies (step 2) to see
   if a reg was modified since the start of a basic block.  */
static int *reg_avail_info;
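
/* For example (hypothetical numbers): if the insn with CUID 7 is the last
   insn in the current block that sets hard register 3, then
   reg_avail_info[3] == 7, and reg_changed_after_insn_p below answers
   true when asked about CUID 5 and false when asked about CUID 9.  */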
/* A list of insns that may modify memory within the current basic block.  */
struct modifies_mem
{
  rtx_insn *insn;
  struct modifies_mem *next;
};

static struct modifies_mem *modifies_mem_list;

/* The modifies_mem structs also go on an obstack, only this obstack is
   freed each time after completing the analysis or transformations on
   a basic block.  So we allocate a dummy modifies_mem_obstack_bottom
   object on the obstack to keep track of the bottom of the obstack.  */
static struct obstack modifies_mem_obstack;
static struct modifies_mem *modifies_mem_obstack_bottom;

/* Mapping of insn UIDs to CUIDs.
   CUIDs are like UIDs except they increase monotonically in each basic
   block, have no gaps, and only apply to real insns.  */
static int *uid_cuid;
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
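
/* For example (hypothetical UIDs): if a block contains real insns with
   UIDs 42, 17 and 23, they get the CUIDs 1, 2 and 3 in that order; a
   note between two of them shares the CUID of the following real insn
   (see alloc_mem below).  */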
/* Helpers for memory allocation/freeing.  */
static void alloc_mem (void);
static void free_mem (void);

/* Support for hash table construction and transformations.  */
static bool oprs_unchanged_p (rtx, rtx_insn *, bool);
static void record_last_reg_set_info (rtx_insn *, rtx);
static void record_last_reg_set_info_regno (rtx_insn *, int);
static void record_last_mem_set_info (rtx_insn *);
static void record_last_set_info (rtx, const_rtx, void *);
static void record_opr_changes (rtx_insn *);

static void find_mem_conflicts (rtx, const_rtx, void *);
static int load_killed_in_block_p (int, rtx, bool);
static void reset_opr_set_tables (void);

/* Hash table support.  */
static hashval_t hash_expr (rtx, int *);
static void insert_expr_in_table (rtx, rtx_insn *);
static struct expr *lookup_expr_in_table (rtx);
static void dump_hash_table (FILE *);

/* Helpers for eliminate_partially_redundant_load.  */
static bool reg_killed_on_edge (rtx, edge);
static bool reg_used_on_edge (rtx, edge);

static rtx get_avail_load_store_reg (rtx_insn *);

static bool bb_has_well_behaved_predecessors (basic_block);
static struct occr* get_bb_avail_insn (basic_block, struct occr *);
static void hash_scan_set (rtx_insn *);
static void compute_hash_table (void);

/* The workhorses of this pass.  */
static void eliminate_partially_redundant_load (basic_block,
                                                rtx_insn *,
                                                struct expr *);
static void eliminate_partially_redundant_loads (void);
/* Allocate memory for the CUID mapping array and register/memory
   tracking tables.  */

static void
alloc_mem (void)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.  */
  uid_cuid = XCNEWVEC (int, get_max_uid () + 1);
  i = 1;
  FOR_EACH_BB_FN (bb, cfun)
    FOR_BB_INSNS (bb, insn)
      {
        if (INSN_P (insn))
          uid_cuid[INSN_UID (insn)] = i++;
        else
          uid_cuid[INSN_UID (insn)] = i;
      }

  /* Allocate the available expressions hash table.  We don't want to
     make the hash table too small, but unnecessarily making it too large
     also doesn't help.  The i/4 is a gcse.c relic, and seems like a
     reasonable choice.  */
  expr_table = new hash_table<expr_hasher> (MAX (i / 4, 13));

  /* We allocate everything on obstacks because we often can roll back
     the whole obstack to some point.  Freeing obstacks is very fast.  */
  gcc_obstack_init (&expr_obstack);
  gcc_obstack_init (&occr_obstack);
  gcc_obstack_init (&unoccr_obstack);
  gcc_obstack_init (&modifies_mem_obstack);

  /* Working array used to track the last set for each register
     in the current block.  */
  reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int));

  /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we
     can roll it back in reset_opr_set_tables.  */
  modifies_mem_obstack_bottom =
    (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
                                           sizeof (struct modifies_mem));
}

/* Free memory allocated by alloc_mem.  */

static void
free_mem (void)
{
  free (uid_cuid);

  delete expr_table;
  expr_table = NULL;

  obstack_free (&expr_obstack, NULL);
  obstack_free (&occr_obstack, NULL);
  obstack_free (&unoccr_obstack, NULL);
  obstack_free (&modifies_mem_obstack, NULL);

  free (reg_avail_info);
}
/* Insert expression X, which occurs in INSN, in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.  */

static void
insert_expr_in_table (rtx x, rtx_insn *insn)
{
  int do_not_record_p;
  hashval_t hash;
  struct expr *cur_expr, **slot;
  struct occr *avail_occr, *last_occr = NULL;

  hash = hash_expr (x, &do_not_record_p);

  /* Do not insert expression in the table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  /* We anticipate that redundant expressions are rare, so for convenience
     allocate a new hash table element here already and set its fields.
     If we don't do this, we need a hack with a static struct expr.  Anyway,
     obstack_free is really fast and one more obstack_alloc doesn't hurt if
     we're going to see more expressions later on.  */
  cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
                                            sizeof (struct expr));
  cur_expr->expr = x;
  cur_expr->hash = hash;
  cur_expr->avail_occr = NULL;

  slot = expr_table->find_slot_with_hash (cur_expr, hash, INSERT);

  if (! (*slot))
    /* The expression isn't found, so insert it.  */
    *slot = cur_expr;
  else
    {
      /* The expression is already in the table, so roll back the
         obstack and use the existing table entry.  */
      obstack_free (&expr_obstack, cur_expr);
      cur_expr = *slot;
    }

  /* Search for another occurrence in the same basic block.  */
  avail_occr = cur_expr->avail_occr;
  while (avail_occr
         && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
    {
      /* If an occurrence isn't found, save a pointer to the end of
         the list.  */
      last_occr = avail_occr;
      avail_occr = avail_occr->next;
    }

  if (avail_occr)
    /* Found another instance of the expression in the same basic block.
       Prefer this occurrence to the currently recorded one.  We want
       the last one in the block and the block is scanned from start
       to end.  */
    avail_occr->insn = insn;
  else
    {
      /* First occurrence of this expression in this basic block.  */
      avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
                                                  sizeof (struct occr));

      /* First occurrence of this expression in any block?  */
      if (cur_expr->avail_occr == NULL)
        cur_expr->avail_occr = avail_occr;
      else
        last_occr->next = avail_occr;

      avail_occr->insn = insn;
      avail_occr->next = NULL;
      avail_occr->deleted_p = 0;
    }
}
/* Lookup pattern PAT in the expression hash table.
   The result is a pointer to the table entry, or NULL if not found.  */

static struct expr *
lookup_expr_in_table (rtx pat)
{
  int do_not_record_p;
  struct expr **slot, *tmp_expr;
  hashval_t hash = hash_expr (pat, &do_not_record_p);

  if (do_not_record_p)
    return NULL;

  tmp_expr = (struct expr *) obstack_alloc (&expr_obstack,
                                            sizeof (struct expr));
  tmp_expr->expr = pat;
  tmp_expr->hash = hash;
  tmp_expr->avail_occr = NULL;

  slot = expr_table->find_slot_with_hash (tmp_expr, hash, INSERT);
  obstack_free (&expr_obstack, tmp_expr);

  if (!slot)
    return NULL;
  else
    return (*slot);
}
/* Dump all expressions and occurrences that are currently in the
   expression hash table to FILE.  */

/* This helper is called via htab_traverse.  */
int
dump_expr_hash_table_entry (expr **slot, FILE *file)
{
  struct expr *exprs = *slot;
  struct occr *occr;

  fprintf (file, "expr: ");
  print_rtl (file, exprs->expr);
  fprintf (file, "\nhashcode: %u\n", exprs->hash);
  fprintf (file, "list of occurrences:\n");
  occr = exprs->avail_occr;
  while (occr)
    {
      rtx_insn *insn = occr->insn;
      print_rtl_single (file, insn);
      fprintf (file, "\n");
      occr = occr->next;
    }
  fprintf (file, "\n");
  return 1;
}
static void
dump_hash_table (FILE *file)
{
  fprintf (file, "\n\nexpression hash table\n");
  fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n",
           (long) expr_table->size (),
           (long) expr_table->elements (),
           expr_table->collisions ());
  if (expr_table->elements () > 0)
    {
      fprintf (file, "\n\ntable entries:\n");
      expr_table->traverse <FILE *, dump_expr_hash_table_entry> (file);
    }
  fprintf (file, "\n");
}
/* Return true if register X is recorded as being set by an instruction
   whose CUID is greater than the one given.  */

static bool
reg_changed_after_insn_p (rtx x, int cuid)
{
  unsigned int regno, end_regno;

  regno = REGNO (x);
  end_regno = END_HARD_REGNO (x);
  do
    if (reg_avail_info[regno] > cuid)
      return true;
  while (++regno < end_regno);
  return false;
}
/* Return nonzero if the operands of expression X are unchanged
   1) from the start of INSN's basic block up to but not including INSN
      if AFTER_INSN is false, or
   2) from INSN to the end of INSN's basic block if AFTER_INSN is true.  */
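
/* For instance, for X == (mem:SI (plus:SI (reg:SI 1) (const_int 8)))
   (hypothetical RTL), this checks that register 1 is not modified in the
   relevant half of the block and that no potentially conflicting store
   recorded in modifies_mem_list may kill the loaded memory there.  */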
static bool
oprs_unchanged_p (rtx x, rtx_insn *insn, bool after_insn)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      /* We are called after register allocation.  */
      gcc_assert (REGNO (x) < FIRST_PSEUDO_REGISTER);
      if (after_insn)
        return !reg_changed_after_insn_p (x, INSN_CUID (insn) - 1);
      else
        return !reg_changed_after_insn_p (x, 0);

    case MEM:
      if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn))
        return 0;
      else
        return oprs_unchanged_p (XEXP (x, 0), insn, after_insn);

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      if (after_insn)
        return 0;
      break;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn))
            return 0;
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn))
            return 0;
    }

  return 1;
}
/* Used for communication between find_mem_conflicts and
   load_killed_in_block_p.  Nonzero if find_mem_conflicts finds a
   conflict between two memory references.
   This is a bit of a hack to work around the limitations of note_stores.  */
static int mems_conflict_p;

/* DEST is the output of an instruction.  If it is a memory reference, and
   possibly conflicts with the load found in DATA, then set mems_conflict_p
   to a nonzero value.  */

static void
find_mem_conflicts (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
                    void *data)
{
  rtx mem_op = (rtx) data;

  while (GET_CODE (dest) == SUBREG
         || GET_CODE (dest) == ZERO_EXTRACT
         || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  if (true_dependence (dest, GET_MODE (dest), mem_op))
    mems_conflict_p = 1;
}

/* Return nonzero if the expression in X (a memory reference) is killed
   in the current basic block before (if AFTER_INSN is false) or after
   (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT.

   This function assumes that the modifies_mem table is flushed when
   the hash table construction or redundancy elimination phases start
   processing a new basic block.  */

static int
load_killed_in_block_p (int uid_limit, rtx x, bool after_insn)
{
  struct modifies_mem *list_entry = modifies_mem_list;

  while (list_entry)
    {
      rtx_insn *setter = list_entry->insn;

      /* Ignore entries in the list that do not apply.  */
      if ((after_insn
           && INSN_CUID (setter) < uid_limit)
          || (! after_insn
              && INSN_CUID (setter) > uid_limit))
        {
          list_entry = list_entry->next;
          continue;
        }

      /* If SETTER is a call everything is clobbered.  Note that calls
         to pure functions are never put on the list, so we need not
         worry about them.  */
      if (CALL_P (setter))
        return 1;

      /* SETTER must be an insn of some kind that sets memory.  Call
         note_stores to examine each hunk of memory that is modified.
         It will set mems_conflict_p to nonzero if there may be a
         conflict between X and SETTER.  */
      mems_conflict_p = 0;
      note_stores (PATTERN (setter), find_mem_conflicts, x);
      if (mems_conflict_p)
        return 1;

      list_entry = list_entry->next;
    }
  return 0;
}
/* Record register first/last/block set information for REG in INSN.  */

static inline void
record_last_reg_set_info (rtx_insn *insn, rtx reg)
{
  unsigned int regno, end_regno;

  regno = REGNO (reg);
  end_regno = END_HARD_REGNO (reg);
  do
    reg_avail_info[regno] = INSN_CUID (insn);
  while (++regno < end_regno);
}
static inline void
record_last_reg_set_info_regno (rtx_insn *insn, int regno)
{
  reg_avail_info[regno] = INSN_CUID (insn);
}

/* Record memory modification information for INSN.  We do not actually
   care about the memory location(s) that are set, or even how they are
   set (consider a CALL_INSN).  We merely need to record which insns
   modify memory.  */

static void
record_last_mem_set_info (rtx_insn *insn)
{
  struct modifies_mem *list_entry;

  list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
                                                      sizeof (struct modifies_mem));
  list_entry->insn = insn;
  list_entry->next = modifies_mem_list;
  modifies_mem_list = list_entry;
}

/* Called from compute_hash_table via note_stores to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */

static void
record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
{
  rtx_insn *last_set_insn = (rtx_insn *) data;

  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (REG_P (dest))
    record_last_reg_set_info (last_set_insn, dest);
  else if (MEM_P (dest))
    {
      /* Ignore pushes, they don't clobber memory.  They may still
         clobber the stack pointer though.  Some targets do argument
         pushes without adding REG_INC notes.  See e.g. PR25196,
         where a pushsi2 on i386 doesn't have REG_INC notes.  Note
         such changes here too.  */
      if (! push_operand (dest, GET_MODE (dest)))
        record_last_mem_set_info (last_set_insn);
      else
        record_last_reg_set_info_regno (last_set_insn, STACK_POINTER_REGNUM);
    }
}

/* Reset tables used to keep track of what's still available since the
   start of the block.  */

static void
reset_opr_set_tables (void)
{
  memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int));
  obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom);
  modifies_mem_list = NULL;
}

/* Record things set by INSN.
   This data is used by oprs_unchanged_p.  */

static void
record_opr_changes (rtx_insn *insn)
{
  rtx note;

  /* Find all stores and record them.  */
  note_stores (PATTERN (insn), record_last_set_info, insn);

  /* Also record autoincremented REGs for this insn as changed.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_INC)
      record_last_reg_set_info (insn, XEXP (note, 0));

  /* Finally, if this is a call, record all call clobbers.  */
  if (CALL_P (insn))
    {
      unsigned int regno;
      rtx link, x;
      hard_reg_set_iterator hrsi;
      EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi)
        record_last_reg_set_info_regno (insn, regno);

      for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
        if (GET_CODE (XEXP (link, 0)) == CLOBBER)
          {
            x = XEXP (XEXP (link, 0), 0);
            if (REG_P (x))
              {
                gcc_assert (HARD_REGISTER_P (x));
                record_last_reg_set_info (insn, x);
              }
          }

      if (! RTL_CONST_OR_PURE_CALL_P (insn))
        record_last_mem_set_info (insn);
    }
}
/* Scan the pattern of INSN and add an entry to the hash table.
   After reload we are interested in loads/stores only.  */

static void
hash_scan_set (rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);

  /* We are only interested in loads and stores.  */
  if (! MEM_P (src) && ! MEM_P (dest))
    return;

  /* Don't mess with jumps and nops.  */
  if (JUMP_P (insn) || set_noop_p (pat))
    return;

  if (REG_P (dest))
    {
      if (/* Don't CSE something if we can't do a reg/reg copy.  */
          can_copy_p (GET_MODE (dest))
          /* Is SET_SRC something we want to gcse?  */
          && general_operand (src, GET_MODE (src))
#ifdef STACK_REGS
          /* Never consider insns touching the register stack.  It may
             create situations that reg-stack cannot handle (e.g. a stack
             register live across an abnormal edge).  */
          && (REGNO (dest) < FIRST_STACK_REG || REGNO (dest) > LAST_STACK_REG)
#endif
          /* An expression is not available if its operands are
             subsequently modified, including this insn.  */
          && oprs_unchanged_p (src, insn, true))
        {
          insert_expr_in_table (src, insn);
        }
    }
  else if (REG_P (src))
    {
      /* This is a store; record it if the stored memory expression
         stays valid until the end of the block.  */
      if (/* Don't CSE something if we can't do a reg/reg copy.  */
          can_copy_p (GET_MODE (src))
          /* Is SET_DEST something we want to gcse?  */
          && general_operand (dest, GET_MODE (dest))
#ifdef STACK_REGS
          /* As above for STACK_REGS.  */
          && (REGNO (src) < FIRST_STACK_REG || REGNO (src) > LAST_STACK_REG)
#endif
          && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
          /* Check if the memory expression is killed after insn.  */
          && ! load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true)
          && oprs_unchanged_p (XEXP (dest, 0), insn, true))
        {
          insert_expr_in_table (dest, insn);
        }
    }
}
/* Create a hash table of memory expressions available at the end of
   basic blocks.  Basically you should think of this hash table as the
   representation of AVAIL_OUT.  This is the set of expressions that is
   generated in a basic block and not killed before the end of the
   same basic block.  Notice that this is really a local computation.  */
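
/* For example (hypothetical RTL), in a block containing

     (set (reg:SI 1) (mem:SI (reg:SI 0)))  ;; load
     ...
     (set (reg:SI 0) (const_int 0))        ;; clobbers the address

   the loaded expression is not AVAIL_OUT, because an operand of its
   address is modified before the end of the block, so hash_scan_set
   above does not enter it in the table.  */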
static void
compute_hash_table (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      /* First pass over the instructions records information used to
         determine when registers and memory are last set.
         Since we compute a "local" AVAIL_OUT, reset the tables that
         help us keep track of what has been modified since the start
         of the block.  */
      reset_opr_set_tables ();
      FOR_BB_INSNS (bb, insn)
        {
          if (INSN_P (insn))
            record_opr_changes (insn);
        }

      /* The next pass actually builds the hash table.  */
      FOR_BB_INSNS (bb, insn)
        if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
          hash_scan_set (insn);
    }
}

/* Check if register REG is killed in any insn waiting to be inserted on
   edge E.  This function is required to check that our data flow analysis
   is still valid prior to commit_edge_insertions.  */

static bool
reg_killed_on_edge (rtx reg, edge e)
{
  rtx_insn *insn;

  for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn) && reg_set_p (reg, insn))
      return true;

  return false;
}

/* Similar to above - check if register REG is used in any insn waiting
   to be inserted on edge E.
   Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
   with PREV(insn),NEXT(insn) instead of calling reg_overlap_mentioned_p.  */

static bool
reg_used_on_edge (rtx reg, edge e)
{
  rtx_insn *insn;

  for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
      return true;

  return false;
}

/* Return the loaded/stored register of a load/store instruction.  */

static rtx
get_avail_load_store_reg (rtx_insn *insn)
{
  if (REG_P (SET_DEST (PATTERN (insn))))
    /* A load.  */
    return SET_DEST (PATTERN (insn));
  else
    {
      /* A store.  */
      gcc_assert (REG_P (SET_SRC (PATTERN (insn))));
      return SET_SRC (PATTERN (insn));
    }
}

/* Return nonzero if the predecessors of BB are "well behaved".  */

static bool
bb_has_well_behaved_predecessors (basic_block bb)
{
  edge pred;
  edge_iterator ei;

  if (EDGE_COUNT (bb->preds) == 0)
    return false;

  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      if ((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
        return false;

      if ((pred->flags & EDGE_ABNORMAL_CALL) && cfun->has_nonlocal_label)
        return false;

      if (tablejump_p (BB_END (pred->src), NULL, NULL))
        return false;
    }
  return true;
}
/* Search for an occurrence of the expression in BB: return the first
   occurrence in the list starting at OCCR that is in BB, or NULL.  */

static struct occr*
get_bb_avail_insn (basic_block bb, struct occr *occr)
{
  for (; occr != NULL; occr = occr->next)
    if (BLOCK_FOR_INSN (occr->insn) == bb)
      return occr;
  return NULL;
}
/* This handles the case where several stores feed a partially redundant
   load.  It checks if the redundancy elimination is possible and if it's
   worth it.

   Redundancy elimination is possible if:
   1) None of the operands of an insn have been modified since the start
      of the current basic block.
   2) In any predecessor of the current basic block, the same expression
      is generated.

   See the function body for the heuristics that determine if eliminating
   a redundancy is also worth doing, assuming it is possible.  */
static void
eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
                                    struct expr *expr)
{
  edge pred;
  rtx_insn *avail_insn = NULL;
  rtx avail_reg;
  rtx dest, pat;
  struct occr *a_occr;
  struct unoccr *occr, *avail_occrs = NULL;
  struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL;
  int npred_ok = 0;
  gcov_type ok_count = 0; /* Redundant load execution count.  */
  gcov_type critical_count = 0; /* Execution count of critical edges.  */
  edge_iterator ei;
  bool critical_edge_split = false;

  /* The execution count of the loads to be added to make the
     load fully redundant.  */
  gcov_type not_ok_count = 0;
  basic_block pred_bb;

  pat = PATTERN (insn);
  dest = SET_DEST (pat);

  /* Check that the loaded register is not used, set, or killed from the
     beginning of the block.  */
  if (reg_changed_after_insn_p (dest, 0)
      || reg_used_between_p (dest, PREV_INSN (BB_HEAD (bb)), insn))
    return;

  /* Check potential for replacing load with copy for predecessors.  */
  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      rtx_insn *next_pred_bb_end;

      avail_insn = NULL;
      avail_reg = NULL_RTX;
      pred_bb = pred->src;
      next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
      for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
           a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
        {
          /* Check if the loaded register is not used.  */
          avail_insn = a_occr->insn;
          avail_reg = get_avail_load_store_reg (avail_insn);
          gcc_assert (avail_reg);

          /* Make sure we can generate a move from register avail_reg to
             dest.  */
          rtx_insn *move = as_a <rtx_insn *>
            (gen_move_insn (copy_rtx (dest), copy_rtx (avail_reg)));
          extract_insn (move);
          if (! constrain_operands (1, get_preferred_alternatives (insn,
                                                                   pred_bb))
              || reg_killed_on_edge (avail_reg, pred)
              || reg_used_on_edge (dest, pred))
            {
              avail_insn = NULL;
              continue;
            }
          if (!reg_set_between_p (avail_reg, avail_insn, next_pred_bb_end))
            /* AVAIL_INSN remains non-null.  */
            break;
          else
            avail_insn = NULL;
        }

      if (EDGE_CRITICAL_P (pred))
        critical_count += pred->count;

      if (avail_insn != NULL_RTX)
        {
          npred_ok++;
          ok_count += pred->count;
          if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest),
                                                    copy_rtx (avail_reg)))))
            {
              /* Check if there is going to be a split.  */
              if (EDGE_CRITICAL_P (pred))
                critical_edge_split = true;
            }
          else /* It's a dead move, no need to generate it.  */
            continue;
          occr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
                                                  sizeof (struct unoccr));
          occr->insn = avail_insn;
          occr->pred = pred;
          occr->next = avail_occrs;
          avail_occrs = occr;
          if (! rollback_unoccr)
            rollback_unoccr = occr;
        }
      else
        {
          /* Adding a load on a critical edge will cause a split.  */
          if (EDGE_CRITICAL_P (pred))
            critical_edge_split = true;
          not_ok_count += pred->count;
          unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
                                                    sizeof (struct unoccr));
          unoccr->insn = NULL;
          unoccr->pred = pred;
          unoccr->next = unavail_occrs;
          unavail_occrs = unoccr;
          if (! rollback_unoccr)
            rollback_unoccr = unoccr;
        }
    }

  if (/* No load can be replaced by copy.  */
      npred_ok == 0
      /* Prevent exploding the code.  */
      || (optimize_bb_for_size_p (bb) && npred_ok > 1)
      /* If we don't have profile information we cannot tell if splitting
         a critical edge is profitable or not so don't do it.  */
      || ((! profile_info || ! flag_branch_probabilities
           || targetm.cannot_modify_jumps_p ())
          && critical_edge_split))
    goto cleanup;

  /* Check if it's worth applying the partial redundancy elimination.  */
  if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
    goto cleanup;
  if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
    goto cleanup;
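
  /* Worked example with made-up counts, assuming the default parameter
     values (GCSE_AFTER_RELOAD_PARTIAL_FRACTION == 3 and
     GCSE_AFTER_RELOAD_CRITICAL_FRACTION == 10 at the time of writing):
     with ok_count == 90, not_ok_count == 20 and critical_count == 5,
     both 90 >= 3 * 20 and 90 >= 10 * 5 hold, so the transformation is
     considered worthwhile; had critical_count been 10, the second test
     above would have rejected it.  */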
  /* Generate moves to the loaded register from where
     the memory is available.  */
  for (occr = avail_occrs; occr; occr = occr->next)
    {
      avail_insn = occr->insn;
      pred = occr->pred;
      /* Set avail_reg to be the register having the value of the
         memory.  */
      avail_reg = get_avail_load_store_reg (avail_insn);
      gcc_assert (avail_reg);

      insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
                                          copy_rtx (avail_reg)),
                           pred);
      stats.moves_inserted++;

      if (dump_file)
        fprintf (dump_file,
                 "generating move from %d to %d on edge from %d to %d\n",
                 REGNO (avail_reg),
                 REGNO (dest),
                 pred->src->index,
                 pred->dest->index);
    }

  /* Regenerate loads where the memory is unavailable.  */
  for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
    {
      pred = unoccr->pred;
      insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
      stats.copies_inserted++;

      if (dump_file)
        {
          fprintf (dump_file,
                   "generating on edge from %d to %d a copy of load: ",
                   pred->src->index,
                   pred->dest->index);
          print_rtl (dump_file, PATTERN (insn));
          fprintf (dump_file, "\n");
        }
    }

  /* Delete the insn if it is not available in this block and mark it
     for deletion if it is available.  If insn is available it may help
     discover additional redundancies, so mark it for later deletion.  */
  for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
       a_occr && (a_occr->insn != insn);
       a_occr = get_bb_avail_insn (bb, a_occr->next))
    ;

  if (!a_occr)
    {
      stats.insns_deleted++;

      if (dump_file)
        {
          fprintf (dump_file, "deleting insn:\n");
          print_rtl_single (dump_file, insn);
          fprintf (dump_file, "\n");
        }
      delete_insn (insn);
    }
  else
    a_occr->deleted_p = 1;

cleanup:
  if (rollback_unoccr)
    obstack_free (&unoccr_obstack, rollback_unoccr);
}
/* Perform the redundancy elimination described above.  */

static void
eliminate_partially_redundant_loads (void)
{
  rtx_insn *insn;
  basic_block bb;

  /* Note we start at block 1.  */

  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return;

  FOR_BB_BETWEEN (bb,
                  ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->next_bb,
                  EXIT_BLOCK_PTR_FOR_FN (cfun),
                  next_bb)
    {
      /* Don't try anything on basic blocks with strange predecessors.  */
      if (! bb_has_well_behaved_predecessors (bb))
        continue;

      /* Do not try anything on cold basic blocks.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      /* Reset the table of things changed since the start of the current
         basic block.  */
      reset_opr_set_tables ();

      /* Look at all insns in the current basic block and see if there are
         any loads in it that we can record.  */
      FOR_BB_INSNS (bb, insn)
        {
          /* Is it a load - of the form (set (reg) (mem))?  */
          if (NONJUMP_INSN_P (insn)
              && GET_CODE (PATTERN (insn)) == SET
              && REG_P (SET_DEST (PATTERN (insn)))
              && MEM_P (SET_SRC (PATTERN (insn))))
            {
              rtx pat = PATTERN (insn);
              rtx src = SET_SRC (pat);
              struct expr *expr;

              if (!MEM_VOLATILE_P (src)
                  && GET_MODE (src) != BLKmode
                  && general_operand (src, GET_MODE (src))
                  /* Are the operands unchanged since the start of the
                     block?  */
                  && oprs_unchanged_p (src, insn, false)
                  && !(cfun->can_throw_non_call_exceptions && may_trap_p (src))
                  && !side_effects_p (src)
                  /* Is the expression recorded?  */
                  && (expr = lookup_expr_in_table (src)) != NULL)
                {
                  /* We now have a load (insn) and an available memory at
                     its BB start (expr).  Try to remove the load if it is
                     redundant.  */
                  eliminate_partially_redundant_load (bb, insn, expr);
                }
            }

          /* Keep track of everything modified by this insn, so that we
             know what has been modified since the start of the current
             basic block.  */
          if (INSN_P (insn))
            record_opr_changes (insn);
        }
    }

  commit_edge_insertions ();
}
/* Go over the expression hash table and delete insns that were
   marked for later deletion.  */

/* This helper is called via htab_traverse.  */
int
delete_redundant_insns_1 (expr **slot, void *data ATTRIBUTE_UNUSED)
{
  struct expr *exprs = *slot;
  struct occr *occr;

  for (occr = exprs->avail_occr; occr != NULL; occr = occr->next)
    {
      if (occr->deleted_p && dbg_cnt (gcse2_delete))
        {
          delete_insn (occr->insn);
          stats.insns_deleted++;

          if (dump_file)
            {
              fprintf (dump_file, "deleting insn:\n");
              print_rtl_single (dump_file, occr->insn);
              fprintf (dump_file, "\n");
            }
        }
    }
  return 1;
}
static void
delete_redundant_insns (void)
{
  expr_table->traverse <void *, delete_redundant_insns_1> (NULL);
  if (dump_file)
    fprintf (dump_file, "\n");
}

/* Main entry point of the GCSE after reload - clean some redundant loads
   due to spilling.  */

static void
gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
{
  memset (&stats, 0, sizeof (stats));

  /* Allocate memory for this pass.
     Also computes and initializes the insns' CUIDs.  */
  alloc_mem ();

  /* We need alias analysis.  */
  init_alias_analysis ();

  compute_hash_table ();

  if (dump_file)
    dump_hash_table (dump_file);

  if (expr_table->elements () > 0)
    {
      eliminate_partially_redundant_loads ();
      delete_redundant_insns ();

      if (dump_file)
        {
          fprintf (dump_file, "GCSE AFTER RELOAD stats:\n");
          fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted);
          fprintf (dump_file, "moves inserted:  %d\n", stats.moves_inserted);
          fprintf (dump_file, "insns deleted:   %d\n", stats.insns_deleted);
          fprintf (dump_file, "\n\n");
        }

      statistics_counter_event (cfun, "copies inserted",
                                stats.copies_inserted);
      statistics_counter_event (cfun, "moves inserted",
                                stats.moves_inserted);
      statistics_counter_event (cfun, "insns deleted",
                                stats.insns_deleted);
    }

  /* We are finished with alias.  */
  end_alias_analysis ();

  free_mem ();
}

static unsigned int
rest_of_handle_gcse2 (void)
{
  gcse_after_reload_main (get_insns ());
  rebuild_jump_labels (get_insns ());
  return 0;
}

namespace {

const pass_data pass_data_gcse2 =
{
  RTL_PASS, /* type */
  "gcse2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_GCSE_AFTER_RELOAD, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_gcse2 : public rtl_opt_pass
{
public:
  pass_gcse2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_gcse2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return (optimize > 0 && flag_gcse_after_reload
              && optimize_function_for_speed_p (fun));
    }

  virtual unsigned int execute (function *) { return rest_of_handle_gcse2 (); }

}; // class pass_gcse2

} // anon namespace

rtl_opt_pass *
make_pass_gcse2 (gcc::context *ctxt)
{
  return new pass_gcse2 (ctxt);
}