[NDS32] Have shirnk-wrapping optimization to be performed on nds32 target.
[official-gcc.git] / gcc / lra-remat.c
blob68146a1110038dd2ed5c2d95f527ee749f15a677
1 /* Rematerialize pseudos values.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov <vmakarov@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This code objective is to rematerialize spilled pseudo values. To
22 do this we calculate available insn candidates. The candidate is
23 available at some point if there is dominated set of insns with the
24 same pattern, the insn inputs are not dying or modified on any path
25 from the set, the outputs are not modified.
27 The insns containing memory or spilled pseudos (except for the
28 rematerialized pseudo) are not considered as such insns are not
29 profitable in comparison with regular loads of spilled pseudo
30 values. That simplifies the implementation as we don't need to
31 deal with memory aliasing.
33 To speed up available candidate calculation, we calculate partially
34 available candidates first and use them for initialization of the
35 availability. That is because (partial) availability sets are
36 sparse.
38 The rematerialization sub-pass could be improved further in the
39 following ways:
41 o We could make longer live ranges of inputs in the
42 rematerialization candidates if their hard registers are not used
43 for other purposes. This could be complicated if we need to
44 update BB live info information as LRA does not use
45 DF-infrastructure for compile-time reasons. This problem could
46 be overcome if constrain making live ranges longer only in BB/EBB
47 scope.
48 o We could use cost-based decision to choose rematerialization insn
49 (currently all insns without memory is can be used).
50 o We could use other free hard regs for unused output pseudos in
51 rematerialization candidates although such cases probably will
52 be very rare. */
55 #include "config.h"
56 #include "system.h"
57 #include "coretypes.h"
58 #include "tm.h"
59 #include "hard-reg-set.h"
60 #include "rtl.h"
61 #include "rtl-error.h"
62 #include "tm_p.h"
63 #include "target.h"
64 #include "insn-config.h"
65 #include "recog.h"
66 #include "output.h"
67 #include "regs.h"
68 #include "hashtab.h"
69 #include "hash-set.h"
70 #include "vec.h"
71 #include "machmode.h"
72 #include "input.h"
73 #include "function.h"
74 #include "symtab.h"
75 #include "flags.h"
76 #include "statistics.h"
77 #include "double-int.h"
78 #include "real.h"
79 #include "fixed-value.h"
80 #include "alias.h"
81 #include "wide-int.h"
82 #include "inchash.h"
83 #include "tree.h"
84 #include "expmed.h"
85 #include "dojump.h"
86 #include "explow.h"
87 #include "calls.h"
88 #include "emit-rtl.h"
89 #include "varasm.h"
90 #include "stmt.h"
91 #include "expr.h"
92 #include "predict.h"
93 #include "dominance.h"
94 #include "cfg.h"
95 #include "basic-block.h"
96 #include "except.h"
97 #include "df.h"
98 #include "ira.h"
99 #include "sparseset.h"
100 #include "params.h"
101 #include "lra-int.h"
103 /* Number of candidates for rematerialization. */
104 static unsigned int cands_num;
106 /* The following is used for representation of call_used_reg_set in
107 form array whose elements are hard register numbers with nonzero bit
108 in CALL_USED_REG_SET. */
109 static int call_used_regs_arr_len;
110 static int call_used_regs_arr[FIRST_PSEUDO_REGISTER];
112 /* Bitmap used for different calculations. */
113 static bitmap_head temp_bitmap;
115 typedef struct cand *cand_t;
116 typedef const struct cand *const_cand_t;
118 /* Insn candidates for rematerialization. The candidate insn should
119 have the following properies:
120 o no any memory (as access to memory is non-profitable)
121 o no INOUT regs (it means no non-paradoxical subreg of output reg)
122 o one output spilled pseudo (or reload pseudo of a spilled pseudo)
123 o all other pseudos are with assigned hard regs. */
124 struct cand
126 /* Index of the candidates in all_cands. */
127 int index;
128 /* The candidate insn. */
129 rtx_insn *insn;
130 /* Insn pseudo regno for rematerialization. */
131 int regno;
132 /* Non-negative if a reload pseudo is in the insn instead of the
133 pseudo for rematerialization. */
134 int reload_regno;
135 /* Number of the operand containing the regno or its reload
136 regno. */
137 int nop;
138 /* Next candidate for the same regno. */
139 cand_t next_regno_cand;
142 /* Vector containing all candidates. */
143 static vec<cand_t> all_cands;
144 /* Map: insn -> candidate representing it. It is null if the insn can
145 not be used for rematerialization. */
146 static cand_t *insn_to_cand;
148 /* Map regno -> candidates can be used for the regno
149 rematerialization. */
150 static cand_t *regno_cands;
152 /* Data about basic blocks used for the rematerialization
153 sub-pass. */
154 struct remat_bb_data
156 /* Basic block about which the below data are. */
157 basic_block bb;
158 /* Registers changed in the basic block: */
159 bitmap_head changed_regs;
160 /* Registers becoming dead in the BB. */
161 bitmap_head dead_regs;
162 /* Cands present in the BB whose in/out regs are not changed after
163 the cands occurence and are not dead (except the reload
164 regno). */
165 bitmap_head gen_cands;
166 bitmap_head livein_cands; /* cands whose inputs live at the BB start. */
167 bitmap_head pavin_cands; /* cands partially available at BB entry. */
168 bitmap_head pavout_cands; /* cands partially available at BB exit. */
169 bitmap_head avin_cands; /* cands available at the entry of the BB. */
170 bitmap_head avout_cands; /* cands available at the exit of the BB. */
173 /* Array for all BB data. Indexed by the corresponding BB index. */
174 typedef struct remat_bb_data *remat_bb_data_t;
176 /* Basic blocks for data flow problems -- all bocks except the special
177 ones. */
178 static bitmap_head all_blocks;
180 /* All basic block data are referred through the following array. */
181 static remat_bb_data_t remat_bb_data;
183 /* Two small functions for access to the bb data. */
184 static inline remat_bb_data_t
185 get_remat_bb_data (basic_block bb)
187 return &remat_bb_data[(bb)->index];
190 static inline remat_bb_data_t
191 get_remat_bb_data_by_index (int index)
193 return &remat_bb_data[index];
198 /* Recursive hash function for RTL X. */
199 static hashval_t
200 rtx_hash (rtx x)
202 int i, j;
203 enum rtx_code code;
204 const char *fmt;
205 hashval_t val = 0;
207 if (x == 0)
208 return val;
210 code = GET_CODE (x);
211 val += (int) code + 4095;
213 /* Some RTL can be compared nonrecursively. */
214 switch (code)
216 case REG:
217 return val + REGNO (x);
219 case LABEL_REF:
220 return iterative_hash_object (XEXP (x, 0), val);
222 case SYMBOL_REF:
223 return iterative_hash_object (XSTR (x, 0), val);
225 case SCRATCH:
226 case CONST_DOUBLE:
227 case CONST_INT:
228 case CONST_VECTOR:
229 return val;
231 default:
232 break;
235 /* Hash the elements. */
236 fmt = GET_RTX_FORMAT (code);
237 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
239 switch (fmt[i])
241 case 'w':
242 val += XWINT (x, i);
243 break;
245 case 'n':
246 case 'i':
247 val += XINT (x, i);
248 break;
250 case 'V':
251 case 'E':
252 val += XVECLEN (x, i);
254 for (j = 0; j < XVECLEN (x, i); j++)
255 val += rtx_hash (XVECEXP (x, i, j));
256 break;
258 case 'e':
259 val += rtx_hash (XEXP (x, i));
260 break;
262 case 'S':
263 case 's':
264 val += htab_hash_string (XSTR (x, i));
265 break;
267 case 'u':
268 case '0':
269 case 't':
270 break;
272 /* It is believed that rtx's at this level will never
273 contain anything but integers and other rtx's, except for
274 within LABEL_REFs and SYMBOL_REFs. */
275 default:
276 abort ();
279 return val;
284 /* Hash table for the candidates. Different insns (e.g. structurally
285 the same insns or even insns with different unused output regs) can
286 be represented by the same candidate in the table. */
287 static htab_t cand_table;
289 /* Hash function for candidate CAND. */
290 static hashval_t
291 cand_hash (const void *cand)
293 const_cand_t c = (const_cand_t) cand;
294 lra_insn_recog_data_t id = lra_get_insn_recog_data (c->insn);
295 struct lra_static_insn_data *static_id = id->insn_static_data;
296 int nops = static_id->n_operands;
297 hashval_t hash = 0;
299 for (int i = 0; i < nops; i++)
300 if (i == c->nop)
301 hash = iterative_hash_object (c->regno, hash);
302 else if (static_id->operand[i].type == OP_IN)
303 hash = iterative_hash_object (*id->operand_loc[i], hash);
304 return hash;
307 /* Equal function for candidates CAND1 and CAND2. They are equal if
308 the corresponding candidate insns have the same code, the same
309 regno for rematerialization, the same input operands. */
310 static int
311 cand_eq_p (const void *cand1, const void *cand2)
313 const_cand_t c1 = (const_cand_t) cand1;
314 const_cand_t c2 = (const_cand_t) cand2;
315 lra_insn_recog_data_t id1 = lra_get_insn_recog_data (c1->insn);
316 lra_insn_recog_data_t id2 = lra_get_insn_recog_data (c2->insn);
317 struct lra_static_insn_data *static_id1 = id1->insn_static_data;
318 int nops = static_id1->n_operands;
320 if (c1->regno != c2->regno
321 || INSN_CODE (c1->insn) < 0
322 || INSN_CODE (c1->insn) != INSN_CODE (c2->insn))
323 return false;
324 gcc_assert (c1->nop == c2->nop);
325 for (int i = 0; i < nops; i++)
326 if (i != c1->nop && static_id1->operand[i].type == OP_IN
327 && *id1->operand_loc[i] != *id2->operand_loc[i])
328 return false;
329 return true;
332 /* Insert candidate CAND into the table if it is not there yet.
333 Return candidate which is in the table. */
334 static cand_t
335 insert_cand (cand_t cand)
337 void **entry_ptr;
339 entry_ptr = htab_find_slot (cand_table, cand, INSERT);
340 if (*entry_ptr == NULL)
341 *entry_ptr = (void *) cand;
342 return (cand_t) *entry_ptr;
345 /* Free candidate CAND memory. */
346 static void
347 free_cand (void *cand)
349 free (cand);
352 /* Initiate the candidate table. */
353 static void
354 initiate_cand_table (void)
356 cand_table = htab_create (8000, cand_hash, cand_eq_p,
357 (htab_del) free_cand);
360 /* Finish the candidate table. */
361 static void
362 finish_cand_table (void)
364 htab_delete (cand_table);
369 /* Return true if X contains memory or some UNSPEC. We can not just
370 check insn operands as memory or unspec might be not an operand
371 itself but contain an operand. Insn with memory access is not
372 profitable for rematerialization. Rematerialization of UNSPEC
373 might result in wrong code generation as the UNPEC effect is
374 unknown (e.g. generating a label). */
375 static bool
376 bad_for_rematerialization_p (rtx x)
378 int i, j;
379 const char *fmt;
380 enum rtx_code code;
382 if (MEM_P (x) || GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE)
383 return true;
384 code = GET_CODE (x);
385 fmt = GET_RTX_FORMAT (code);
386 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
388 if (fmt[i] == 'e')
390 if (bad_for_rematerialization_p (XEXP (x, i)))
391 return true;
393 else if (fmt[i] == 'E')
395 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
396 if (bad_for_rematerialization_p (XVECEXP (x, i, j)))
397 return true;
400 return false;
403 /* If INSN can not be used for rematerialization, return negative
404 value. If INSN can be considered as a candidate for
405 rematerialization, return value which is the operand number of the
406 pseudo for which the insn can be used for rematerialization. Here
407 we consider the insns without any memory, spilled pseudo (except
408 for the rematerialization pseudo), or dying or unused regs. */
409 static int
410 operand_to_remat (rtx_insn *insn)
412 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
413 struct lra_static_insn_data *static_id = id->insn_static_data;
414 struct lra_insn_reg *reg, *found_reg = NULL;
416 /* First find a pseudo which can be rematerialized. */
417 for (reg = id->regs; reg != NULL; reg = reg->next)
418 /* True FRAME_POINTER_NEEDED might be because we can not follow
419 changing sp offsets, e.g. alloca is used. If the insn contains
420 stack pointer in such case, we can not rematerialize it as we
421 can not know sp offset at a rematerialization place. */
422 if (reg->regno == STACK_POINTER_REGNUM && frame_pointer_needed)
423 return -1;
424 else if (reg->type == OP_OUT && ! reg->subreg_p
425 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
427 /* We permits only one spilled reg. */
428 if (found_reg != NULL)
429 return -1;
430 found_reg = reg;
432 if (found_reg == NULL)
433 return -1;
434 if (found_reg->regno < FIRST_PSEUDO_REGISTER)
435 return -1;
436 if (bad_for_rematerialization_p (PATTERN (insn)))
437 return -1;
438 /* Check the other regs are not spilled. */
439 for (reg = id->regs; reg != NULL; reg = reg->next)
440 if (found_reg == reg)
441 continue;
442 else if (reg->type == OP_INOUT)
443 return -1;
444 else if (reg->regno >= FIRST_PSEUDO_REGISTER
445 && reg_renumber[reg->regno] < 0)
446 /* Another spilled reg. */
447 return -1;
448 else if (reg->type == OP_IN)
450 if (find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
451 /* We don't want to make live ranges longer. */
452 return -1;
453 /* Check that there is no output reg as the input one. */
454 for (struct lra_insn_reg *reg2 = id->regs;
455 reg2 != NULL;
456 reg2 = reg2->next)
457 if (reg2->type == OP_OUT && reg->regno == reg2->regno)
458 return -1;
460 /* Find the rematerialization operand. */
461 int nop = static_id->n_operands;
462 for (int i = 0; i < nop; i++)
463 if (REG_P (*id->operand_loc[i])
464 && (int) REGNO (*id->operand_loc[i]) == found_reg->regno)
465 return i;
466 return -1;
469 /* Create candidate for INSN with rematerialization operand NOP and
470 REGNO. Insert the candidate into the table and set up the
471 corresponding INSN_TO_CAND element. */
472 static void
473 create_cand (rtx_insn *insn, int nop, int regno)
475 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
476 rtx reg = *id->operand_loc[nop];
477 gcc_assert (REG_P (reg));
478 int op_regno = REGNO (reg);
479 gcc_assert (op_regno >= FIRST_PSEUDO_REGISTER);
480 cand_t cand = XNEW (struct cand);
481 cand->insn = insn;
482 cand->nop = nop;
483 cand->regno = regno;
484 cand->reload_regno = op_regno == regno ? -1 : op_regno;
485 gcc_assert (cand->regno >= 0);
486 cand_t cand_in_table = insert_cand (cand);
487 insn_to_cand[INSN_UID (insn)] = cand_in_table;
488 if (cand != cand_in_table)
489 free (cand);
490 else
492 /* A new cand. */
493 cand->index = all_cands.length ();
494 all_cands.safe_push (cand);
495 cand->next_regno_cand = regno_cands[cand->regno];
496 regno_cands[cand->regno] = cand;
500 /* Create rematerialization candidates (inserting them into the
501 table). */
502 static void
503 create_cands (void)
505 rtx_insn *insn;
506 struct potential_cand
508 rtx_insn *insn;
509 int nop;
511 struct potential_cand *regno_potential_cand;
513 /* Create candidates. */
514 regno_potential_cand = XCNEWVEC (struct potential_cand, max_reg_num ());
515 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
516 if (INSN_P (insn))
518 rtx set;
519 int src_regno, dst_regno;
520 rtx_insn *insn2;
521 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
522 int nop = operand_to_remat (insn);
523 int regno = -1;
525 if ((set = single_set (insn)) != NULL
526 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
527 && ((src_regno = REGNO (SET_SRC (set)))
528 >= lra_constraint_new_regno_start)
529 && (dst_regno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
530 && reg_renumber[dst_regno] < 0
531 && (insn2 = regno_potential_cand[src_regno].insn) != NULL
532 && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn))
533 /* It is an output reload insn after insn can be
534 rematerialized (potential candidate). */
535 create_cand (insn2, regno_potential_cand[src_regno].nop, dst_regno);
536 if (nop < 0)
537 goto fail;
538 gcc_assert (REG_P (*id->operand_loc[nop]));
539 regno = REGNO (*id->operand_loc[nop]);
540 gcc_assert (regno >= FIRST_PSEUDO_REGISTER);
541 if (reg_renumber[regno] < 0)
542 create_cand (insn, nop, regno);
543 else
545 regno_potential_cand[regno].insn = insn;
546 regno_potential_cand[regno].nop = nop;
547 goto fail;
549 regno = -1;
550 fail:
551 for (struct lra_insn_reg *reg = id->regs; reg != NULL; reg = reg->next)
552 if (reg->type != OP_IN && reg->regno != regno
553 && reg->regno >= FIRST_PSEUDO_REGISTER)
554 regno_potential_cand[reg->regno].insn = NULL;
556 cands_num = all_cands.length ();
557 free (regno_potential_cand);
562 /* Create and initialize BB data. */
563 static void
564 create_remat_bb_data (void)
566 basic_block bb;
567 remat_bb_data_t bb_info;
569 remat_bb_data = XNEWVEC (struct remat_bb_data,
570 last_basic_block_for_fn (cfun));
571 FOR_ALL_BB_FN (bb, cfun)
573 #ifdef ENABLE_CHECKING
574 if (bb->index < 0 || bb->index >= last_basic_block_for_fn (cfun))
575 abort ();
576 #endif
577 bb_info = get_remat_bb_data (bb);
578 bb_info->bb = bb;
579 bitmap_initialize (&bb_info->changed_regs, &reg_obstack);
580 bitmap_initialize (&bb_info->dead_regs, &reg_obstack);
581 bitmap_initialize (&bb_info->gen_cands, &reg_obstack);
582 bitmap_initialize (&bb_info->livein_cands, &reg_obstack);
583 bitmap_initialize (&bb_info->pavin_cands, &reg_obstack);
584 bitmap_initialize (&bb_info->pavout_cands, &reg_obstack);
585 bitmap_initialize (&bb_info->avin_cands, &reg_obstack);
586 bitmap_initialize (&bb_info->avout_cands, &reg_obstack);
590 /* Dump all candidates to DUMP_FILE. */
591 static void
592 dump_cands (FILE *dump_file)
594 int i;
595 cand_t cand;
597 fprintf (dump_file, "\nCands:\n");
598 for (i = 0; i < (int) cands_num; i++)
600 cand = all_cands[i];
601 fprintf (dump_file, "%d (nop=%d, remat_regno=%d, reload_regno=%d):\n",
602 i, cand->nop, cand->regno, cand->reload_regno);
603 print_inline_rtx (dump_file, cand->insn, 6);
604 fprintf (dump_file, "\n");
608 /* Dump all candidates and BB data. */
609 static void
610 dump_candidates_and_remat_bb_data (void)
612 basic_block bb;
614 if (lra_dump_file == NULL)
615 return;
616 dump_cands (lra_dump_file);
617 FOR_EACH_BB_FN (bb, cfun)
619 fprintf (lra_dump_file, "\nBB %d:\n", bb->index);
620 /* Livein */
621 fprintf (lra_dump_file, " register live in:");
622 dump_regset (df_get_live_in (bb), lra_dump_file);
623 putc ('\n', lra_dump_file);
624 /* Liveout */
625 fprintf (lra_dump_file, " register live out:");
626 dump_regset (df_get_live_out (bb), lra_dump_file);
627 putc ('\n', lra_dump_file);
628 /* Changed/dead regs: */
629 fprintf (lra_dump_file, " changed regs:");
630 dump_regset (&get_remat_bb_data (bb)->changed_regs, lra_dump_file);
631 putc ('\n', lra_dump_file);
632 fprintf (lra_dump_file, " dead regs:");
633 dump_regset (&get_remat_bb_data (bb)->dead_regs, lra_dump_file);
634 putc ('\n', lra_dump_file);
635 lra_dump_bitmap_with_title ("cands generated in BB",
636 &get_remat_bb_data (bb)->gen_cands, bb->index);
637 lra_dump_bitmap_with_title ("livein cands in BB",
638 &get_remat_bb_data (bb)->livein_cands, bb->index);
639 lra_dump_bitmap_with_title ("pavin cands in BB",
640 &get_remat_bb_data (bb)->pavin_cands, bb->index);
641 lra_dump_bitmap_with_title ("pavout cands in BB",
642 &get_remat_bb_data (bb)->pavout_cands, bb->index);
643 lra_dump_bitmap_with_title ("avin cands in BB",
644 &get_remat_bb_data (bb)->avin_cands, bb->index);
645 lra_dump_bitmap_with_title ("avout cands in BB",
646 &get_remat_bb_data (bb)->avout_cands, bb->index);
650 /* Free all BB data. */
651 static void
652 finish_remat_bb_data (void)
654 basic_block bb;
656 FOR_EACH_BB_FN (bb, cfun)
658 bitmap_clear (&get_remat_bb_data (bb)->avout_cands);
659 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
660 bitmap_clear (&get_remat_bb_data (bb)->pavout_cands);
661 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
662 bitmap_clear (&get_remat_bb_data (bb)->livein_cands);
663 bitmap_clear (&get_remat_bb_data (bb)->gen_cands);
664 bitmap_clear (&get_remat_bb_data (bb)->dead_regs);
665 bitmap_clear (&get_remat_bb_data (bb)->changed_regs);
667 free (remat_bb_data);
672 /* Update changed_regs and dead_regs of BB from INSN. */
673 static void
674 set_bb_regs (basic_block bb, rtx_insn *insn)
676 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
677 struct lra_insn_reg *reg;
679 for (reg = id->regs; reg != NULL; reg = reg->next)
680 if (reg->type != OP_IN)
681 bitmap_set_bit (&get_remat_bb_data (bb)->changed_regs, reg->regno);
682 else
684 if (find_regno_note (insn, REG_DEAD, (unsigned) reg->regno) != NULL)
685 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, reg->regno);
687 if (CALL_P (insn))
688 for (int i = 0; i < call_used_regs_arr_len; i++)
689 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs,
690 call_used_regs_arr[i]);
693 /* Calculate changed_regs and dead_regs for each BB. */
694 static void
695 calculate_local_reg_remat_bb_data (void)
697 basic_block bb;
698 rtx_insn *insn;
700 FOR_EACH_BB_FN (bb, cfun)
701 FOR_BB_INSNS (bb, insn)
702 if (INSN_P (insn))
703 set_bb_regs (bb, insn);
708 /* Return true if REGNO is an input operand of INSN. */
709 static bool
710 input_regno_present_p (rtx_insn *insn, int regno)
712 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
713 struct lra_insn_reg *reg;
715 for (reg = id->regs; reg != NULL; reg = reg->next)
716 if (reg->type == OP_IN && reg->regno == regno)
717 return true;
718 return false;
721 /* Return true if a call used register is an input operand of INSN. */
722 static bool
723 call_used_input_regno_present_p (rtx_insn *insn)
725 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
726 struct lra_insn_reg *reg;
728 for (reg = id->regs; reg != NULL; reg = reg->next)
729 if (reg->type == OP_IN && reg->regno <= FIRST_PSEUDO_REGISTER
730 && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno))
731 return true;
732 return false;
735 /* Calculate livein_cands for each BB. */
736 static void
737 calculate_livein_cands (void)
739 basic_block bb;
741 FOR_EACH_BB_FN (bb, cfun)
743 bitmap livein_regs = df_get_live_in (bb);
744 bitmap livein_cands = &get_remat_bb_data (bb)->livein_cands;
745 for (unsigned int i = 0; i < cands_num; i++)
747 cand_t cand = all_cands[i];
748 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
749 struct lra_insn_reg *reg;
751 for (reg = id->regs; reg != NULL; reg = reg->next)
752 if (reg->type == OP_IN && ! bitmap_bit_p (livein_regs, reg->regno))
753 break;
754 if (reg == NULL)
755 bitmap_set_bit (livein_cands, i);
760 /* Calculate gen_cands for each BB. */
761 static void
762 calculate_gen_cands (void)
764 basic_block bb;
765 bitmap gen_cands;
766 bitmap_head gen_insns;
767 rtx_insn *insn;
769 bitmap_initialize (&gen_insns, &reg_obstack);
770 FOR_EACH_BB_FN (bb, cfun)
772 gen_cands = &get_remat_bb_data (bb)->gen_cands;
773 bitmap_clear (&gen_insns);
774 FOR_BB_INSNS (bb, insn)
775 if (INSN_P (insn))
777 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
778 struct lra_insn_reg *reg;
779 unsigned int uid;
780 bitmap_iterator bi;
781 cand_t cand;
782 rtx set;
783 int src_regno = -1, dst_regno = -1;
785 if ((set = single_set (insn)) != NULL
786 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
788 src_regno = REGNO (SET_SRC (set));
789 dst_regno = REGNO (SET_DEST (set));
792 /* Update gen_cands: */
793 bitmap_clear (&temp_bitmap);
794 for (reg = id->regs; reg != NULL; reg = reg->next)
795 if (reg->type != OP_IN
796 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
797 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
799 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
801 cand = insn_to_cand[INSN_UID (insn2)];
802 gcc_assert (cand != NULL);
803 /* Ignore the reload insn. */
804 if (src_regno == cand->reload_regno
805 && dst_regno == cand->regno)
806 continue;
807 if (cand->regno == reg->regno
808 || input_regno_present_p (insn2, reg->regno))
810 bitmap_clear_bit (gen_cands, cand->index);
811 bitmap_set_bit (&temp_bitmap, uid);
815 if (CALL_P (insn))
816 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
818 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
820 cand = insn_to_cand[INSN_UID (insn2)];
821 gcc_assert (cand != NULL);
822 if (call_used_input_regno_present_p (insn2))
824 bitmap_clear_bit (gen_cands, cand->index);
825 bitmap_set_bit (&temp_bitmap, uid);
828 bitmap_and_compl_into (&gen_insns, &temp_bitmap);
830 cand = insn_to_cand[INSN_UID (insn)];
831 if (cand != NULL)
833 bitmap_set_bit (gen_cands, cand->index);
834 bitmap_set_bit (&gen_insns, INSN_UID (insn));
838 bitmap_clear (&gen_insns);
843 /* The common transfer function used by the DF equation solver to
844 propagate (partial) availability info BB_IN to BB_OUT through block
845 with BB_INDEX according to the following equation:
847 bb.out = ((bb.in & bb.livein) - bb.killed) OR bb.gen
849 static bool
850 cand_trans_fun (int bb_index, bitmap bb_in, bitmap bb_out)
852 remat_bb_data_t bb_info;
853 bitmap bb_livein, bb_changed_regs, bb_dead_regs;
854 unsigned int cid;
855 bitmap_iterator bi;
857 bb_info = get_remat_bb_data_by_index (bb_index);
858 bb_livein = &bb_info->livein_cands;
859 bb_changed_regs = &bb_info->changed_regs;
860 bb_dead_regs = &bb_info->dead_regs;
861 /* Calculate killed avin cands -- cands whose regs are changed or
862 becoming dead in the BB. We calculate it here as we hope that
863 repeated calculations are compensated by smaller size of BB_IN in
864 comparison with all candidates number. */
865 bitmap_clear (&temp_bitmap);
866 EXECUTE_IF_SET_IN_BITMAP (bb_in, 0, cid, bi)
868 cand_t cand = all_cands[cid];
869 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
870 struct lra_insn_reg *reg;
872 if (! bitmap_bit_p (bb_livein, cid))
874 bitmap_set_bit (&temp_bitmap, cid);
875 continue;
877 for (reg = id->regs; reg != NULL; reg = reg->next)
878 /* Ignore all outputs which are not the regno for
879 rematerialization. */
880 if (reg->type == OP_OUT && reg->regno != cand->regno)
881 continue;
882 else if (bitmap_bit_p (bb_changed_regs, reg->regno)
883 || bitmap_bit_p (bb_dead_regs, reg->regno))
885 bitmap_set_bit (&temp_bitmap, cid);
886 break;
888 /* Check regno for rematerialization. */
889 if (bitmap_bit_p (bb_changed_regs, cand->regno)
890 || bitmap_bit_p (bb_dead_regs, cand->regno))
891 bitmap_set_bit (&temp_bitmap, cid);
893 return bitmap_ior_and_compl (bb_out,
894 &bb_info->gen_cands, bb_in, &temp_bitmap);
899 /* The transfer function used by the DF equation solver to propagate
900 partial candidate availability info through block with BB_INDEX
901 according to the following equation:
903 bb.pavout = ((bb.pavin & bb.livein) - bb.killed) OR bb.gen
905 static bool
906 cand_pav_trans_fun (int bb_index)
908 remat_bb_data_t bb_info;
910 bb_info = get_remat_bb_data_by_index (bb_index);
911 return cand_trans_fun (bb_index, &bb_info->pavin_cands,
912 &bb_info->pavout_cands);
915 /* The confluence function used by the DF equation solver to set up
916 cand_pav info for a block BB without predecessor. */
917 static void
918 cand_pav_con_fun_0 (basic_block bb)
920 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
923 /* The confluence function used by the DF equation solver to propagate
924 partial candidate availability info from predecessor to successor
925 on edge E (pred->bb) according to the following equation:
927 bb.pavin_cands = 0 for entry block | OR (pavout_cands of predecessors)
929 static bool
930 cand_pav_con_fun_n (edge e)
932 basic_block pred = e->src;
933 basic_block bb = e->dest;
934 remat_bb_data_t bb_info;
935 bitmap bb_pavin, pred_pavout;
937 bb_info = get_remat_bb_data (bb);
938 bb_pavin = &bb_info->pavin_cands;
939 pred_pavout = &get_remat_bb_data (pred)->pavout_cands;
940 return bitmap_ior_into (bb_pavin, pred_pavout);
945 /* The transfer function used by the DF equation solver to propagate
946 candidate availability info through block with BB_INDEX according
947 to the following equation:
949 bb.avout = ((bb.avin & bb.livein) - bb.killed) OR bb.gen
951 static bool
952 cand_av_trans_fun (int bb_index)
954 remat_bb_data_t bb_info;
956 bb_info = get_remat_bb_data_by_index (bb_index);
957 return cand_trans_fun (bb_index, &bb_info->avin_cands,
958 &bb_info->avout_cands);
961 /* The confluence function used by the DF equation solver to set up
962 cand_av info for a block BB without predecessor. */
963 static void
964 cand_av_con_fun_0 (basic_block bb)
966 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
969 /* The confluence function used by the DF equation solver to propagate
970 cand_av info from predecessor to successor on edge E (pred->bb)
971 according to the following equation:
973 bb.avin_cands = 0 for entry block | AND (avout_cands of predecessors)
975 static bool
976 cand_av_con_fun_n (edge e)
978 basic_block pred = e->src;
979 basic_block bb = e->dest;
980 remat_bb_data_t bb_info;
981 bitmap bb_avin, pred_avout;
983 bb_info = get_remat_bb_data (bb);
984 bb_avin = &bb_info->avin_cands;
985 pred_avout = &get_remat_bb_data (pred)->avout_cands;
986 return bitmap_and_into (bb_avin, pred_avout);
989 /* Calculate available candidates for each BB. */
990 static void
991 calculate_global_remat_bb_data (void)
993 basic_block bb;
995 df_simple_dataflow
996 (DF_FORWARD, NULL, cand_pav_con_fun_0, cand_pav_con_fun_n,
997 cand_pav_trans_fun, &all_blocks,
998 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
999 /* Initialize avin by pavin. */
1000 FOR_EACH_BB_FN (bb, cfun)
1001 bitmap_copy (&get_remat_bb_data (bb)->avin_cands,
1002 &get_remat_bb_data (bb)->pavin_cands);
1003 df_simple_dataflow
1004 (DF_FORWARD, NULL, cand_av_con_fun_0, cand_av_con_fun_n,
1005 cand_av_trans_fun, &all_blocks,
1006 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1011 /* Setup sp offset attribute to SP_OFFSET for all INSNS. */
1012 static void
1013 change_sp_offset (rtx_insn *insns, HOST_WIDE_INT sp_offset)
1015 for (rtx_insn *insn = insns; insn != NULL; insn = NEXT_INSN (insn))
1016 eliminate_regs_in_insn (insn, false, false, sp_offset);
1019 /* Return start hard register of REG (can be a hard or a pseudo reg)
1020 or -1 (if it is a spilled pseudo). Return number of hard registers
1021 occupied by REG through parameter NREGS if the start hard reg is
1022 not negative. */
1023 static int
1024 get_hard_regs (struct lra_insn_reg *reg, int &nregs)
1026 int regno = reg->regno;
1027 int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno];
1029 if (hard_regno >= 0)
1030 nregs = hard_regno_nregs[hard_regno][reg->biggest_mode];
1031 return hard_regno;
1034 /* Insert rematerialization insns using the data-flow data calculated
1035 earlier. */
1036 static bool
1037 do_remat (void)
1039 rtx_insn *insn;
1040 basic_block bb;
1041 bitmap_head avail_cands;
1042 bool changed_p = false;
1043 /* Living hard regs and hard registers of living pseudos. */
1044 HARD_REG_SET live_hard_regs;
1046 bitmap_initialize (&avail_cands, &reg_obstack);
1047 FOR_EACH_BB_FN (bb, cfun)
1049 REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb));
1050 bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands,
1051 &get_remat_bb_data (bb)->livein_cands);
1052 FOR_BB_INSNS (bb, insn)
1054 if (!NONDEBUG_INSN_P (insn))
1055 continue;
1057 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
1058 struct lra_static_insn_data *static_id = id->insn_static_data;
1059 struct lra_insn_reg *reg;
1060 cand_t cand;
1061 unsigned int cid;
1062 bitmap_iterator bi;
1063 rtx set;
1064 int src_regno = -1, dst_regno = -1;
1066 if ((set = single_set (insn)) != NULL
1067 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
1069 src_regno = REGNO (SET_SRC (set));
1070 dst_regno = REGNO (SET_DEST (set));
1073 cand = NULL;
1074 /* Check possibility of rematerialization (hard reg or
1075 unpsilled pseudo <- spilled pseudo): */
1076 if (dst_regno >= 0 && src_regno >= FIRST_PSEUDO_REGISTER
1077 && reg_renumber[src_regno] < 0
1078 && (dst_regno < FIRST_PSEUDO_REGISTER
1079 || reg_renumber[dst_regno] >= 0))
1081 for (cand = regno_cands[src_regno];
1082 cand != NULL;
1083 cand = cand->next_regno_cand)
1084 if (bitmap_bit_p (&avail_cands, cand->index))
1085 break;
1087 int i, hard_regno, nregs;
1088 rtx_insn *remat_insn = NULL;
1089 HOST_WIDE_INT cand_sp_offset = 0;
1090 if (cand != NULL)
1092 lra_insn_recog_data_t cand_id
1093 = lra_get_insn_recog_data (cand->insn);
1094 struct lra_static_insn_data *static_cand_id
1095 = cand_id->insn_static_data;
1096 rtx saved_op = *cand_id->operand_loc[cand->nop];
1098 /* Check clobbers do not kill something living. */
1099 gcc_assert (REG_P (saved_op));
1100 int ignore_regno = REGNO (saved_op);
1102 for (reg = cand_id->regs; reg != NULL; reg = reg->next)
1103 if (reg->type != OP_IN && reg->regno != ignore_regno)
1105 hard_regno = get_hard_regs (reg, nregs);
1106 gcc_assert (hard_regno >= 0);
1107 for (i = 0; i < nregs; i++)
1108 if (TEST_HARD_REG_BIT (live_hard_regs, hard_regno + i))
1109 break;
1110 if (i < nregs)
1111 break;
1114 if (reg == NULL)
1116 for (reg = static_cand_id->hard_regs;
1117 reg != NULL;
1118 reg = reg->next)
1119 if (reg->type != OP_IN
1120 && TEST_HARD_REG_BIT (live_hard_regs, reg->regno))
1121 break;
1124 if (reg == NULL)
1126 *cand_id->operand_loc[cand->nop] = SET_DEST (set);
1127 lra_update_insn_regno_info (cand->insn);
1128 bool ok_p = lra_constrain_insn (cand->insn);
1129 if (ok_p)
1131 rtx remat_pat = copy_insn (PATTERN (cand->insn));
1133 start_sequence ();
1134 emit_insn (remat_pat);
1135 remat_insn = get_insns ();
1136 end_sequence ();
1137 if (recog_memoized (remat_insn) < 0)
1138 remat_insn = NULL;
1139 cand_sp_offset = cand_id->sp_offset;
1141 *cand_id->operand_loc[cand->nop] = saved_op;
1142 lra_update_insn_regno_info (cand->insn);
1146 bitmap_clear (&temp_bitmap);
1147 /* Update avail_cands (see analogous code for
1148 calculate_gen_cands). */
1149 for (reg = id->regs; reg != NULL; reg = reg->next)
1150 if (reg->type != OP_IN
1151 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1152 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1154 cand = all_cands[cid];
1156 /* Ignore the reload insn. */
1157 if (src_regno == cand->reload_regno
1158 && dst_regno == cand->regno)
1159 continue;
1160 if (cand->regno == reg->regno
1161 || input_regno_present_p (cand->insn, reg->regno))
1162 bitmap_set_bit (&temp_bitmap, cand->index);
1165 if (CALL_P (insn))
1166 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1168 cand = all_cands[cid];
1170 if (call_used_input_regno_present_p (cand->insn))
1171 bitmap_set_bit (&temp_bitmap, cand->index);
1174 bitmap_and_compl_into (&avail_cands, &temp_bitmap);
1175 if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL)
1176 bitmap_set_bit (&avail_cands, cand->index);
1178 if (remat_insn != NULL)
1180 HOST_WIDE_INT sp_offset_change = cand_sp_offset - id->sp_offset;
1181 if (sp_offset_change != 0)
1182 change_sp_offset (remat_insn, sp_offset_change);
1183 lra_process_new_insns (insn, remat_insn, NULL,
1184 "Inserting rematerialization insn");
1185 lra_set_insn_deleted (insn);
1186 changed_p = true;
1187 continue;
1190 /* Update live hard regs: */
1191 for (reg = id->regs; reg != NULL; reg = reg->next)
1192 if (reg->type == OP_IN
1193 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1195 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1196 continue;
1197 for (i = 0; i < nregs; i++)
1198 CLEAR_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1200 else if (reg->type != OP_IN
1201 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1203 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1204 continue;
1205 for (i = 0; i < nregs; i++)
1206 SET_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1208 /* Process also hard regs (e.g. CC register) which are part
1209 of insn definition. */
1210 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1211 if (reg->type == OP_IN
1212 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1213 CLEAR_HARD_REG_BIT (live_hard_regs, reg->regno);
1214 else if (reg->type != OP_IN
1215 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1216 SET_HARD_REG_BIT (live_hard_regs, reg->regno);
1219 bitmap_clear (&avail_cands);
1220 return changed_p;
1225 /* Entry point of the rematerialization sub-pass. Return true if we
1226 did any rematerialization. */
1227 bool
1228 lra_remat (void)
1230 basic_block bb;
1231 bool result;
1232 int max_regno = max_reg_num ();
1234 if (! flag_lra_remat)
1235 return false;
1236 timevar_push (TV_LRA_REMAT);
1237 insn_to_cand = XCNEWVEC (cand_t, get_max_uid ());
1238 regno_cands = XCNEWVEC (cand_t, max_regno);
1239 all_cands.create (8000);
1240 call_used_regs_arr_len = 0;
1241 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1242 if (call_used_regs[i])
1243 call_used_regs_arr[call_used_regs_arr_len++] = i;
1244 initiate_cand_table ();
1245 create_cands ();
1246 create_remat_bb_data ();
1247 bitmap_initialize (&temp_bitmap, &reg_obstack);
1248 calculate_local_reg_remat_bb_data ();
1249 calculate_livein_cands ();
1250 calculate_gen_cands ();
1251 bitmap_initialize (&all_blocks, &reg_obstack);
1252 FOR_ALL_BB_FN (bb, cfun)
1253 bitmap_set_bit (&all_blocks, bb->index);
1254 calculate_global_remat_bb_data ();
1255 dump_candidates_and_remat_bb_data ();
1256 result = do_remat ();
1257 all_cands.release ();
1258 bitmap_clear (&temp_bitmap);
1259 finish_remat_bb_data ();
1260 finish_cand_table ();
1261 bitmap_clear (&all_blocks);
1262 free (regno_cands);
1263 free (insn_to_cand);
1264 timevar_pop (TV_LRA_REMAT);
1265 return result;