Update ChangeLog and version files for release
[official-gcc.git] / gcc / lra-remat.c
blobf2d226c6fcb06357ce9ca523a437bbe940259245
1 /* Rematerialize pseudos values.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov <vmakarov@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This code objective is to rematerialize spilled pseudo values. To
22 do this we calculate available insn candidates. The candidate is
23 available at some point if there is dominated set of insns with the
24 same pattern, the insn inputs are not dying or modified on any path
25 from the set, the outputs are not modified.
27 The insns containing memory or spilled pseudos (except for the
28 rematerialized pseudo) are not considered as such insns are not
29 profitable in comparison with regular loads of spilled pseudo
30 values. That simplifies the implementation as we don't need to
31 deal with memory aliasing.
33 To speed up available candidate calculation, we calculate partially
34 available candidates first and use them for initialization of the
35 availability. That is because (partial) availability sets are
36 sparse.
38 The rematerialization sub-pass could be improved further in the
39 following ways:
41 o We could make longer live ranges of inputs in the
42 rematerialization candidates if their hard registers are not used
43 for other purposes. This could be complicated if we need to
44 update BB live info information as LRA does not use
45 DF-infrastructure for compile-time reasons. This problem could
46 be overcome if constrain making live ranges longer only in BB/EBB
47 scope.
48 o We could use cost-based decision to choose rematerialization insn
49 (currently all insns without memory is can be used).
50 o We could use other free hard regs for unused output pseudos in
51 rematerialization candidates although such cases probably will
52 be very rare. */
55 #include "config.h"
56 #include "system.h"
57 #include "coretypes.h"
58 #include "tm.h"
59 #include "hard-reg-set.h"
60 #include "rtl.h"
61 #include "rtl-error.h"
62 #include "tm_p.h"
63 #include "target.h"
64 #include "insn-config.h"
65 #include "recog.h"
66 #include "output.h"
67 #include "regs.h"
68 #include "hashtab.h"
69 #include "hash-set.h"
70 #include "vec.h"
71 #include "machmode.h"
72 #include "input.h"
73 #include "function.h"
74 #include "symtab.h"
75 #include "flags.h"
76 #include "statistics.h"
77 #include "double-int.h"
78 #include "real.h"
79 #include "fixed-value.h"
80 #include "alias.h"
81 #include "wide-int.h"
82 #include "inchash.h"
83 #include "tree.h"
84 #include "expmed.h"
85 #include "dojump.h"
86 #include "explow.h"
87 #include "calls.h"
88 #include "emit-rtl.h"
89 #include "varasm.h"
90 #include "stmt.h"
91 #include "expr.h"
92 #include "predict.h"
93 #include "dominance.h"
94 #include "cfg.h"
95 #include "basic-block.h"
96 #include "except.h"
97 #include "df.h"
98 #include "ira.h"
99 #include "sparseset.h"
100 #include "params.h"
101 #include "lra-int.h"
103 /* Number of candidates for rematerialization. */
104 static unsigned int cands_num;
106 /* The following is used for representation of call_used_reg_set in
107 form array whose elements are hard register numbers with nonzero bit
108 in CALL_USED_REG_SET. */
109 static int call_used_regs_arr_len;
110 static int call_used_regs_arr[FIRST_PSEUDO_REGISTER];
112 /* Bitmap used for different calculations. */
113 static bitmap_head temp_bitmap;
115 typedef struct cand *cand_t;
116 typedef const struct cand *const_cand_t;
118 /* Insn candidates for rematerialization. The candidate insn should
119 have the following properies:
120 o no any memory (as access to memory is non-profitable)
121 o no INOUT regs (it means no non-paradoxical subreg of output reg)
122 o one output spilled pseudo (or reload pseudo of a spilled pseudo)
123 o all other pseudos are with assigned hard regs. */
124 struct cand
126 /* Index of the candidates in all_cands. */
127 int index;
128 /* The candidate insn. */
129 rtx_insn *insn;
130 /* Insn pseudo regno for rematerialization. */
131 int regno;
132 /* Non-negative if a reload pseudo is in the insn instead of the
133 pseudo for rematerialization. */
134 int reload_regno;
135 /* Number of the operand containing the regno or its reload
136 regno. */
137 int nop;
138 /* Next candidate for the same regno. */
139 cand_t next_regno_cand;
142 /* Vector containing all candidates. */
143 static vec<cand_t> all_cands;
144 /* Map: insn -> candidate representing it. It is null if the insn can
145 not be used for rematerialization. */
146 static cand_t *insn_to_cand;
148 /* Map regno -> candidates can be used for the regno
149 rematerialization. */
150 static cand_t *regno_cands;
152 /* Data about basic blocks used for the rematerialization
153 sub-pass. */
154 struct remat_bb_data
156 /* Basic block about which the below data are. */
157 basic_block bb;
158 /* Registers changed in the basic block: */
159 bitmap_head changed_regs;
160 /* Registers becoming dead in the BB. */
161 bitmap_head dead_regs;
162 /* Cands present in the BB whose in/out regs are not changed after
163 the cands occurence and are not dead (except the reload
164 regno). */
165 bitmap_head gen_cands;
166 bitmap_head livein_cands; /* cands whose inputs live at the BB start. */
167 bitmap_head pavin_cands; /* cands partially available at BB entry. */
168 bitmap_head pavout_cands; /* cands partially available at BB exit. */
169 bitmap_head avin_cands; /* cands available at the entry of the BB. */
170 bitmap_head avout_cands; /* cands available at the exit of the BB. */
173 /* Array for all BB data. Indexed by the corresponding BB index. */
174 typedef struct remat_bb_data *remat_bb_data_t;
176 /* Basic blocks for data flow problems -- all bocks except the special
177 ones. */
178 static bitmap_head all_blocks;
180 /* All basic block data are referred through the following array. */
181 static remat_bb_data_t remat_bb_data;
183 /* Two small functions for access to the bb data. */
184 static inline remat_bb_data_t
185 get_remat_bb_data (basic_block bb)
187 return &remat_bb_data[(bb)->index];
190 static inline remat_bb_data_t
191 get_remat_bb_data_by_index (int index)
193 return &remat_bb_data[index];
198 /* Recursive hash function for RTL X. */
199 static hashval_t
200 rtx_hash (rtx x)
202 int i, j;
203 enum rtx_code code;
204 const char *fmt;
205 hashval_t val = 0;
207 if (x == 0)
208 return val;
210 code = GET_CODE (x);
211 val += (int) code + 4095;
213 /* Some RTL can be compared nonrecursively. */
214 switch (code)
216 case REG:
217 return val + REGNO (x);
219 case LABEL_REF:
220 return iterative_hash_object (XEXP (x, 0), val);
222 case SYMBOL_REF:
223 return iterative_hash_object (XSTR (x, 0), val);
225 case SCRATCH:
226 case CONST_DOUBLE:
227 case CONST_INT:
228 case CONST_VECTOR:
229 return val;
231 default:
232 break;
235 /* Hash the elements. */
236 fmt = GET_RTX_FORMAT (code);
237 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
239 switch (fmt[i])
241 case 'w':
242 val += XWINT (x, i);
243 break;
245 case 'n':
246 case 'i':
247 val += XINT (x, i);
248 break;
250 case 'V':
251 case 'E':
252 val += XVECLEN (x, i);
254 for (j = 0; j < XVECLEN (x, i); j++)
255 val += rtx_hash (XVECEXP (x, i, j));
256 break;
258 case 'e':
259 val += rtx_hash (XEXP (x, i));
260 break;
262 case 'S':
263 case 's':
264 val += htab_hash_string (XSTR (x, i));
265 break;
267 case 'u':
268 case '0':
269 case 't':
270 break;
272 /* It is believed that rtx's at this level will never
273 contain anything but integers and other rtx's, except for
274 within LABEL_REFs and SYMBOL_REFs. */
275 default:
276 abort ();
279 return val;
284 /* Hash table for the candidates. Different insns (e.g. structurally
285 the same insns or even insns with different unused output regs) can
286 be represented by the same candidate in the table. */
287 static htab_t cand_table;
289 /* Hash function for candidate CAND. */
290 static hashval_t
291 cand_hash (const void *cand)
293 const_cand_t c = (const_cand_t) cand;
294 lra_insn_recog_data_t id = lra_get_insn_recog_data (c->insn);
295 struct lra_static_insn_data *static_id = id->insn_static_data;
296 int nops = static_id->n_operands;
297 hashval_t hash = 0;
299 for (int i = 0; i < nops; i++)
300 if (i == c->nop)
301 hash = iterative_hash_object (c->regno, hash);
302 else if (static_id->operand[i].type == OP_IN)
303 hash = iterative_hash_object (*id->operand_loc[i], hash);
304 return hash;
307 /* Equal function for candidates CAND1 and CAND2. They are equal if
308 the corresponding candidate insns have the same code, the same
309 regno for rematerialization, the same input operands. */
310 static int
311 cand_eq_p (const void *cand1, const void *cand2)
313 const_cand_t c1 = (const_cand_t) cand1;
314 const_cand_t c2 = (const_cand_t) cand2;
315 lra_insn_recog_data_t id1 = lra_get_insn_recog_data (c1->insn);
316 lra_insn_recog_data_t id2 = lra_get_insn_recog_data (c2->insn);
317 struct lra_static_insn_data *static_id1 = id1->insn_static_data;
318 int nops = static_id1->n_operands;
320 if (c1->regno != c2->regno
321 || INSN_CODE (c1->insn) < 0
322 || INSN_CODE (c1->insn) != INSN_CODE (c2->insn))
323 return false;
324 gcc_assert (c1->nop == c2->nop);
325 for (int i = 0; i < nops; i++)
326 if (i != c1->nop && static_id1->operand[i].type == OP_IN
327 && *id1->operand_loc[i] != *id2->operand_loc[i])
328 return false;
329 return true;
332 /* Insert candidate CAND into the table if it is not there yet.
333 Return candidate which is in the table. */
334 static cand_t
335 insert_cand (cand_t cand)
337 void **entry_ptr;
339 entry_ptr = htab_find_slot (cand_table, cand, INSERT);
340 if (*entry_ptr == NULL)
341 *entry_ptr = (void *) cand;
342 return (cand_t) *entry_ptr;
345 /* Free candidate CAND memory. */
346 static void
347 free_cand (void *cand)
349 free (cand);
352 /* Initiate the candidate table. */
353 static void
354 initiate_cand_table (void)
356 cand_table = htab_create (8000, cand_hash, cand_eq_p,
357 (htab_del) free_cand);
360 /* Finish the candidate table. */
361 static void
362 finish_cand_table (void)
364 htab_delete (cand_table);
369 /* Return true if X contains memory or some UNSPEC. We can not just
370 check insn operands as memory or unspec might be not an operand
371 itself but contain an operand. Insn with memory access is not
372 profitable for rematerialization. Rematerialization of UNSPEC
373 might result in wrong code generation as the UNPEC effect is
374 unknown (e.g. generating a label). */
375 static bool
376 bad_for_rematerialization_p (rtx x)
378 int i, j;
379 const char *fmt;
380 enum rtx_code code;
382 if (MEM_P (x) || GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE)
383 return true;
384 code = GET_CODE (x);
385 fmt = GET_RTX_FORMAT (code);
386 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
388 if (fmt[i] == 'e')
390 if (bad_for_rematerialization_p (XEXP (x, i)))
391 return true;
393 else if (fmt[i] == 'E')
395 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
396 if (bad_for_rematerialization_p (XVECEXP (x, i, j)))
397 return true;
400 return false;
403 /* If INSN can not be used for rematerialization, return negative
404 value. If INSN can be considered as a candidate for
405 rematerialization, return value which is the operand number of the
406 pseudo for which the insn can be used for rematerialization. Here
407 we consider the insns without any memory, spilled pseudo (except
408 for the rematerialization pseudo), or dying or unused regs. */
409 static int
410 operand_to_remat (rtx_insn *insn)
412 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
413 struct lra_static_insn_data *static_id = id->insn_static_data;
414 struct lra_insn_reg *reg, *found_reg = NULL;
416 /* Don't rematerialize insns which can change PC. */
417 if (JUMP_P (insn) || CALL_P (insn))
418 return -1;
419 /* First find a pseudo which can be rematerialized. */
420 for (reg = id->regs; reg != NULL; reg = reg->next)
421 /* True FRAME_POINTER_NEEDED might be because we can not follow
422 changing sp offsets, e.g. alloca is used. If the insn contains
423 stack pointer in such case, we can not rematerialize it as we
424 can not know sp offset at a rematerialization place. */
425 if (reg->regno == STACK_POINTER_REGNUM && frame_pointer_needed)
426 return -1;
427 else if (reg->type == OP_OUT && ! reg->subreg_p
428 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
430 /* We permits only one spilled reg. */
431 if (found_reg != NULL)
432 return -1;
433 found_reg = reg;
435 /* IRA calculates conflicts separately for subregs of two words
436 pseudo. Even if the pseudo lives, e.g. one its subreg can be
437 used lately, another subreg hard register can be already used
438 for something else. In such case, it is not safe to
439 rematerialize the insn. */
440 else if (reg->type == OP_IN && reg->subreg_p
441 && reg->regno >= FIRST_PSEUDO_REGISTER
442 && (GET_MODE_SIZE (PSEUDO_REGNO_MODE (reg->regno))
443 == 2 * UNITS_PER_WORD))
444 return -1;
445 if (found_reg == NULL)
446 return -1;
447 if (found_reg->regno < FIRST_PSEUDO_REGISTER)
448 return -1;
449 if (bad_for_rematerialization_p (PATTERN (insn)))
450 return -1;
451 /* Check the other regs are not spilled. */
452 for (reg = id->regs; reg != NULL; reg = reg->next)
453 if (found_reg == reg)
454 continue;
455 else if (reg->type == OP_INOUT)
456 return -1;
457 else if (reg->regno >= FIRST_PSEUDO_REGISTER
458 && reg_renumber[reg->regno] < 0)
459 /* Another spilled reg. */
460 return -1;
461 else if (reg->type == OP_IN)
463 if (find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
464 /* We don't want to make live ranges longer. */
465 return -1;
466 /* Check that there is no output reg as the input one. */
467 for (struct lra_insn_reg *reg2 = id->regs;
468 reg2 != NULL;
469 reg2 = reg2->next)
470 if (reg2->type == OP_OUT && reg->regno == reg2->regno)
471 return -1;
472 if (reg->regno < FIRST_PSEUDO_REGISTER)
473 for (struct lra_insn_reg *reg2 = static_id->hard_regs;
474 reg2 != NULL;
475 reg2 = reg2->next)
476 if (reg2->type == OP_OUT
477 && reg->regno <= reg2->regno
478 && (reg2->regno
479 < (reg->regno
480 + hard_regno_nregs[reg->regno][reg->biggest_mode])))
481 return -1;
483 /* Find the rematerialization operand. */
484 int nop = static_id->n_operands;
485 for (int i = 0; i < nop; i++)
486 if (REG_P (*id->operand_loc[i])
487 && (int) REGNO (*id->operand_loc[i]) == found_reg->regno)
488 return i;
489 return -1;
492 /* Create candidate for INSN with rematerialization operand NOP and
493 REGNO. Insert the candidate into the table and set up the
494 corresponding INSN_TO_CAND element. */
495 static void
496 create_cand (rtx_insn *insn, int nop, int regno)
498 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
499 rtx reg = *id->operand_loc[nop];
500 gcc_assert (REG_P (reg));
501 int op_regno = REGNO (reg);
502 gcc_assert (op_regno >= FIRST_PSEUDO_REGISTER);
503 cand_t cand = XNEW (struct cand);
504 cand->insn = insn;
505 cand->nop = nop;
506 cand->regno = regno;
507 cand->reload_regno = op_regno == regno ? -1 : op_regno;
508 gcc_assert (cand->regno >= 0);
509 cand_t cand_in_table = insert_cand (cand);
510 insn_to_cand[INSN_UID (insn)] = cand_in_table;
511 if (cand != cand_in_table)
512 free (cand);
513 else
515 /* A new cand. */
516 cand->index = all_cands.length ();
517 all_cands.safe_push (cand);
518 cand->next_regno_cand = regno_cands[cand->regno];
519 regno_cands[cand->regno] = cand;
523 /* Create rematerialization candidates (inserting them into the
524 table). */
525 static void
526 create_cands (void)
528 rtx_insn *insn;
529 struct potential_cand
531 rtx_insn *insn;
532 int nop;
534 struct potential_cand *regno_potential_cand;
536 /* Create candidates. */
537 regno_potential_cand = XCNEWVEC (struct potential_cand, max_reg_num ());
538 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
539 if (INSN_P (insn))
541 rtx set;
542 int src_regno, dst_regno;
543 rtx_insn *insn2;
544 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
545 int nop = operand_to_remat (insn);
546 int regno = -1;
548 if ((set = single_set (insn)) != NULL
549 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
550 && ((src_regno = REGNO (SET_SRC (set)))
551 >= lra_constraint_new_regno_start)
552 && (dst_regno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
553 && reg_renumber[dst_regno] < 0
554 && (insn2 = regno_potential_cand[src_regno].insn) != NULL
555 && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn))
556 /* It is an output reload insn after insn can be
557 rematerialized (potential candidate). */
558 create_cand (insn2, regno_potential_cand[src_regno].nop, dst_regno);
559 if (nop < 0)
560 goto fail;
561 gcc_assert (REG_P (*id->operand_loc[nop]));
562 regno = REGNO (*id->operand_loc[nop]);
563 gcc_assert (regno >= FIRST_PSEUDO_REGISTER);
564 if (reg_renumber[regno] < 0)
565 create_cand (insn, nop, regno);
566 else
568 regno_potential_cand[regno].insn = insn;
569 regno_potential_cand[regno].nop = nop;
570 goto fail;
572 regno = -1;
573 fail:
574 for (struct lra_insn_reg *reg = id->regs; reg != NULL; reg = reg->next)
575 if (reg->type != OP_IN && reg->regno != regno
576 && reg->regno >= FIRST_PSEUDO_REGISTER)
577 regno_potential_cand[reg->regno].insn = NULL;
579 cands_num = all_cands.length ();
580 free (regno_potential_cand);
585 /* Create and initialize BB data. */
586 static void
587 create_remat_bb_data (void)
589 basic_block bb;
590 remat_bb_data_t bb_info;
592 remat_bb_data = XNEWVEC (struct remat_bb_data,
593 last_basic_block_for_fn (cfun));
594 FOR_ALL_BB_FN (bb, cfun)
596 #ifdef ENABLE_CHECKING
597 if (bb->index < 0 || bb->index >= last_basic_block_for_fn (cfun))
598 abort ();
599 #endif
600 bb_info = get_remat_bb_data (bb);
601 bb_info->bb = bb;
602 bitmap_initialize (&bb_info->changed_regs, &reg_obstack);
603 bitmap_initialize (&bb_info->dead_regs, &reg_obstack);
604 bitmap_initialize (&bb_info->gen_cands, &reg_obstack);
605 bitmap_initialize (&bb_info->livein_cands, &reg_obstack);
606 bitmap_initialize (&bb_info->pavin_cands, &reg_obstack);
607 bitmap_initialize (&bb_info->pavout_cands, &reg_obstack);
608 bitmap_initialize (&bb_info->avin_cands, &reg_obstack);
609 bitmap_initialize (&bb_info->avout_cands, &reg_obstack);
613 /* Dump all candidates to DUMP_FILE. */
614 static void
615 dump_cands (FILE *dump_file)
617 int i;
618 cand_t cand;
620 fprintf (dump_file, "\nCands:\n");
621 for (i = 0; i < (int) cands_num; i++)
623 cand = all_cands[i];
624 fprintf (dump_file, "%d (nop=%d, remat_regno=%d, reload_regno=%d):\n",
625 i, cand->nop, cand->regno, cand->reload_regno);
626 print_inline_rtx (dump_file, cand->insn, 6);
627 fprintf (dump_file, "\n");
631 /* Dump all candidates and BB data. */
632 static void
633 dump_candidates_and_remat_bb_data (void)
635 basic_block bb;
637 if (lra_dump_file == NULL)
638 return;
639 dump_cands (lra_dump_file);
640 FOR_EACH_BB_FN (bb, cfun)
642 fprintf (lra_dump_file, "\nBB %d:\n", bb->index);
643 /* Livein */
644 fprintf (lra_dump_file, " register live in:");
645 dump_regset (df_get_live_in (bb), lra_dump_file);
646 putc ('\n', lra_dump_file);
647 /* Liveout */
648 fprintf (lra_dump_file, " register live out:");
649 dump_regset (df_get_live_out (bb), lra_dump_file);
650 putc ('\n', lra_dump_file);
651 /* Changed/dead regs: */
652 fprintf (lra_dump_file, " changed regs:");
653 dump_regset (&get_remat_bb_data (bb)->changed_regs, lra_dump_file);
654 putc ('\n', lra_dump_file);
655 fprintf (lra_dump_file, " dead regs:");
656 dump_regset (&get_remat_bb_data (bb)->dead_regs, lra_dump_file);
657 putc ('\n', lra_dump_file);
658 lra_dump_bitmap_with_title ("cands generated in BB",
659 &get_remat_bb_data (bb)->gen_cands, bb->index);
660 lra_dump_bitmap_with_title ("livein cands in BB",
661 &get_remat_bb_data (bb)->livein_cands, bb->index);
662 lra_dump_bitmap_with_title ("pavin cands in BB",
663 &get_remat_bb_data (bb)->pavin_cands, bb->index);
664 lra_dump_bitmap_with_title ("pavout cands in BB",
665 &get_remat_bb_data (bb)->pavout_cands, bb->index);
666 lra_dump_bitmap_with_title ("avin cands in BB",
667 &get_remat_bb_data (bb)->avin_cands, bb->index);
668 lra_dump_bitmap_with_title ("avout cands in BB",
669 &get_remat_bb_data (bb)->avout_cands, bb->index);
673 /* Free all BB data. */
674 static void
675 finish_remat_bb_data (void)
677 basic_block bb;
679 FOR_EACH_BB_FN (bb, cfun)
681 bitmap_clear (&get_remat_bb_data (bb)->avout_cands);
682 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
683 bitmap_clear (&get_remat_bb_data (bb)->pavout_cands);
684 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
685 bitmap_clear (&get_remat_bb_data (bb)->livein_cands);
686 bitmap_clear (&get_remat_bb_data (bb)->gen_cands);
687 bitmap_clear (&get_remat_bb_data (bb)->dead_regs);
688 bitmap_clear (&get_remat_bb_data (bb)->changed_regs);
690 free (remat_bb_data);
695 /* Update changed_regs and dead_regs of BB from INSN. */
696 static void
697 set_bb_regs (basic_block bb, rtx_insn *insn)
699 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
700 struct lra_insn_reg *reg;
702 for (reg = id->regs; reg != NULL; reg = reg->next)
703 if (reg->type != OP_IN)
704 bitmap_set_bit (&get_remat_bb_data (bb)->changed_regs, reg->regno);
705 else
707 if (find_regno_note (insn, REG_DEAD, (unsigned) reg->regno) != NULL)
708 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, reg->regno);
710 if (CALL_P (insn))
711 for (int i = 0; i < call_used_regs_arr_len; i++)
712 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs,
713 call_used_regs_arr[i]);
716 /* Calculate changed_regs and dead_regs for each BB. */
717 static void
718 calculate_local_reg_remat_bb_data (void)
720 basic_block bb;
721 rtx_insn *insn;
723 FOR_EACH_BB_FN (bb, cfun)
724 FOR_BB_INSNS (bb, insn)
725 if (INSN_P (insn))
726 set_bb_regs (bb, insn);
731 /* Return true if REGNO is an input operand of INSN. */
732 static bool
733 input_regno_present_p (rtx_insn *insn, int regno)
735 int iter;
736 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
737 struct lra_static_insn_data *static_id = id->insn_static_data;
738 struct lra_insn_reg *reg;
740 for (iter = 0; iter < 2; iter++)
741 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
742 reg != NULL;
743 reg = reg->next)
744 if (reg->type == OP_IN && reg->regno == regno)
745 return true;
746 return false;
749 /* Return true if a call used register is an input operand of INSN. */
750 static bool
751 call_used_input_regno_present_p (rtx_insn *insn)
753 int iter;
754 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
755 struct lra_static_insn_data *static_id = id->insn_static_data;
756 struct lra_insn_reg *reg;
758 for (iter = 0; iter < 2; iter++)
759 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
760 reg != NULL;
761 reg = reg->next)
762 if (reg->type == OP_IN && reg->regno <= FIRST_PSEUDO_REGISTER
763 && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno))
764 return true;
765 return false;
768 /* Calculate livein_cands for each BB. */
769 static void
770 calculate_livein_cands (void)
772 basic_block bb;
774 FOR_EACH_BB_FN (bb, cfun)
776 bitmap livein_regs = df_get_live_in (bb);
777 bitmap livein_cands = &get_remat_bb_data (bb)->livein_cands;
778 for (unsigned int i = 0; i < cands_num; i++)
780 cand_t cand = all_cands[i];
781 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
782 struct lra_insn_reg *reg;
784 for (reg = id->regs; reg != NULL; reg = reg->next)
785 if (reg->type == OP_IN && ! bitmap_bit_p (livein_regs, reg->regno))
786 break;
787 if (reg == NULL)
788 bitmap_set_bit (livein_cands, i);
793 /* Calculate gen_cands for each BB. */
794 static void
795 calculate_gen_cands (void)
797 basic_block bb;
798 bitmap gen_cands;
799 bitmap_head gen_insns;
800 rtx_insn *insn;
802 bitmap_initialize (&gen_insns, &reg_obstack);
803 FOR_EACH_BB_FN (bb, cfun)
805 gen_cands = &get_remat_bb_data (bb)->gen_cands;
806 bitmap_clear (&gen_insns);
807 FOR_BB_INSNS (bb, insn)
808 if (INSN_P (insn))
810 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
811 struct lra_static_insn_data *static_id = id->insn_static_data;
812 struct lra_insn_reg *reg;
813 unsigned int uid;
814 bitmap_iterator bi;
815 cand_t cand;
816 rtx set;
817 int iter;
818 int src_regno = -1, dst_regno = -1;
820 if ((set = single_set (insn)) != NULL
821 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
823 src_regno = REGNO (SET_SRC (set));
824 dst_regno = REGNO (SET_DEST (set));
827 /* Update gen_cands: */
828 bitmap_clear (&temp_bitmap);
829 for (iter = 0; iter < 2; iter++)
830 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
831 reg != NULL;
832 reg = reg->next)
833 if (reg->type != OP_IN
834 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
835 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
837 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
839 cand = insn_to_cand[INSN_UID (insn2)];
840 gcc_assert (cand != NULL);
841 /* Ignore the reload insn. */
842 if (src_regno == cand->reload_regno
843 && dst_regno == cand->regno)
844 continue;
845 if (cand->regno == reg->regno
846 || input_regno_present_p (insn2, reg->regno))
848 bitmap_clear_bit (gen_cands, cand->index);
849 bitmap_set_bit (&temp_bitmap, uid);
853 if (CALL_P (insn))
854 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
856 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
858 cand = insn_to_cand[INSN_UID (insn2)];
859 gcc_assert (cand != NULL);
860 if (call_used_input_regno_present_p (insn2))
862 bitmap_clear_bit (gen_cands, cand->index);
863 bitmap_set_bit (&temp_bitmap, uid);
866 bitmap_and_compl_into (&gen_insns, &temp_bitmap);
868 cand = insn_to_cand[INSN_UID (insn)];
869 if (cand != NULL)
871 bitmap_set_bit (gen_cands, cand->index);
872 bitmap_set_bit (&gen_insns, INSN_UID (insn));
876 bitmap_clear (&gen_insns);
881 /* The common transfer function used by the DF equation solver to
882 propagate (partial) availability info BB_IN to BB_OUT through block
883 with BB_INDEX according to the following equation:
885 bb.out = ((bb.in & bb.livein) - bb.killed) OR bb.gen
887 static bool
888 cand_trans_fun (int bb_index, bitmap bb_in, bitmap bb_out)
890 remat_bb_data_t bb_info;
891 bitmap bb_livein, bb_changed_regs, bb_dead_regs;
892 unsigned int cid;
893 bitmap_iterator bi;
895 bb_info = get_remat_bb_data_by_index (bb_index);
896 bb_livein = &bb_info->livein_cands;
897 bb_changed_regs = &bb_info->changed_regs;
898 bb_dead_regs = &bb_info->dead_regs;
899 /* Calculate killed avin cands -- cands whose regs are changed or
900 becoming dead in the BB. We calculate it here as we hope that
901 repeated calculations are compensated by smaller size of BB_IN in
902 comparison with all candidates number. */
903 bitmap_clear (&temp_bitmap);
904 EXECUTE_IF_SET_IN_BITMAP (bb_in, 0, cid, bi)
906 cand_t cand = all_cands[cid];
907 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
908 struct lra_insn_reg *reg;
910 if (! bitmap_bit_p (bb_livein, cid))
912 bitmap_set_bit (&temp_bitmap, cid);
913 continue;
915 for (reg = id->regs; reg != NULL; reg = reg->next)
916 /* Ignore all outputs which are not the regno for
917 rematerialization. */
918 if (reg->type == OP_OUT && reg->regno != cand->regno)
919 continue;
920 else if (bitmap_bit_p (bb_changed_regs, reg->regno)
921 || bitmap_bit_p (bb_dead_regs, reg->regno))
923 bitmap_set_bit (&temp_bitmap, cid);
924 break;
926 /* Check regno for rematerialization. */
927 if (bitmap_bit_p (bb_changed_regs, cand->regno)
928 || bitmap_bit_p (bb_dead_regs, cand->regno))
929 bitmap_set_bit (&temp_bitmap, cid);
931 return bitmap_ior_and_compl (bb_out,
932 &bb_info->gen_cands, bb_in, &temp_bitmap);
937 /* The transfer function used by the DF equation solver to propagate
938 partial candidate availability info through block with BB_INDEX
939 according to the following equation:
941 bb.pavout = ((bb.pavin & bb.livein) - bb.killed) OR bb.gen
943 static bool
944 cand_pav_trans_fun (int bb_index)
946 remat_bb_data_t bb_info;
948 bb_info = get_remat_bb_data_by_index (bb_index);
949 return cand_trans_fun (bb_index, &bb_info->pavin_cands,
950 &bb_info->pavout_cands);
953 /* The confluence function used by the DF equation solver to set up
954 cand_pav info for a block BB without predecessor. */
955 static void
956 cand_pav_con_fun_0 (basic_block bb)
958 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
961 /* The confluence function used by the DF equation solver to propagate
962 partial candidate availability info from predecessor to successor
963 on edge E (pred->bb) according to the following equation:
965 bb.pavin_cands = 0 for entry block | OR (pavout_cands of predecessors)
967 static bool
968 cand_pav_con_fun_n (edge e)
970 basic_block pred = e->src;
971 basic_block bb = e->dest;
972 remat_bb_data_t bb_info;
973 bitmap bb_pavin, pred_pavout;
975 bb_info = get_remat_bb_data (bb);
976 bb_pavin = &bb_info->pavin_cands;
977 pred_pavout = &get_remat_bb_data (pred)->pavout_cands;
978 return bitmap_ior_into (bb_pavin, pred_pavout);
983 /* The transfer function used by the DF equation solver to propagate
984 candidate availability info through block with BB_INDEX according
985 to the following equation:
987 bb.avout = ((bb.avin & bb.livein) - bb.killed) OR bb.gen
989 static bool
990 cand_av_trans_fun (int bb_index)
992 remat_bb_data_t bb_info;
994 bb_info = get_remat_bb_data_by_index (bb_index);
995 return cand_trans_fun (bb_index, &bb_info->avin_cands,
996 &bb_info->avout_cands);
999 /* The confluence function used by the DF equation solver to set up
1000 cand_av info for a block BB without predecessor. */
1001 static void
1002 cand_av_con_fun_0 (basic_block bb)
1004 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
1007 /* The confluence function used by the DF equation solver to propagate
1008 cand_av info from predecessor to successor on edge E (pred->bb)
1009 according to the following equation:
1011 bb.avin_cands = 0 for entry block | AND (avout_cands of predecessors)
1013 static bool
1014 cand_av_con_fun_n (edge e)
1016 basic_block pred = e->src;
1017 basic_block bb = e->dest;
1018 remat_bb_data_t bb_info;
1019 bitmap bb_avin, pred_avout;
1021 bb_info = get_remat_bb_data (bb);
1022 bb_avin = &bb_info->avin_cands;
1023 pred_avout = &get_remat_bb_data (pred)->avout_cands;
1024 return bitmap_and_into (bb_avin, pred_avout);
1027 /* Calculate available candidates for each BB. */
1028 static void
1029 calculate_global_remat_bb_data (void)
1031 basic_block bb;
1033 df_simple_dataflow
1034 (DF_FORWARD, NULL, cand_pav_con_fun_0, cand_pav_con_fun_n,
1035 cand_pav_trans_fun, &all_blocks,
1036 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1037 /* Initialize avin by pavin. */
1038 FOR_EACH_BB_FN (bb, cfun)
1039 bitmap_copy (&get_remat_bb_data (bb)->avin_cands,
1040 &get_remat_bb_data (bb)->pavin_cands);
1041 df_simple_dataflow
1042 (DF_FORWARD, NULL, cand_av_con_fun_0, cand_av_con_fun_n,
1043 cand_av_trans_fun, &all_blocks,
1044 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1049 /* Setup sp offset attribute to SP_OFFSET for all INSNS. */
1050 static void
1051 change_sp_offset (rtx_insn *insns, HOST_WIDE_INT sp_offset)
1053 for (rtx_insn *insn = insns; insn != NULL; insn = NEXT_INSN (insn))
1054 eliminate_regs_in_insn (insn, false, false, sp_offset);
1057 /* Return start hard register of REG (can be a hard or a pseudo reg)
1058 or -1 (if it is a spilled pseudo). Return number of hard registers
1059 occupied by REG through parameter NREGS if the start hard reg is
1060 not negative. */
1061 static int
1062 get_hard_regs (struct lra_insn_reg *reg, int &nregs)
1064 int regno = reg->regno;
1065 int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno];
1067 if (hard_regno >= 0)
1068 nregs = hard_regno_nregs[hard_regno][reg->biggest_mode];
1069 return hard_regno;
1072 /* Make copy of and register scratch pseudos in rematerialized insn
1073 REMAT_INSN. */
1074 static void
1075 update_scratch_ops (rtx_insn *remat_insn)
1077 lra_insn_recog_data_t id = lra_get_insn_recog_data (remat_insn);
1078 struct lra_static_insn_data *static_id = id->insn_static_data;
1079 for (int i = 0; i < static_id->n_operands; i++)
1081 rtx *loc = id->operand_loc[i];
1082 if (! REG_P (*loc))
1083 continue;
1084 int regno = REGNO (*loc);
1085 if (! lra_former_scratch_p (regno))
1086 continue;
1087 *loc = lra_create_new_reg (GET_MODE (*loc), *loc,
1088 lra_get_allocno_class (regno),
1089 "scratch pseudo copy");
1090 lra_register_new_scratch_op (remat_insn, i);
1095 /* Insert rematerialization insns using the data-flow data calculated
1096 earlier. */
1097 static bool
1098 do_remat (void)
1100 rtx_insn *insn;
1101 basic_block bb;
1102 bitmap_head avail_cands;
1103 bool changed_p = false;
1104 /* Living hard regs and hard registers of living pseudos. */
1105 HARD_REG_SET live_hard_regs;
1107 bitmap_initialize (&avail_cands, &reg_obstack);
1108 FOR_EACH_BB_FN (bb, cfun)
1110 REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb));
1111 bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands,
1112 &get_remat_bb_data (bb)->livein_cands);
1113 FOR_BB_INSNS (bb, insn)
1115 if (!NONDEBUG_INSN_P (insn))
1116 continue;
1118 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
1119 struct lra_static_insn_data *static_id = id->insn_static_data;
1120 struct lra_insn_reg *reg;
1121 cand_t cand;
1122 unsigned int cid;
1123 bitmap_iterator bi;
1124 rtx set;
1125 int iter;
1126 int src_regno = -1, dst_regno = -1;
1128 if ((set = single_set (insn)) != NULL
1129 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
1131 src_regno = REGNO (SET_SRC (set));
1132 dst_regno = REGNO (SET_DEST (set));
1135 cand = NULL;
1136 /* Check possibility of rematerialization (hard reg or
1137 unpsilled pseudo <- spilled pseudo): */
1138 if (dst_regno >= 0 && src_regno >= FIRST_PSEUDO_REGISTER
1139 && reg_renumber[src_regno] < 0
1140 && (dst_regno < FIRST_PSEUDO_REGISTER
1141 || reg_renumber[dst_regno] >= 0))
1143 for (cand = regno_cands[src_regno];
1144 cand != NULL;
1145 cand = cand->next_regno_cand)
1146 if (bitmap_bit_p (&avail_cands, cand->index))
1147 break;
1149 int i, hard_regno, nregs;
1150 rtx_insn *remat_insn = NULL;
1151 HOST_WIDE_INT cand_sp_offset = 0;
1152 if (cand != NULL)
1154 lra_insn_recog_data_t cand_id
1155 = lra_get_insn_recog_data (cand->insn);
1156 struct lra_static_insn_data *static_cand_id
1157 = cand_id->insn_static_data;
1158 rtx saved_op = *cand_id->operand_loc[cand->nop];
1160 /* Check clobbers do not kill something living. */
1161 gcc_assert (REG_P (saved_op));
1162 int ignore_regno = REGNO (saved_op);
1164 for (reg = cand_id->regs; reg != NULL; reg = reg->next)
1165 if (reg->type != OP_IN && reg->regno != ignore_regno)
1167 hard_regno = get_hard_regs (reg, nregs);
1168 gcc_assert (hard_regno >= 0);
1169 for (i = 0; i < nregs; i++)
1170 if (TEST_HARD_REG_BIT (live_hard_regs, hard_regno + i))
1171 break;
1172 if (i < nregs)
1173 break;
1176 if (reg == NULL)
1178 for (reg = static_cand_id->hard_regs;
1179 reg != NULL;
1180 reg = reg->next)
1181 if (reg->type != OP_IN
1182 && TEST_HARD_REG_BIT (live_hard_regs, reg->regno))
1183 break;
1186 if (reg == NULL)
1188 *cand_id->operand_loc[cand->nop] = SET_DEST (set);
1189 lra_update_insn_regno_info (cand->insn);
1190 bool ok_p = lra_constrain_insn (cand->insn);
1191 if (ok_p)
1193 rtx remat_pat = copy_insn (PATTERN (cand->insn));
1195 start_sequence ();
1196 emit_insn (remat_pat);
1197 remat_insn = get_insns ();
1198 end_sequence ();
1199 if (recog_memoized (remat_insn) < 0)
1200 remat_insn = NULL;
1201 cand_sp_offset = cand_id->sp_offset;
1203 *cand_id->operand_loc[cand->nop] = saved_op;
1204 lra_update_insn_regno_info (cand->insn);
1208 bitmap_clear (&temp_bitmap);
1209 /* Update avail_cands (see analogous code for
1210 calculate_gen_cands). */
1211 for (iter = 0; iter < 2; iter++)
1212 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
1213 reg != NULL;
1214 reg = reg->next)
1215 if (reg->type != OP_IN
1216 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1217 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1219 cand = all_cands[cid];
1221 /* Ignore the reload insn. */
1222 if (src_regno == cand->reload_regno
1223 && dst_regno == cand->regno)
1224 continue;
1225 if (cand->regno == reg->regno
1226 || input_regno_present_p (cand->insn, reg->regno))
1227 bitmap_set_bit (&temp_bitmap, cand->index);
1230 if (CALL_P (insn))
1231 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1233 cand = all_cands[cid];
1235 if (call_used_input_regno_present_p (cand->insn))
1236 bitmap_set_bit (&temp_bitmap, cand->index);
1239 bitmap_and_compl_into (&avail_cands, &temp_bitmap);
1240 if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL)
1241 bitmap_set_bit (&avail_cands, cand->index);
1243 if (remat_insn != NULL)
1245 HOST_WIDE_INT sp_offset_change = cand_sp_offset - id->sp_offset;
1246 if (sp_offset_change != 0)
1247 change_sp_offset (remat_insn, sp_offset_change);
1248 update_scratch_ops (remat_insn);
1249 lra_process_new_insns (insn, remat_insn, NULL,
1250 "Inserting rematerialization insn");
1251 lra_set_insn_deleted (insn);
1252 changed_p = true;
1253 continue;
1256 /* Update live hard regs: */
1257 for (reg = id->regs; reg != NULL; reg = reg->next)
1258 if (reg->type == OP_IN
1259 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1261 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1262 continue;
1263 for (i = 0; i < nregs; i++)
1264 CLEAR_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1266 /* Process also hard regs (e.g. CC register) which are part
1267 of insn definition. */
1268 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1269 if (reg->type == OP_IN
1270 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1271 CLEAR_HARD_REG_BIT (live_hard_regs, reg->regno);
1272 /* Inputs have been processed, now process outputs. */
1273 for (reg = id->regs; reg != NULL; reg = reg->next)
1274 if (reg->type != OP_IN
1275 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1277 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1278 continue;
1279 for (i = 0; i < nregs; i++)
1280 SET_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1282 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1283 if (reg->type != OP_IN
1284 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1285 SET_HARD_REG_BIT (live_hard_regs, reg->regno);
1288 bitmap_clear (&avail_cands);
1289 return changed_p;
1294 /* Current number of rematerialization iteration. */
1295 int lra_rematerialization_iter;
1297 /* Entry point of the rematerialization sub-pass. Return true if we
1298 did any rematerialization. */
1299 bool
1300 lra_remat (void)
1302 basic_block bb;
1303 bool result;
1304 int max_regno = max_reg_num ();
1306 if (! flag_lra_remat)
1307 return false;
1308 lra_rematerialization_iter++;
1309 if (lra_rematerialization_iter > LRA_MAX_REMATERIALIZATION_PASSES)
1310 return false;
1311 if (lra_dump_file != NULL)
1312 fprintf (lra_dump_file,
1313 "\n******** Rematerialization #%d: ********\n\n",
1314 lra_rematerialization_iter);
1315 timevar_push (TV_LRA_REMAT);
1316 insn_to_cand = XCNEWVEC (cand_t, get_max_uid ());
1317 regno_cands = XCNEWVEC (cand_t, max_regno);
1318 all_cands.create (8000);
1319 call_used_regs_arr_len = 0;
1320 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1321 if (call_used_regs[i])
1322 call_used_regs_arr[call_used_regs_arr_len++] = i;
1323 initiate_cand_table ();
1324 create_cands ();
1325 create_remat_bb_data ();
1326 bitmap_initialize (&temp_bitmap, &reg_obstack);
1327 calculate_local_reg_remat_bb_data ();
1328 calculate_livein_cands ();
1329 calculate_gen_cands ();
1330 bitmap_initialize (&all_blocks, &reg_obstack);
1331 FOR_ALL_BB_FN (bb, cfun)
1332 bitmap_set_bit (&all_blocks, bb->index);
1333 calculate_global_remat_bb_data ();
1334 dump_candidates_and_remat_bb_data ();
1335 result = do_remat ();
1336 all_cands.release ();
1337 bitmap_clear (&temp_bitmap);
1338 finish_remat_bb_data ();
1339 finish_cand_table ();
1340 bitmap_clear (&all_blocks);
1341 free (regno_cands);
1342 free (insn_to_cand);
1343 timevar_pop (TV_LRA_REMAT);
1344 return result;