Update my email address.
[official-gcc.git] / gcc / lra-remat.c
blob66532b8f464f033a8911ffc949b1200c98f5fe2a
1 /* Rematerialize pseudos values.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov <vmakarov@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This code objective is to rematerialize spilled pseudo values. To
22 do this we calculate available insn candidates. The candidate is
23 available at some point if there is dominated set of insns with the
24 same pattern, the insn inputs are not dying or modified on any path
25 from the set, the outputs are not modified.
27 The insns containing memory or spilled pseudos (except for the
28 rematerialized pseudo) are not considered as such insns are not
29 profitable in comparison with regular loads of spilled pseudo
30 values. That simplifies the implementation as we don't need to
31 deal with memory aliasing.
33 To speed up available candidate calculation, we calculate partially
34 available candidates first and use them for initialization of the
35 availability. That is because (partial) availability sets are
36 sparse.
38 The rematerialization sub-pass could be improved further in the
39 following ways:
41 o We could make longer live ranges of inputs in the
42 rematerialization candidates if their hard registers are not used
43 for other purposes. This could be complicated if we need to
44 update BB live info information as LRA does not use
45 DF-infrastructure for compile-time reasons. This problem could
46 be overcome if constrain making live ranges longer only in BB/EBB
47 scope.
48 o We could use cost-based decision to choose rematerialization insn
49 (currently all insns without memory is can be used).
50 o We could use other free hard regs for unused output pseudos in
51 rematerialization candidates although such cases probably will
52 be very rare. */
55 #include "config.h"
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "tree.h"
60 #include "rtl.h"
61 #include "df.h"
62 #include "rtl-error.h"
63 #include "tm_p.h"
64 #include "target.h"
65 #include "insn-config.h"
66 #include "recog.h"
67 #include "output.h"
68 #include "regs.h"
69 #include "flags.h"
70 #include "alias.h"
71 #include "expmed.h"
72 #include "dojump.h"
73 #include "explow.h"
74 #include "calls.h"
75 #include "emit-rtl.h"
76 #include "varasm.h"
77 #include "stmt.h"
78 #include "expr.h"
79 #include "except.h"
80 #include "ira.h"
81 #include "sparseset.h"
82 #include "params.h"
83 #include "lra.h"
84 #include "insn-attr.h"
85 #include "insn-codes.h"
86 #include "lra-int.h"
88 /* Number of candidates for rematerialization. */
89 static unsigned int cands_num;
91 /* The following is used for representation of call_used_reg_set in
92 form array whose elements are hard register numbers with nonzero bit
93 in CALL_USED_REG_SET. */
94 static int call_used_regs_arr_len;
95 static int call_used_regs_arr[FIRST_PSEUDO_REGISTER];
97 /* Bitmap used for different calculations. */
98 static bitmap_head temp_bitmap;
100 typedef struct cand *cand_t;
101 typedef const struct cand *const_cand_t;
103 /* Insn candidates for rematerialization. The candidate insn should
104 have the following properies:
105 o no any memory (as access to memory is non-profitable)
106 o no INOUT regs (it means no non-paradoxical subreg of output reg)
107 o one output spilled pseudo (or reload pseudo of a spilled pseudo)
108 o all other pseudos are with assigned hard regs. */
109 struct cand
111 /* Index of the candidates in all_cands. */
112 int index;
113 /* The candidate insn. */
114 rtx_insn *insn;
115 /* Insn pseudo regno for rematerialization. */
116 int regno;
117 /* Non-negative if a reload pseudo is in the insn instead of the
118 pseudo for rematerialization. */
119 int reload_regno;
120 /* Number of the operand containing the regno or its reload
121 regno. */
122 int nop;
123 /* Next candidate for the same regno. */
124 cand_t next_regno_cand;
127 /* Vector containing all candidates. */
128 static vec<cand_t> all_cands;
129 /* Map: insn -> candidate representing it. It is null if the insn can
130 not be used for rematerialization. */
131 static cand_t *insn_to_cand;
133 /* Map regno -> candidates can be used for the regno
134 rematerialization. */
135 static cand_t *regno_cands;
137 /* Data about basic blocks used for the rematerialization
138 sub-pass. */
139 struct remat_bb_data
141 /* Basic block about which the below data are. */
142 basic_block bb;
143 /* Registers changed in the basic block: */
144 bitmap_head changed_regs;
145 /* Registers becoming dead in the BB. */
146 bitmap_head dead_regs;
147 /* Cands present in the BB whose in/out regs are not changed after
148 the cands occurence and are not dead (except the reload
149 regno). */
150 bitmap_head gen_cands;
151 bitmap_head livein_cands; /* cands whose inputs live at the BB start. */
152 bitmap_head pavin_cands; /* cands partially available at BB entry. */
153 bitmap_head pavout_cands; /* cands partially available at BB exit. */
154 bitmap_head avin_cands; /* cands available at the entry of the BB. */
155 bitmap_head avout_cands; /* cands available at the exit of the BB. */
158 /* Array for all BB data. Indexed by the corresponding BB index. */
159 typedef struct remat_bb_data *remat_bb_data_t;
161 /* Basic blocks for data flow problems -- all bocks except the special
162 ones. */
163 static bitmap_head all_blocks;
165 /* All basic block data are referred through the following array. */
166 static remat_bb_data_t remat_bb_data;
168 /* Two small functions for access to the bb data. */
169 static inline remat_bb_data_t
170 get_remat_bb_data (basic_block bb)
172 return &remat_bb_data[(bb)->index];
175 static inline remat_bb_data_t
176 get_remat_bb_data_by_index (int index)
178 return &remat_bb_data[index];
183 /* Recursive hash function for RTL X. */
184 static hashval_t
185 rtx_hash (rtx x)
187 int i, j;
188 enum rtx_code code;
189 const char *fmt;
190 hashval_t val = 0;
192 if (x == 0)
193 return val;
195 code = GET_CODE (x);
196 val += (int) code + 4095;
198 /* Some RTL can be compared nonrecursively. */
199 switch (code)
201 case REG:
202 return val + REGNO (x);
204 case LABEL_REF:
205 return iterative_hash_object (XEXP (x, 0), val);
207 case SYMBOL_REF:
208 return iterative_hash_object (XSTR (x, 0), val);
210 case SCRATCH:
211 case CONST_DOUBLE:
212 case CONST_INT:
213 case CONST_VECTOR:
214 return val;
216 default:
217 break;
220 /* Hash the elements. */
221 fmt = GET_RTX_FORMAT (code);
222 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
224 switch (fmt[i])
226 case 'w':
227 val += XWINT (x, i);
228 break;
230 case 'n':
231 case 'i':
232 val += XINT (x, i);
233 break;
235 case 'V':
236 case 'E':
237 val += XVECLEN (x, i);
239 for (j = 0; j < XVECLEN (x, i); j++)
240 val += rtx_hash (XVECEXP (x, i, j));
241 break;
243 case 'e':
244 val += rtx_hash (XEXP (x, i));
245 break;
247 case 'S':
248 case 's':
249 val += htab_hash_string (XSTR (x, i));
250 break;
252 case 'u':
253 case '0':
254 case 't':
255 break;
257 /* It is believed that rtx's at this level will never
258 contain anything but integers and other rtx's, except for
259 within LABEL_REFs and SYMBOL_REFs. */
260 default:
261 abort ();
264 return val;
269 /* Hash table for the candidates. Different insns (e.g. structurally
270 the same insns or even insns with different unused output regs) can
271 be represented by the same candidate in the table. */
272 static htab_t cand_table;
274 /* Hash function for candidate CAND. */
275 static hashval_t
276 cand_hash (const void *cand)
278 const_cand_t c = (const_cand_t) cand;
279 lra_insn_recog_data_t id = lra_get_insn_recog_data (c->insn);
280 struct lra_static_insn_data *static_id = id->insn_static_data;
281 int nops = static_id->n_operands;
282 hashval_t hash = 0;
284 for (int i = 0; i < nops; i++)
285 if (i == c->nop)
286 hash = iterative_hash_object (c->regno, hash);
287 else if (static_id->operand[i].type == OP_IN)
288 hash = iterative_hash_object (*id->operand_loc[i], hash);
289 return hash;
292 /* Equal function for candidates CAND1 and CAND2. They are equal if
293 the corresponding candidate insns have the same code, the same
294 regno for rematerialization, the same input operands. */
295 static int
296 cand_eq_p (const void *cand1, const void *cand2)
298 const_cand_t c1 = (const_cand_t) cand1;
299 const_cand_t c2 = (const_cand_t) cand2;
300 lra_insn_recog_data_t id1 = lra_get_insn_recog_data (c1->insn);
301 lra_insn_recog_data_t id2 = lra_get_insn_recog_data (c2->insn);
302 struct lra_static_insn_data *static_id1 = id1->insn_static_data;
303 int nops = static_id1->n_operands;
305 if (c1->regno != c2->regno
306 || INSN_CODE (c1->insn) < 0
307 || INSN_CODE (c1->insn) != INSN_CODE (c2->insn))
308 return false;
309 gcc_assert (c1->nop == c2->nop);
310 for (int i = 0; i < nops; i++)
311 if (i != c1->nop && static_id1->operand[i].type == OP_IN
312 && *id1->operand_loc[i] != *id2->operand_loc[i])
313 return false;
314 return true;
317 /* Insert candidate CAND into the table if it is not there yet.
318 Return candidate which is in the table. */
319 static cand_t
320 insert_cand (cand_t cand)
322 void **entry_ptr;
324 entry_ptr = htab_find_slot (cand_table, cand, INSERT);
325 if (*entry_ptr == NULL)
326 *entry_ptr = (void *) cand;
327 return (cand_t) *entry_ptr;
330 /* Free candidate CAND memory. */
331 static void
332 free_cand (void *cand)
334 free (cand);
337 /* Initiate the candidate table. */
338 static void
339 initiate_cand_table (void)
341 cand_table = htab_create (8000, cand_hash, cand_eq_p,
342 (htab_del) free_cand);
345 /* Finish the candidate table. */
346 static void
347 finish_cand_table (void)
349 htab_delete (cand_table);
354 /* Return true if X contains memory or some UNSPEC. We can not just
355 check insn operands as memory or unspec might be not an operand
356 itself but contain an operand. Insn with memory access is not
357 profitable for rematerialization. Rematerialization of UNSPEC
358 might result in wrong code generation as the UNPEC effect is
359 unknown (e.g. generating a label). */
360 static bool
361 bad_for_rematerialization_p (rtx x)
363 int i, j;
364 const char *fmt;
365 enum rtx_code code;
367 if (MEM_P (x) || GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE)
368 return true;
369 code = GET_CODE (x);
370 fmt = GET_RTX_FORMAT (code);
371 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
373 if (fmt[i] == 'e')
375 if (bad_for_rematerialization_p (XEXP (x, i)))
376 return true;
378 else if (fmt[i] == 'E')
380 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
381 if (bad_for_rematerialization_p (XVECEXP (x, i, j)))
382 return true;
385 return false;
388 /* If INSN can not be used for rematerialization, return negative
389 value. If INSN can be considered as a candidate for
390 rematerialization, return value which is the operand number of the
391 pseudo for which the insn can be used for rematerialization. Here
392 we consider the insns without any memory, spilled pseudo (except
393 for the rematerialization pseudo), or dying or unused regs. */
394 static int
395 operand_to_remat (rtx_insn *insn)
397 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
398 struct lra_static_insn_data *static_id = id->insn_static_data;
399 struct lra_insn_reg *reg, *found_reg = NULL;
401 /* Don't rematerialize insns which can change PC. */
402 if (JUMP_P (insn) || CALL_P (insn))
403 return -1;
404 /* First find a pseudo which can be rematerialized. */
405 for (reg = id->regs; reg != NULL; reg = reg->next)
406 /* True FRAME_POINTER_NEEDED might be because we can not follow
407 changing sp offsets, e.g. alloca is used. If the insn contains
408 stack pointer in such case, we can not rematerialize it as we
409 can not know sp offset at a rematerialization place. */
410 if (reg->regno == STACK_POINTER_REGNUM && frame_pointer_needed)
411 return -1;
412 else if (reg->type == OP_OUT && ! reg->subreg_p
413 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
415 /* We permits only one spilled reg. */
416 if (found_reg != NULL)
417 return -1;
418 found_reg = reg;
420 /* IRA calculates conflicts separately for subregs of two words
421 pseudo. Even if the pseudo lives, e.g. one its subreg can be
422 used lately, another subreg hard register can be already used
423 for something else. In such case, it is not safe to
424 rematerialize the insn. */
425 else if (reg->type == OP_IN && reg->subreg_p
426 && reg->regno >= FIRST_PSEUDO_REGISTER
427 && (GET_MODE_SIZE (PSEUDO_REGNO_MODE (reg->regno))
428 == 2 * UNITS_PER_WORD))
429 return -1;
430 if (found_reg == NULL)
431 return -1;
432 if (found_reg->regno < FIRST_PSEUDO_REGISTER)
433 return -1;
434 if (bad_for_rematerialization_p (PATTERN (insn)))
435 return -1;
436 /* Check the other regs are not spilled. */
437 for (reg = id->regs; reg != NULL; reg = reg->next)
438 if (found_reg == reg)
439 continue;
440 else if (reg->type == OP_INOUT)
441 return -1;
442 else if (reg->regno >= FIRST_PSEUDO_REGISTER
443 && reg_renumber[reg->regno] < 0)
444 /* Another spilled reg. */
445 return -1;
446 else if (reg->type == OP_IN)
448 if (find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
449 /* We don't want to make live ranges longer. */
450 return -1;
451 /* Check that there is no output reg as the input one. */
452 for (struct lra_insn_reg *reg2 = id->regs;
453 reg2 != NULL;
454 reg2 = reg2->next)
455 if (reg2->type == OP_OUT && reg->regno == reg2->regno)
456 return -1;
457 if (reg->regno < FIRST_PSEUDO_REGISTER)
458 for (struct lra_insn_reg *reg2 = static_id->hard_regs;
459 reg2 != NULL;
460 reg2 = reg2->next)
461 if (reg2->type == OP_OUT
462 && reg->regno <= reg2->regno
463 && (reg2->regno
464 < (reg->regno
465 + hard_regno_nregs[reg->regno][reg->biggest_mode])))
466 return -1;
468 /* Find the rematerialization operand. */
469 int nop = static_id->n_operands;
470 for (int i = 0; i < nop; i++)
471 if (REG_P (*id->operand_loc[i])
472 && (int) REGNO (*id->operand_loc[i]) == found_reg->regno)
473 return i;
474 return -1;
477 /* Create candidate for INSN with rematerialization operand NOP and
478 REGNO. Insert the candidate into the table and set up the
479 corresponding INSN_TO_CAND element. */
480 static void
481 create_cand (rtx_insn *insn, int nop, int regno)
483 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
484 rtx reg = *id->operand_loc[nop];
485 gcc_assert (REG_P (reg));
486 int op_regno = REGNO (reg);
487 gcc_assert (op_regno >= FIRST_PSEUDO_REGISTER);
488 cand_t cand = XNEW (struct cand);
489 cand->insn = insn;
490 cand->nop = nop;
491 cand->regno = regno;
492 cand->reload_regno = op_regno == regno ? -1 : op_regno;
493 gcc_assert (cand->regno >= 0);
494 cand_t cand_in_table = insert_cand (cand);
495 insn_to_cand[INSN_UID (insn)] = cand_in_table;
496 if (cand != cand_in_table)
497 free (cand);
498 else
500 /* A new cand. */
501 cand->index = all_cands.length ();
502 all_cands.safe_push (cand);
503 cand->next_regno_cand = regno_cands[cand->regno];
504 regno_cands[cand->regno] = cand;
508 /* Create rematerialization candidates (inserting them into the
509 table). */
510 static void
511 create_cands (void)
513 rtx_insn *insn;
514 struct potential_cand
516 rtx_insn *insn;
517 int nop;
519 struct potential_cand *regno_potential_cand;
521 /* Create candidates. */
522 regno_potential_cand = XCNEWVEC (struct potential_cand, max_reg_num ());
523 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
524 if (INSN_P (insn))
526 rtx set;
527 int src_regno, dst_regno;
528 rtx_insn *insn2;
529 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
530 int nop = operand_to_remat (insn);
531 int regno = -1;
533 if ((set = single_set (insn)) != NULL
534 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
535 && ((src_regno = REGNO (SET_SRC (set)))
536 >= lra_constraint_new_regno_start)
537 && (dst_regno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
538 && reg_renumber[dst_regno] < 0
539 && (insn2 = regno_potential_cand[src_regno].insn) != NULL
540 && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn))
541 /* It is an output reload insn after insn can be
542 rematerialized (potential candidate). */
543 create_cand (insn2, regno_potential_cand[src_regno].nop, dst_regno);
544 if (nop < 0)
545 goto fail;
546 gcc_assert (REG_P (*id->operand_loc[nop]));
547 regno = REGNO (*id->operand_loc[nop]);
548 gcc_assert (regno >= FIRST_PSEUDO_REGISTER);
549 if (reg_renumber[regno] < 0)
550 create_cand (insn, nop, regno);
551 else
553 regno_potential_cand[regno].insn = insn;
554 regno_potential_cand[regno].nop = nop;
555 goto fail;
557 regno = -1;
558 fail:
559 for (struct lra_insn_reg *reg = id->regs; reg != NULL; reg = reg->next)
560 if (reg->type != OP_IN && reg->regno != regno
561 && reg->regno >= FIRST_PSEUDO_REGISTER)
562 regno_potential_cand[reg->regno].insn = NULL;
564 cands_num = all_cands.length ();
565 free (regno_potential_cand);
570 /* Create and initialize BB data. */
571 static void
572 create_remat_bb_data (void)
574 basic_block bb;
575 remat_bb_data_t bb_info;
577 remat_bb_data = XNEWVEC (struct remat_bb_data,
578 last_basic_block_for_fn (cfun));
579 FOR_ALL_BB_FN (bb, cfun)
581 #ifdef ENABLE_CHECKING
582 if (bb->index < 0 || bb->index >= last_basic_block_for_fn (cfun))
583 abort ();
584 #endif
585 bb_info = get_remat_bb_data (bb);
586 bb_info->bb = bb;
587 bitmap_initialize (&bb_info->changed_regs, &reg_obstack);
588 bitmap_initialize (&bb_info->dead_regs, &reg_obstack);
589 bitmap_initialize (&bb_info->gen_cands, &reg_obstack);
590 bitmap_initialize (&bb_info->livein_cands, &reg_obstack);
591 bitmap_initialize (&bb_info->pavin_cands, &reg_obstack);
592 bitmap_initialize (&bb_info->pavout_cands, &reg_obstack);
593 bitmap_initialize (&bb_info->avin_cands, &reg_obstack);
594 bitmap_initialize (&bb_info->avout_cands, &reg_obstack);
598 /* Dump all candidates to DUMP_FILE. */
599 static void
600 dump_cands (FILE *dump_file)
602 int i;
603 cand_t cand;
605 fprintf (dump_file, "\nCands:\n");
606 for (i = 0; i < (int) cands_num; i++)
608 cand = all_cands[i];
609 fprintf (dump_file, "%d (nop=%d, remat_regno=%d, reload_regno=%d):\n",
610 i, cand->nop, cand->regno, cand->reload_regno);
611 print_inline_rtx (dump_file, cand->insn, 6);
612 fprintf (dump_file, "\n");
616 /* Dump all candidates and BB data. */
617 static void
618 dump_candidates_and_remat_bb_data (void)
620 basic_block bb;
622 if (lra_dump_file == NULL)
623 return;
624 dump_cands (lra_dump_file);
625 FOR_EACH_BB_FN (bb, cfun)
627 fprintf (lra_dump_file, "\nBB %d:\n", bb->index);
628 /* Livein */
629 fprintf (lra_dump_file, " register live in:");
630 dump_regset (df_get_live_in (bb), lra_dump_file);
631 putc ('\n', lra_dump_file);
632 /* Liveout */
633 fprintf (lra_dump_file, " register live out:");
634 dump_regset (df_get_live_out (bb), lra_dump_file);
635 putc ('\n', lra_dump_file);
636 /* Changed/dead regs: */
637 fprintf (lra_dump_file, " changed regs:");
638 dump_regset (&get_remat_bb_data (bb)->changed_regs, lra_dump_file);
639 putc ('\n', lra_dump_file);
640 fprintf (lra_dump_file, " dead regs:");
641 dump_regset (&get_remat_bb_data (bb)->dead_regs, lra_dump_file);
642 putc ('\n', lra_dump_file);
643 lra_dump_bitmap_with_title ("cands generated in BB",
644 &get_remat_bb_data (bb)->gen_cands, bb->index);
645 lra_dump_bitmap_with_title ("livein cands in BB",
646 &get_remat_bb_data (bb)->livein_cands, bb->index);
647 lra_dump_bitmap_with_title ("pavin cands in BB",
648 &get_remat_bb_data (bb)->pavin_cands, bb->index);
649 lra_dump_bitmap_with_title ("pavout cands in BB",
650 &get_remat_bb_data (bb)->pavout_cands, bb->index);
651 lra_dump_bitmap_with_title ("avin cands in BB",
652 &get_remat_bb_data (bb)->avin_cands, bb->index);
653 lra_dump_bitmap_with_title ("avout cands in BB",
654 &get_remat_bb_data (bb)->avout_cands, bb->index);
658 /* Free all BB data. */
659 static void
660 finish_remat_bb_data (void)
662 basic_block bb;
664 FOR_EACH_BB_FN (bb, cfun)
666 bitmap_clear (&get_remat_bb_data (bb)->avout_cands);
667 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
668 bitmap_clear (&get_remat_bb_data (bb)->pavout_cands);
669 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
670 bitmap_clear (&get_remat_bb_data (bb)->livein_cands);
671 bitmap_clear (&get_remat_bb_data (bb)->gen_cands);
672 bitmap_clear (&get_remat_bb_data (bb)->dead_regs);
673 bitmap_clear (&get_remat_bb_data (bb)->changed_regs);
675 free (remat_bb_data);
680 /* Update changed_regs and dead_regs of BB from INSN. */
681 static void
682 set_bb_regs (basic_block bb, rtx_insn *insn)
684 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
685 struct lra_insn_reg *reg;
687 for (reg = id->regs; reg != NULL; reg = reg->next)
688 if (reg->type != OP_IN)
689 bitmap_set_bit (&get_remat_bb_data (bb)->changed_regs, reg->regno);
690 else
692 if (find_regno_note (insn, REG_DEAD, (unsigned) reg->regno) != NULL)
693 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, reg->regno);
695 if (CALL_P (insn))
696 for (int i = 0; i < call_used_regs_arr_len; i++)
697 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs,
698 call_used_regs_arr[i]);
701 /* Calculate changed_regs and dead_regs for each BB. */
702 static void
703 calculate_local_reg_remat_bb_data (void)
705 basic_block bb;
706 rtx_insn *insn;
708 FOR_EACH_BB_FN (bb, cfun)
709 FOR_BB_INSNS (bb, insn)
710 if (INSN_P (insn))
711 set_bb_regs (bb, insn);
716 /* Return true if REGNO is an input operand of INSN. */
717 static bool
718 input_regno_present_p (rtx_insn *insn, int regno)
720 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
721 struct lra_insn_reg *reg;
723 for (reg = id->regs; reg != NULL; reg = reg->next)
724 if (reg->type == OP_IN && reg->regno == regno)
725 return true;
726 return false;
729 /* Return true if a call used register is an input operand of INSN. */
730 static bool
731 call_used_input_regno_present_p (rtx_insn *insn)
733 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
734 struct lra_insn_reg *reg;
736 for (reg = id->regs; reg != NULL; reg = reg->next)
737 if (reg->type == OP_IN && reg->regno <= FIRST_PSEUDO_REGISTER
738 && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno))
739 return true;
740 return false;
743 /* Calculate livein_cands for each BB. */
744 static void
745 calculate_livein_cands (void)
747 basic_block bb;
749 FOR_EACH_BB_FN (bb, cfun)
751 bitmap livein_regs = df_get_live_in (bb);
752 bitmap livein_cands = &get_remat_bb_data (bb)->livein_cands;
753 for (unsigned int i = 0; i < cands_num; i++)
755 cand_t cand = all_cands[i];
756 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
757 struct lra_insn_reg *reg;
759 for (reg = id->regs; reg != NULL; reg = reg->next)
760 if (reg->type == OP_IN && ! bitmap_bit_p (livein_regs, reg->regno))
761 break;
762 if (reg == NULL)
763 bitmap_set_bit (livein_cands, i);
768 /* Calculate gen_cands for each BB. */
769 static void
770 calculate_gen_cands (void)
772 basic_block bb;
773 bitmap gen_cands;
774 bitmap_head gen_insns;
775 rtx_insn *insn;
777 bitmap_initialize (&gen_insns, &reg_obstack);
778 FOR_EACH_BB_FN (bb, cfun)
780 gen_cands = &get_remat_bb_data (bb)->gen_cands;
781 bitmap_clear (&gen_insns);
782 FOR_BB_INSNS (bb, insn)
783 if (INSN_P (insn))
785 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
786 struct lra_insn_reg *reg;
787 unsigned int uid;
788 bitmap_iterator bi;
789 cand_t cand;
790 rtx set;
791 int src_regno = -1, dst_regno = -1;
793 if ((set = single_set (insn)) != NULL
794 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
796 src_regno = REGNO (SET_SRC (set));
797 dst_regno = REGNO (SET_DEST (set));
800 /* Update gen_cands: */
801 bitmap_clear (&temp_bitmap);
802 for (reg = id->regs; reg != NULL; reg = reg->next)
803 if (reg->type != OP_IN
804 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
805 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
807 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
809 cand = insn_to_cand[INSN_UID (insn2)];
810 gcc_assert (cand != NULL);
811 /* Ignore the reload insn. */
812 if (src_regno == cand->reload_regno
813 && dst_regno == cand->regno)
814 continue;
815 if (cand->regno == reg->regno
816 || input_regno_present_p (insn2, reg->regno))
818 bitmap_clear_bit (gen_cands, cand->index);
819 bitmap_set_bit (&temp_bitmap, uid);
823 if (CALL_P (insn))
824 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
826 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
828 cand = insn_to_cand[INSN_UID (insn2)];
829 gcc_assert (cand != NULL);
830 if (call_used_input_regno_present_p (insn2))
832 bitmap_clear_bit (gen_cands, cand->index);
833 bitmap_set_bit (&temp_bitmap, uid);
836 bitmap_and_compl_into (&gen_insns, &temp_bitmap);
838 cand = insn_to_cand[INSN_UID (insn)];
839 if (cand != NULL)
841 bitmap_set_bit (gen_cands, cand->index);
842 bitmap_set_bit (&gen_insns, INSN_UID (insn));
846 bitmap_clear (&gen_insns);
851 /* The common transfer function used by the DF equation solver to
852 propagate (partial) availability info BB_IN to BB_OUT through block
853 with BB_INDEX according to the following equation:
855 bb.out = ((bb.in & bb.livein) - bb.killed) OR bb.gen
857 static bool
858 cand_trans_fun (int bb_index, bitmap bb_in, bitmap bb_out)
860 remat_bb_data_t bb_info;
861 bitmap bb_livein, bb_changed_regs, bb_dead_regs;
862 unsigned int cid;
863 bitmap_iterator bi;
865 bb_info = get_remat_bb_data_by_index (bb_index);
866 bb_livein = &bb_info->livein_cands;
867 bb_changed_regs = &bb_info->changed_regs;
868 bb_dead_regs = &bb_info->dead_regs;
869 /* Calculate killed avin cands -- cands whose regs are changed or
870 becoming dead in the BB. We calculate it here as we hope that
871 repeated calculations are compensated by smaller size of BB_IN in
872 comparison with all candidates number. */
873 bitmap_clear (&temp_bitmap);
874 EXECUTE_IF_SET_IN_BITMAP (bb_in, 0, cid, bi)
876 cand_t cand = all_cands[cid];
877 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
878 struct lra_insn_reg *reg;
880 if (! bitmap_bit_p (bb_livein, cid))
882 bitmap_set_bit (&temp_bitmap, cid);
883 continue;
885 for (reg = id->regs; reg != NULL; reg = reg->next)
886 /* Ignore all outputs which are not the regno for
887 rematerialization. */
888 if (reg->type == OP_OUT && reg->regno != cand->regno)
889 continue;
890 else if (bitmap_bit_p (bb_changed_regs, reg->regno)
891 || bitmap_bit_p (bb_dead_regs, reg->regno))
893 bitmap_set_bit (&temp_bitmap, cid);
894 break;
896 /* Check regno for rematerialization. */
897 if (bitmap_bit_p (bb_changed_regs, cand->regno)
898 || bitmap_bit_p (bb_dead_regs, cand->regno))
899 bitmap_set_bit (&temp_bitmap, cid);
901 return bitmap_ior_and_compl (bb_out,
902 &bb_info->gen_cands, bb_in, &temp_bitmap);
907 /* The transfer function used by the DF equation solver to propagate
908 partial candidate availability info through block with BB_INDEX
909 according to the following equation:
911 bb.pavout = ((bb.pavin & bb.livein) - bb.killed) OR bb.gen
913 static bool
914 cand_pav_trans_fun (int bb_index)
916 remat_bb_data_t bb_info;
918 bb_info = get_remat_bb_data_by_index (bb_index);
919 return cand_trans_fun (bb_index, &bb_info->pavin_cands,
920 &bb_info->pavout_cands);
923 /* The confluence function used by the DF equation solver to set up
924 cand_pav info for a block BB without predecessor. */
925 static void
926 cand_pav_con_fun_0 (basic_block bb)
928 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
931 /* The confluence function used by the DF equation solver to propagate
932 partial candidate availability info from predecessor to successor
933 on edge E (pred->bb) according to the following equation:
935 bb.pavin_cands = 0 for entry block | OR (pavout_cands of predecessors)
937 static bool
938 cand_pav_con_fun_n (edge e)
940 basic_block pred = e->src;
941 basic_block bb = e->dest;
942 remat_bb_data_t bb_info;
943 bitmap bb_pavin, pred_pavout;
945 bb_info = get_remat_bb_data (bb);
946 bb_pavin = &bb_info->pavin_cands;
947 pred_pavout = &get_remat_bb_data (pred)->pavout_cands;
948 return bitmap_ior_into (bb_pavin, pred_pavout);
953 /* The transfer function used by the DF equation solver to propagate
954 candidate availability info through block with BB_INDEX according
955 to the following equation:
957 bb.avout = ((bb.avin & bb.livein) - bb.killed) OR bb.gen
959 static bool
960 cand_av_trans_fun (int bb_index)
962 remat_bb_data_t bb_info;
964 bb_info = get_remat_bb_data_by_index (bb_index);
965 return cand_trans_fun (bb_index, &bb_info->avin_cands,
966 &bb_info->avout_cands);
969 /* The confluence function used by the DF equation solver to set up
970 cand_av info for a block BB without predecessor. */
971 static void
972 cand_av_con_fun_0 (basic_block bb)
974 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
977 /* The confluence function used by the DF equation solver to propagate
978 cand_av info from predecessor to successor on edge E (pred->bb)
979 according to the following equation:
981 bb.avin_cands = 0 for entry block | AND (avout_cands of predecessors)
983 static bool
984 cand_av_con_fun_n (edge e)
986 basic_block pred = e->src;
987 basic_block bb = e->dest;
988 remat_bb_data_t bb_info;
989 bitmap bb_avin, pred_avout;
991 bb_info = get_remat_bb_data (bb);
992 bb_avin = &bb_info->avin_cands;
993 pred_avout = &get_remat_bb_data (pred)->avout_cands;
994 return bitmap_and_into (bb_avin, pred_avout);
997 /* Calculate available candidates for each BB. */
998 static void
999 calculate_global_remat_bb_data (void)
1001 basic_block bb;
1003 df_simple_dataflow
1004 (DF_FORWARD, NULL, cand_pav_con_fun_0, cand_pav_con_fun_n,
1005 cand_pav_trans_fun, &all_blocks,
1006 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1007 /* Initialize avin by pavin. */
1008 FOR_EACH_BB_FN (bb, cfun)
1009 bitmap_copy (&get_remat_bb_data (bb)->avin_cands,
1010 &get_remat_bb_data (bb)->pavin_cands);
1011 df_simple_dataflow
1012 (DF_FORWARD, NULL, cand_av_con_fun_0, cand_av_con_fun_n,
1013 cand_av_trans_fun, &all_blocks,
1014 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1019 /* Setup sp offset attribute to SP_OFFSET for all INSNS. */
1020 static void
1021 change_sp_offset (rtx_insn *insns, HOST_WIDE_INT sp_offset)
1023 for (rtx_insn *insn = insns; insn != NULL; insn = NEXT_INSN (insn))
1024 eliminate_regs_in_insn (insn, false, false, sp_offset);
1027 /* Return start hard register of REG (can be a hard or a pseudo reg)
1028 or -1 (if it is a spilled pseudo). Return number of hard registers
1029 occupied by REG through parameter NREGS if the start hard reg is
1030 not negative. */
1031 static int
1032 get_hard_regs (struct lra_insn_reg *reg, int &nregs)
1034 int regno = reg->regno;
1035 int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno];
1037 if (hard_regno >= 0)
1038 nregs = hard_regno_nregs[hard_regno][reg->biggest_mode];
1039 return hard_regno;
1042 /* Make copy of and register scratch pseudos in rematerialized insn
1043 REMAT_INSN. */
1044 static void
1045 update_scratch_ops (rtx_insn *remat_insn)
1047 lra_insn_recog_data_t id = lra_get_insn_recog_data (remat_insn);
1048 struct lra_static_insn_data *static_id = id->insn_static_data;
1049 for (int i = 0; i < static_id->n_operands; i++)
1051 rtx *loc = id->operand_loc[i];
1052 if (! REG_P (*loc))
1053 continue;
1054 int regno = REGNO (*loc);
1055 if (! lra_former_scratch_p (regno))
1056 continue;
1057 *loc = lra_create_new_reg (GET_MODE (*loc), *loc,
1058 lra_get_allocno_class (regno),
1059 "scratch pseudo copy");
1060 lra_register_new_scratch_op (remat_insn, i);
1065 /* Insert rematerialization insns using the data-flow data calculated
1066 earlier. */
1067 static bool
1068 do_remat (void)
1070 rtx_insn *insn;
1071 basic_block bb;
1072 bitmap_head avail_cands;
1073 bool changed_p = false;
1074 /* Living hard regs and hard registers of living pseudos. */
1075 HARD_REG_SET live_hard_regs;
1077 bitmap_initialize (&avail_cands, &reg_obstack);
1078 FOR_EACH_BB_FN (bb, cfun)
1080 REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb));
1081 bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands,
1082 &get_remat_bb_data (bb)->livein_cands);
1083 FOR_BB_INSNS (bb, insn)
1085 if (!NONDEBUG_INSN_P (insn))
1086 continue;
1088 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
1089 struct lra_static_insn_data *static_id = id->insn_static_data;
1090 struct lra_insn_reg *reg;
1091 cand_t cand;
1092 unsigned int cid;
1093 bitmap_iterator bi;
1094 rtx set;
1095 int src_regno = -1, dst_regno = -1;
1097 if ((set = single_set (insn)) != NULL
1098 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
1100 src_regno = REGNO (SET_SRC (set));
1101 dst_regno = REGNO (SET_DEST (set));
1104 cand = NULL;
1105 /* Check possibility of rematerialization (hard reg or
1106 unpsilled pseudo <- spilled pseudo): */
1107 if (dst_regno >= 0 && src_regno >= FIRST_PSEUDO_REGISTER
1108 && reg_renumber[src_regno] < 0
1109 && (dst_regno < FIRST_PSEUDO_REGISTER
1110 || reg_renumber[dst_regno] >= 0))
1112 for (cand = regno_cands[src_regno];
1113 cand != NULL;
1114 cand = cand->next_regno_cand)
1115 if (bitmap_bit_p (&avail_cands, cand->index))
1116 break;
1118 int i, hard_regno, nregs;
1119 rtx_insn *remat_insn = NULL;
1120 HOST_WIDE_INT cand_sp_offset = 0;
1121 if (cand != NULL)
1123 lra_insn_recog_data_t cand_id
1124 = lra_get_insn_recog_data (cand->insn);
1125 struct lra_static_insn_data *static_cand_id
1126 = cand_id->insn_static_data;
1127 rtx saved_op = *cand_id->operand_loc[cand->nop];
1129 /* Check clobbers do not kill something living. */
1130 gcc_assert (REG_P (saved_op));
1131 int ignore_regno = REGNO (saved_op);
1133 for (reg = cand_id->regs; reg != NULL; reg = reg->next)
1134 if (reg->type != OP_IN && reg->regno != ignore_regno)
1136 hard_regno = get_hard_regs (reg, nregs);
1137 gcc_assert (hard_regno >= 0);
1138 for (i = 0; i < nregs; i++)
1139 if (TEST_HARD_REG_BIT (live_hard_regs, hard_regno + i))
1140 break;
1141 if (i < nregs)
1142 break;
1145 if (reg == NULL)
1147 for (reg = static_cand_id->hard_regs;
1148 reg != NULL;
1149 reg = reg->next)
1150 if (reg->type != OP_IN
1151 && TEST_HARD_REG_BIT (live_hard_regs, reg->regno))
1152 break;
1155 if (reg == NULL)
1157 *cand_id->operand_loc[cand->nop] = SET_DEST (set);
1158 lra_update_insn_regno_info (cand->insn);
1159 bool ok_p = lra_constrain_insn (cand->insn);
1160 if (ok_p)
1162 rtx remat_pat = copy_insn (PATTERN (cand->insn));
1164 start_sequence ();
1165 emit_insn (remat_pat);
1166 remat_insn = get_insns ();
1167 end_sequence ();
1168 if (recog_memoized (remat_insn) < 0)
1169 remat_insn = NULL;
1170 cand_sp_offset = cand_id->sp_offset;
1172 *cand_id->operand_loc[cand->nop] = saved_op;
1173 lra_update_insn_regno_info (cand->insn);
1177 bitmap_clear (&temp_bitmap);
1178 /* Update avail_cands (see analogous code for
1179 calculate_gen_cands). */
1180 for (reg = id->regs; reg != NULL; reg = reg->next)
1181 if (reg->type != OP_IN
1182 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1183 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1185 cand = all_cands[cid];
1187 /* Ignore the reload insn. */
1188 if (src_regno == cand->reload_regno
1189 && dst_regno == cand->regno)
1190 continue;
1191 if (cand->regno == reg->regno
1192 || input_regno_present_p (cand->insn, reg->regno))
1193 bitmap_set_bit (&temp_bitmap, cand->index);
1196 if (CALL_P (insn))
1197 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1199 cand = all_cands[cid];
1201 if (call_used_input_regno_present_p (cand->insn))
1202 bitmap_set_bit (&temp_bitmap, cand->index);
1205 bitmap_and_compl_into (&avail_cands, &temp_bitmap);
1206 if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL)
1207 bitmap_set_bit (&avail_cands, cand->index);
1209 if (remat_insn != NULL)
1211 HOST_WIDE_INT sp_offset_change = cand_sp_offset - id->sp_offset;
1212 if (sp_offset_change != 0)
1213 change_sp_offset (remat_insn, sp_offset_change);
1214 update_scratch_ops (remat_insn);
1215 lra_process_new_insns (insn, remat_insn, NULL,
1216 "Inserting rematerialization insn");
1217 lra_set_insn_deleted (insn);
1218 changed_p = true;
1219 continue;
1222 /* Update live hard regs: */
1223 for (reg = id->regs; reg != NULL; reg = reg->next)
1224 if (reg->type == OP_IN
1225 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1227 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1228 continue;
1229 for (i = 0; i < nregs; i++)
1230 CLEAR_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1232 /* Process also hard regs (e.g. CC register) which are part
1233 of insn definition. */
1234 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1235 if (reg->type == OP_IN
1236 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1237 CLEAR_HARD_REG_BIT (live_hard_regs, reg->regno);
1238 /* Inputs have been processed, now process outputs. */
1239 for (reg = id->regs; reg != NULL; reg = reg->next)
1240 if (reg->type != OP_IN
1241 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1243 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1244 continue;
1245 for (i = 0; i < nregs; i++)
1246 SET_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1248 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1249 if (reg->type != OP_IN
1250 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1251 SET_HARD_REG_BIT (live_hard_regs, reg->regno);
1254 bitmap_clear (&avail_cands);
1255 return changed_p;
1260 /* Current number of rematerialization iteration. */
1261 int lra_rematerialization_iter;
1263 /* Entry point of the rematerialization sub-pass. Return true if we
1264 did any rematerialization. */
1265 bool
1266 lra_remat (void)
1268 basic_block bb;
1269 bool result;
1270 int max_regno = max_reg_num ();
1272 if (! flag_lra_remat)
1273 return false;
1274 lra_rematerialization_iter++;
1275 if (lra_rematerialization_iter > LRA_MAX_REMATERIALIZATION_PASSES)
1276 return false;
1277 if (lra_dump_file != NULL)
1278 fprintf (lra_dump_file,
1279 "\n******** Rematerialization #%d: ********\n\n",
1280 lra_rematerialization_iter);
1281 timevar_push (TV_LRA_REMAT);
1282 insn_to_cand = XCNEWVEC (cand_t, get_max_uid ());
1283 regno_cands = XCNEWVEC (cand_t, max_regno);
1284 all_cands.create (8000);
1285 call_used_regs_arr_len = 0;
1286 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1287 if (call_used_regs[i])
1288 call_used_regs_arr[call_used_regs_arr_len++] = i;
1289 initiate_cand_table ();
1290 create_cands ();
1291 create_remat_bb_data ();
1292 bitmap_initialize (&temp_bitmap, &reg_obstack);
1293 calculate_local_reg_remat_bb_data ();
1294 calculate_livein_cands ();
1295 calculate_gen_cands ();
1296 bitmap_initialize (&all_blocks, &reg_obstack);
1297 FOR_ALL_BB_FN (bb, cfun)
1298 bitmap_set_bit (&all_blocks, bb->index);
1299 calculate_global_remat_bb_data ();
1300 dump_candidates_and_remat_bb_data ();
1301 result = do_remat ();
1302 all_cands.release ();
1303 bitmap_clear (&temp_bitmap);
1304 finish_remat_bb_data ();
1305 finish_cand_table ();
1306 bitmap_clear (&all_blocks);
1307 free (regno_cands);
1308 free (insn_to_cand);
1309 timevar_pop (TV_LRA_REMAT);
1310 return result;