2015-10-30 Vladimir Makarov <vmakarov@redhat.com>
[official-gcc.git] / gcc / lra-remat.c
blob3fbee4bd9af3c39f4c8b7e64f5afdb8e6aec2d0f
1 /* Rematerialize pseudos values.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov <vmakarov@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This code objective is to rematerialize spilled pseudo values. To
22 do this we calculate available insn candidates. The candidate is
23 available at some point if there is dominated set of insns with the
24 same pattern, the insn inputs are not dying or modified on any path
25 from the set, the outputs are not modified.
27 The insns containing memory or spilled pseudos (except for the
28 rematerialized pseudo) are not considered as such insns are not
29 profitable in comparison with regular loads of spilled pseudo
30 values. That simplifies the implementation as we don't need to
31 deal with memory aliasing.
33 To speed up available candidate calculation, we calculate partially
34 available candidates first and use them for initialization of the
35 availability. That is because (partial) availability sets are
36 sparse.
38 The rematerialization sub-pass could be improved further in the
39 following ways:
41 o We could make longer live ranges of inputs in the
42 rematerialization candidates if their hard registers are not used
43 for other purposes. This could be complicated if we need to
44 update BB live info information as LRA does not use
45 DF-infrastructure for compile-time reasons. This problem could
46 be overcome if constrain making live ranges longer only in BB/EBB
47 scope.
48 o We could use cost-based decision to choose rematerialization insn
49 (currently all insns without memory is can be used).
50 o We could use other free hard regs for unused output pseudos in
51 rematerialization candidates although such cases probably will
52 be very rare. */
55 #include "config.h"
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "rtl.h"
60 #include "df.h"
61 #include "insn-config.h"
62 #include "regs.h"
63 #include "ira.h"
64 #include "recog.h"
65 #include "lra.h"
66 #include "lra-int.h"
68 /* Number of candidates for rematerialization. */
69 static unsigned int cands_num;
71 /* The following is used for representation of call_used_reg_set in
72 form array whose elements are hard register numbers with nonzero bit
73 in CALL_USED_REG_SET. */
74 static int call_used_regs_arr_len;
75 static int call_used_regs_arr[FIRST_PSEUDO_REGISTER];
77 /* Bitmap used for different calculations. */
78 static bitmap_head temp_bitmap;
80 typedef struct cand *cand_t;
81 typedef const struct cand *const_cand_t;
83 /* Insn candidates for rematerialization. The candidate insn should
84 have the following properies:
85 o no any memory (as access to memory is non-profitable)
86 o no INOUT regs (it means no non-paradoxical subreg of output reg)
87 o one output spilled pseudo (or reload pseudo of a spilled pseudo)
88 o all other pseudos are with assigned hard regs. */
89 struct cand
91 /* Index of the candidates in all_cands. */
92 int index;
93 /* The candidate insn. */
94 rtx_insn *insn;
95 /* Insn pseudo regno for rematerialization. */
96 int regno;
97 /* Non-negative if a reload pseudo is in the insn instead of the
98 pseudo for rematerialization. */
99 int reload_regno;
100 /* Number of the operand containing the regno or its reload
101 regno. */
102 int nop;
103 /* Next candidate for the same regno. */
104 cand_t next_regno_cand;
107 /* Vector containing all candidates. */
108 static vec<cand_t> all_cands;
109 /* Map: insn -> candidate representing it. It is null if the insn can
110 not be used for rematerialization. */
111 static cand_t *insn_to_cand;
113 /* Map regno -> candidates can be used for the regno
114 rematerialization. */
115 static cand_t *regno_cands;
117 /* Data about basic blocks used for the rematerialization
118 sub-pass. */
119 struct remat_bb_data
121 /* Basic block about which the below data are. */
122 basic_block bb;
123 /* Registers changed in the basic block: */
124 bitmap_head changed_regs;
125 /* Registers becoming dead in the BB. */
126 bitmap_head dead_regs;
127 /* Cands present in the BB whose in/out regs are not changed after
128 the cands occurence and are not dead (except the reload
129 regno). */
130 bitmap_head gen_cands;
131 bitmap_head livein_cands; /* cands whose inputs live at the BB start. */
132 bitmap_head pavin_cands; /* cands partially available at BB entry. */
133 bitmap_head pavout_cands; /* cands partially available at BB exit. */
134 bitmap_head avin_cands; /* cands available at the entry of the BB. */
135 bitmap_head avout_cands; /* cands available at the exit of the BB. */
138 /* Array for all BB data. Indexed by the corresponding BB index. */
139 typedef struct remat_bb_data *remat_bb_data_t;
141 /* Basic blocks for data flow problems -- all bocks except the special
142 ones. */
143 static bitmap_head all_blocks;
145 /* All basic block data are referred through the following array. */
146 static remat_bb_data_t remat_bb_data;
148 /* Two small functions for access to the bb data. */
149 static inline remat_bb_data_t
150 get_remat_bb_data (basic_block bb)
152 return &remat_bb_data[(bb)->index];
155 static inline remat_bb_data_t
156 get_remat_bb_data_by_index (int index)
158 return &remat_bb_data[index];
163 /* Recursive hash function for RTL X. */
164 static hashval_t
165 rtx_hash (rtx x)
167 int i, j;
168 enum rtx_code code;
169 const char *fmt;
170 hashval_t val = 0;
172 if (x == 0)
173 return val;
175 code = GET_CODE (x);
176 val += (int) code + 4095;
178 /* Some RTL can be compared nonrecursively. */
179 switch (code)
181 case REG:
182 return val + REGNO (x);
184 case LABEL_REF:
185 return iterative_hash_object (XEXP (x, 0), val);
187 case SYMBOL_REF:
188 return iterative_hash_object (XSTR (x, 0), val);
190 case SCRATCH:
191 case CONST_DOUBLE:
192 case CONST_INT:
193 case CONST_VECTOR:
194 return val;
196 default:
197 break;
200 /* Hash the elements. */
201 fmt = GET_RTX_FORMAT (code);
202 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
204 switch (fmt[i])
206 case 'w':
207 val += XWINT (x, i);
208 break;
210 case 'n':
211 case 'i':
212 val += XINT (x, i);
213 break;
215 case 'V':
216 case 'E':
217 val += XVECLEN (x, i);
219 for (j = 0; j < XVECLEN (x, i); j++)
220 val += rtx_hash (XVECEXP (x, i, j));
221 break;
223 case 'e':
224 val += rtx_hash (XEXP (x, i));
225 break;
227 case 'S':
228 case 's':
229 val += htab_hash_string (XSTR (x, i));
230 break;
232 case 'u':
233 case '0':
234 case 't':
235 break;
237 /* It is believed that rtx's at this level will never
238 contain anything but integers and other rtx's, except for
239 within LABEL_REFs and SYMBOL_REFs. */
240 default:
241 abort ();
244 return val;
249 /* Hash table for the candidates. Different insns (e.g. structurally
250 the same insns or even insns with different unused output regs) can
251 be represented by the same candidate in the table. */
252 static htab_t cand_table;
254 /* Hash function for candidate CAND. */
255 static hashval_t
256 cand_hash (const void *cand)
258 const_cand_t c = (const_cand_t) cand;
259 lra_insn_recog_data_t id = lra_get_insn_recog_data (c->insn);
260 struct lra_static_insn_data *static_id = id->insn_static_data;
261 int nops = static_id->n_operands;
262 hashval_t hash = 0;
264 for (int i = 0; i < nops; i++)
265 if (i == c->nop)
266 hash = iterative_hash_object (c->regno, hash);
267 else if (static_id->operand[i].type == OP_IN)
268 hash = iterative_hash_object (*id->operand_loc[i], hash);
269 return hash;
272 /* Equal function for candidates CAND1 and CAND2. They are equal if
273 the corresponding candidate insns have the same code, the same
274 regno for rematerialization, the same input operands. */
275 static int
276 cand_eq_p (const void *cand1, const void *cand2)
278 const_cand_t c1 = (const_cand_t) cand1;
279 const_cand_t c2 = (const_cand_t) cand2;
280 lra_insn_recog_data_t id1 = lra_get_insn_recog_data (c1->insn);
281 lra_insn_recog_data_t id2 = lra_get_insn_recog_data (c2->insn);
282 struct lra_static_insn_data *static_id1 = id1->insn_static_data;
283 int nops = static_id1->n_operands;
285 if (c1->regno != c2->regno
286 || INSN_CODE (c1->insn) < 0
287 || INSN_CODE (c1->insn) != INSN_CODE (c2->insn))
288 return false;
289 gcc_assert (c1->nop == c2->nop);
290 for (int i = 0; i < nops; i++)
291 if (i != c1->nop && static_id1->operand[i].type == OP_IN
292 && *id1->operand_loc[i] != *id2->operand_loc[i])
293 return false;
294 return true;
297 /* Insert candidate CAND into the table if it is not there yet.
298 Return candidate which is in the table. */
299 static cand_t
300 insert_cand (cand_t cand)
302 void **entry_ptr;
304 entry_ptr = htab_find_slot (cand_table, cand, INSERT);
305 if (*entry_ptr == NULL)
306 *entry_ptr = (void *) cand;
307 return (cand_t) *entry_ptr;
310 /* Free candidate CAND memory. */
311 static void
312 free_cand (void *cand)
314 free (cand);
317 /* Initiate the candidate table. */
318 static void
319 initiate_cand_table (void)
321 cand_table = htab_create (8000, cand_hash, cand_eq_p,
322 (htab_del) free_cand);
325 /* Finish the candidate table. */
326 static void
327 finish_cand_table (void)
329 htab_delete (cand_table);
334 /* Return true if X contains memory or some UNSPEC. We can not just
335 check insn operands as memory or unspec might be not an operand
336 itself but contain an operand. Insn with memory access is not
337 profitable for rematerialization. Rematerialization of UNSPEC
338 might result in wrong code generation as the UNPEC effect is
339 unknown (e.g. generating a label). */
340 static bool
341 bad_for_rematerialization_p (rtx x)
343 int i, j;
344 const char *fmt;
345 enum rtx_code code;
347 if (MEM_P (x) || GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE)
348 return true;
349 code = GET_CODE (x);
350 fmt = GET_RTX_FORMAT (code);
351 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
353 if (fmt[i] == 'e')
355 if (bad_for_rematerialization_p (XEXP (x, i)))
356 return true;
358 else if (fmt[i] == 'E')
360 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
361 if (bad_for_rematerialization_p (XVECEXP (x, i, j)))
362 return true;
365 return false;
368 /* If INSN can not be used for rematerialization, return negative
369 value. If INSN can be considered as a candidate for
370 rematerialization, return value which is the operand number of the
371 pseudo for which the insn can be used for rematerialization. Here
372 we consider the insns without any memory, spilled pseudo (except
373 for the rematerialization pseudo), or dying or unused regs. */
374 static int
375 operand_to_remat (rtx_insn *insn)
377 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
378 struct lra_static_insn_data *static_id = id->insn_static_data;
379 struct lra_insn_reg *reg, *found_reg = NULL;
381 /* Don't rematerialize insns which can change PC. */
382 if (JUMP_P (insn) || CALL_P (insn))
383 return -1;
384 /* First find a pseudo which can be rematerialized. */
385 for (reg = id->regs; reg != NULL; reg = reg->next)
386 /* True FRAME_POINTER_NEEDED might be because we can not follow
387 changing sp offsets, e.g. alloca is used. If the insn contains
388 stack pointer in such case, we can not rematerialize it as we
389 can not know sp offset at a rematerialization place. */
390 if (reg->regno == STACK_POINTER_REGNUM && frame_pointer_needed)
391 return -1;
392 else if (reg->type == OP_OUT && ! reg->subreg_p
393 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
395 /* We permits only one spilled reg. */
396 if (found_reg != NULL)
397 return -1;
398 found_reg = reg;
400 /* IRA calculates conflicts separately for subregs of two words
401 pseudo. Even if the pseudo lives, e.g. one its subreg can be
402 used lately, another subreg hard register can be already used
403 for something else. In such case, it is not safe to
404 rematerialize the insn. */
405 else if (reg->type == OP_IN && reg->subreg_p
406 && reg->regno >= FIRST_PSEUDO_REGISTER
407 && (GET_MODE_SIZE (PSEUDO_REGNO_MODE (reg->regno))
408 == 2 * UNITS_PER_WORD))
409 return -1;
410 if (found_reg == NULL)
411 return -1;
412 if (found_reg->regno < FIRST_PSEUDO_REGISTER)
413 return -1;
414 if (bad_for_rematerialization_p (PATTERN (insn)))
415 return -1;
416 /* Check the other regs are not spilled. */
417 for (reg = id->regs; reg != NULL; reg = reg->next)
418 if (found_reg == reg)
419 continue;
420 else if (reg->type == OP_INOUT)
421 return -1;
422 else if (reg->regno >= FIRST_PSEUDO_REGISTER
423 && reg_renumber[reg->regno] < 0)
424 /* Another spilled reg. */
425 return -1;
426 else if (reg->type == OP_IN)
428 if (find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
429 /* We don't want to make live ranges longer. */
430 return -1;
431 /* Check that there is no output reg as the input one. */
432 for (struct lra_insn_reg *reg2 = id->regs;
433 reg2 != NULL;
434 reg2 = reg2->next)
435 if (reg2->type == OP_OUT && reg->regno == reg2->regno)
436 return -1;
437 if (reg->regno < FIRST_PSEUDO_REGISTER)
438 for (struct lra_insn_reg *reg2 = static_id->hard_regs;
439 reg2 != NULL;
440 reg2 = reg2->next)
441 if (reg2->type == OP_OUT
442 && reg->regno <= reg2->regno
443 && (reg2->regno
444 < (reg->regno
445 + hard_regno_nregs[reg->regno][reg->biggest_mode])))
446 return -1;
448 /* Find the rematerialization operand. */
449 int nop = static_id->n_operands;
450 for (int i = 0; i < nop; i++)
451 if (REG_P (*id->operand_loc[i])
452 && (int) REGNO (*id->operand_loc[i]) == found_reg->regno)
453 return i;
454 return -1;
457 /* Create candidate for INSN with rematerialization operand NOP and
458 REGNO. Insert the candidate into the table and set up the
459 corresponding INSN_TO_CAND element. */
460 static void
461 create_cand (rtx_insn *insn, int nop, int regno)
463 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
464 rtx reg = *id->operand_loc[nop];
465 gcc_assert (REG_P (reg));
466 int op_regno = REGNO (reg);
467 gcc_assert (op_regno >= FIRST_PSEUDO_REGISTER);
468 cand_t cand = XNEW (struct cand);
469 cand->insn = insn;
470 cand->nop = nop;
471 cand->regno = regno;
472 cand->reload_regno = op_regno == regno ? -1 : op_regno;
473 gcc_assert (cand->regno >= 0);
474 cand_t cand_in_table = insert_cand (cand);
475 insn_to_cand[INSN_UID (insn)] = cand_in_table;
476 if (cand != cand_in_table)
477 free (cand);
478 else
480 /* A new cand. */
481 cand->index = all_cands.length ();
482 all_cands.safe_push (cand);
483 cand->next_regno_cand = regno_cands[cand->regno];
484 regno_cands[cand->regno] = cand;
488 /* Create rematerialization candidates (inserting them into the
489 table). */
490 static void
491 create_cands (void)
493 rtx_insn *insn;
494 struct potential_cand
496 rtx_insn *insn;
497 int nop;
499 struct potential_cand *regno_potential_cand;
501 /* Create candidates. */
502 regno_potential_cand = XCNEWVEC (struct potential_cand, max_reg_num ());
503 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
504 if (INSN_P (insn))
506 rtx set;
507 int src_regno, dst_regno;
508 rtx_insn *insn2;
509 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
510 int nop = operand_to_remat (insn);
511 int regno = -1;
513 if ((set = single_set (insn)) != NULL
514 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
515 && ((src_regno = REGNO (SET_SRC (set)))
516 >= lra_constraint_new_regno_start)
517 && (dst_regno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
518 && reg_renumber[dst_regno] < 0
519 && (insn2 = regno_potential_cand[src_regno].insn) != NULL
520 && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn))
521 /* It is an output reload insn after insn can be
522 rematerialized (potential candidate). */
523 create_cand (insn2, regno_potential_cand[src_regno].nop, dst_regno);
524 if (nop < 0)
525 goto fail;
526 gcc_assert (REG_P (*id->operand_loc[nop]));
527 regno = REGNO (*id->operand_loc[nop]);
528 gcc_assert (regno >= FIRST_PSEUDO_REGISTER);
529 if (reg_renumber[regno] < 0)
530 create_cand (insn, nop, regno);
531 else
533 regno_potential_cand[regno].insn = insn;
534 regno_potential_cand[regno].nop = nop;
535 goto fail;
537 regno = -1;
538 fail:
539 for (struct lra_insn_reg *reg = id->regs; reg != NULL; reg = reg->next)
540 if (reg->type != OP_IN && reg->regno != regno
541 && reg->regno >= FIRST_PSEUDO_REGISTER)
542 regno_potential_cand[reg->regno].insn = NULL;
544 cands_num = all_cands.length ();
545 free (regno_potential_cand);
550 /* Create and initialize BB data. */
551 static void
552 create_remat_bb_data (void)
554 basic_block bb;
555 remat_bb_data_t bb_info;
557 remat_bb_data = XNEWVEC (struct remat_bb_data,
558 last_basic_block_for_fn (cfun));
559 FOR_ALL_BB_FN (bb, cfun)
561 gcc_checking_assert (bb->index >= 0
562 && bb->index < last_basic_block_for_fn (cfun));
563 bb_info = get_remat_bb_data (bb);
564 bb_info->bb = bb;
565 bitmap_initialize (&bb_info->changed_regs, &reg_obstack);
566 bitmap_initialize (&bb_info->dead_regs, &reg_obstack);
567 bitmap_initialize (&bb_info->gen_cands, &reg_obstack);
568 bitmap_initialize (&bb_info->livein_cands, &reg_obstack);
569 bitmap_initialize (&bb_info->pavin_cands, &reg_obstack);
570 bitmap_initialize (&bb_info->pavout_cands, &reg_obstack);
571 bitmap_initialize (&bb_info->avin_cands, &reg_obstack);
572 bitmap_initialize (&bb_info->avout_cands, &reg_obstack);
576 /* Dump all candidates to DUMP_FILE. */
577 static void
578 dump_cands (FILE *dump_file)
580 int i;
581 cand_t cand;
583 fprintf (dump_file, "\nCands:\n");
584 for (i = 0; i < (int) cands_num; i++)
586 cand = all_cands[i];
587 fprintf (dump_file, "%d (nop=%d, remat_regno=%d, reload_regno=%d):\n",
588 i, cand->nop, cand->regno, cand->reload_regno);
589 print_inline_rtx (dump_file, cand->insn, 6);
590 fprintf (dump_file, "\n");
594 /* Dump all candidates and BB data. */
595 static void
596 dump_candidates_and_remat_bb_data (void)
598 basic_block bb;
600 if (lra_dump_file == NULL)
601 return;
602 dump_cands (lra_dump_file);
603 FOR_EACH_BB_FN (bb, cfun)
605 fprintf (lra_dump_file, "\nBB %d:\n", bb->index);
606 /* Livein */
607 fprintf (lra_dump_file, " register live in:");
608 dump_regset (df_get_live_in (bb), lra_dump_file);
609 putc ('\n', lra_dump_file);
610 /* Liveout */
611 fprintf (lra_dump_file, " register live out:");
612 dump_regset (df_get_live_out (bb), lra_dump_file);
613 putc ('\n', lra_dump_file);
614 /* Changed/dead regs: */
615 fprintf (lra_dump_file, " changed regs:");
616 dump_regset (&get_remat_bb_data (bb)->changed_regs, lra_dump_file);
617 putc ('\n', lra_dump_file);
618 fprintf (lra_dump_file, " dead regs:");
619 dump_regset (&get_remat_bb_data (bb)->dead_regs, lra_dump_file);
620 putc ('\n', lra_dump_file);
621 lra_dump_bitmap_with_title ("cands generated in BB",
622 &get_remat_bb_data (bb)->gen_cands, bb->index);
623 lra_dump_bitmap_with_title ("livein cands in BB",
624 &get_remat_bb_data (bb)->livein_cands, bb->index);
625 lra_dump_bitmap_with_title ("pavin cands in BB",
626 &get_remat_bb_data (bb)->pavin_cands, bb->index);
627 lra_dump_bitmap_with_title ("pavout cands in BB",
628 &get_remat_bb_data (bb)->pavout_cands, bb->index);
629 lra_dump_bitmap_with_title ("avin cands in BB",
630 &get_remat_bb_data (bb)->avin_cands, bb->index);
631 lra_dump_bitmap_with_title ("avout cands in BB",
632 &get_remat_bb_data (bb)->avout_cands, bb->index);
636 /* Free all BB data. */
637 static void
638 finish_remat_bb_data (void)
640 basic_block bb;
642 FOR_EACH_BB_FN (bb, cfun)
644 bitmap_clear (&get_remat_bb_data (bb)->avout_cands);
645 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
646 bitmap_clear (&get_remat_bb_data (bb)->pavout_cands);
647 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
648 bitmap_clear (&get_remat_bb_data (bb)->livein_cands);
649 bitmap_clear (&get_remat_bb_data (bb)->gen_cands);
650 bitmap_clear (&get_remat_bb_data (bb)->dead_regs);
651 bitmap_clear (&get_remat_bb_data (bb)->changed_regs);
653 free (remat_bb_data);
658 /* Update changed_regs and dead_regs of BB from INSN. */
659 static void
660 set_bb_regs (basic_block bb, rtx_insn *insn)
662 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
663 struct lra_insn_reg *reg;
665 for (reg = id->regs; reg != NULL; reg = reg->next)
666 if (reg->type != OP_IN)
667 bitmap_set_bit (&get_remat_bb_data (bb)->changed_regs, reg->regno);
668 else
670 if (find_regno_note (insn, REG_DEAD, (unsigned) reg->regno) != NULL)
671 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, reg->regno);
673 if (CALL_P (insn))
674 for (int i = 0; i < call_used_regs_arr_len; i++)
675 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs,
676 call_used_regs_arr[i]);
679 /* Calculate changed_regs and dead_regs for each BB. */
680 static void
681 calculate_local_reg_remat_bb_data (void)
683 basic_block bb;
684 rtx_insn *insn;
686 FOR_EACH_BB_FN (bb, cfun)
687 FOR_BB_INSNS (bb, insn)
688 if (INSN_P (insn))
689 set_bb_regs (bb, insn);
694 /* Return true if REGNO is an input operand of INSN. */
695 static bool
696 input_regno_present_p (rtx_insn *insn, int regno)
698 int iter;
699 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
700 struct lra_static_insn_data *static_id = id->insn_static_data;
701 struct lra_insn_reg *reg;
703 for (iter = 0; iter < 2; iter++)
704 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
705 reg != NULL;
706 reg = reg->next)
707 if (reg->type == OP_IN && reg->regno == regno)
708 return true;
709 return false;
712 /* Return true if a call used register is an input operand of INSN. */
713 static bool
714 call_used_input_regno_present_p (rtx_insn *insn)
716 int iter;
717 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
718 struct lra_static_insn_data *static_id = id->insn_static_data;
719 struct lra_insn_reg *reg;
721 for (iter = 0; iter < 2; iter++)
722 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
723 reg != NULL;
724 reg = reg->next)
725 if (reg->type == OP_IN && reg->regno <= FIRST_PSEUDO_REGISTER
726 && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno))
727 return true;
728 return false;
731 /* Calculate livein_cands for each BB. */
732 static void
733 calculate_livein_cands (void)
735 basic_block bb;
737 FOR_EACH_BB_FN (bb, cfun)
739 bitmap livein_regs = df_get_live_in (bb);
740 bitmap livein_cands = &get_remat_bb_data (bb)->livein_cands;
741 for (unsigned int i = 0; i < cands_num; i++)
743 cand_t cand = all_cands[i];
744 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
745 struct lra_insn_reg *reg;
747 for (reg = id->regs; reg != NULL; reg = reg->next)
748 if (reg->type == OP_IN && ! bitmap_bit_p (livein_regs, reg->regno))
749 break;
750 if (reg == NULL)
751 bitmap_set_bit (livein_cands, i);
756 /* Calculate gen_cands for each BB. */
757 static void
758 calculate_gen_cands (void)
760 basic_block bb;
761 bitmap gen_cands;
762 bitmap_head gen_insns;
763 rtx_insn *insn;
765 bitmap_initialize (&gen_insns, &reg_obstack);
766 FOR_EACH_BB_FN (bb, cfun)
768 gen_cands = &get_remat_bb_data (bb)->gen_cands;
769 bitmap_clear (&gen_insns);
770 FOR_BB_INSNS (bb, insn)
771 if (INSN_P (insn))
773 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
774 struct lra_static_insn_data *static_id = id->insn_static_data;
775 struct lra_insn_reg *reg;
776 unsigned int uid;
777 bitmap_iterator bi;
778 cand_t cand;
779 rtx set;
780 int iter;
781 int src_regno = -1, dst_regno = -1;
783 if ((set = single_set (insn)) != NULL
784 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
786 src_regno = REGNO (SET_SRC (set));
787 dst_regno = REGNO (SET_DEST (set));
790 /* Update gen_cands: */
791 bitmap_clear (&temp_bitmap);
792 for (iter = 0; iter < 2; iter++)
793 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
794 reg != NULL;
795 reg = reg->next)
796 if (reg->type != OP_IN
797 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
798 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
800 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
802 cand = insn_to_cand[INSN_UID (insn2)];
803 gcc_assert (cand != NULL);
804 /* Ignore the reload insn. */
805 if (src_regno == cand->reload_regno
806 && dst_regno == cand->regno)
807 continue;
808 if (cand->regno == reg->regno
809 || input_regno_present_p (insn2, reg->regno))
811 bitmap_clear_bit (gen_cands, cand->index);
812 bitmap_set_bit (&temp_bitmap, uid);
816 if (CALL_P (insn))
817 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
819 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
821 cand = insn_to_cand[INSN_UID (insn2)];
822 gcc_assert (cand != NULL);
823 if (call_used_input_regno_present_p (insn2))
825 bitmap_clear_bit (gen_cands, cand->index);
826 bitmap_set_bit (&temp_bitmap, uid);
829 bitmap_and_compl_into (&gen_insns, &temp_bitmap);
831 cand = insn_to_cand[INSN_UID (insn)];
832 if (cand != NULL)
834 bitmap_set_bit (gen_cands, cand->index);
835 bitmap_set_bit (&gen_insns, INSN_UID (insn));
839 bitmap_clear (&gen_insns);
844 /* The common transfer function used by the DF equation solver to
845 propagate (partial) availability info BB_IN to BB_OUT through block
846 with BB_INDEX according to the following equation:
848 bb.out = ((bb.in & bb.livein) - bb.killed) OR bb.gen
850 static bool
851 cand_trans_fun (int bb_index, bitmap bb_in, bitmap bb_out)
853 remat_bb_data_t bb_info;
854 bitmap bb_livein, bb_changed_regs, bb_dead_regs;
855 unsigned int cid;
856 bitmap_iterator bi;
858 bb_info = get_remat_bb_data_by_index (bb_index);
859 bb_livein = &bb_info->livein_cands;
860 bb_changed_regs = &bb_info->changed_regs;
861 bb_dead_regs = &bb_info->dead_regs;
862 /* Calculate killed avin cands -- cands whose regs are changed or
863 becoming dead in the BB. We calculate it here as we hope that
864 repeated calculations are compensated by smaller size of BB_IN in
865 comparison with all candidates number. */
866 bitmap_clear (&temp_bitmap);
867 EXECUTE_IF_SET_IN_BITMAP (bb_in, 0, cid, bi)
869 cand_t cand = all_cands[cid];
870 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
871 struct lra_insn_reg *reg;
873 if (! bitmap_bit_p (bb_livein, cid))
875 bitmap_set_bit (&temp_bitmap, cid);
876 continue;
878 for (reg = id->regs; reg != NULL; reg = reg->next)
879 /* Ignore all outputs which are not the regno for
880 rematerialization. */
881 if (reg->type == OP_OUT && reg->regno != cand->regno)
882 continue;
883 else if (bitmap_bit_p (bb_changed_regs, reg->regno)
884 || bitmap_bit_p (bb_dead_regs, reg->regno))
886 bitmap_set_bit (&temp_bitmap, cid);
887 break;
889 /* Check regno for rematerialization. */
890 if (bitmap_bit_p (bb_changed_regs, cand->regno)
891 || bitmap_bit_p (bb_dead_regs, cand->regno))
892 bitmap_set_bit (&temp_bitmap, cid);
894 return bitmap_ior_and_compl (bb_out,
895 &bb_info->gen_cands, bb_in, &temp_bitmap);
900 /* The transfer function used by the DF equation solver to propagate
901 partial candidate availability info through block with BB_INDEX
902 according to the following equation:
904 bb.pavout = ((bb.pavin & bb.livein) - bb.killed) OR bb.gen
906 static bool
907 cand_pav_trans_fun (int bb_index)
909 remat_bb_data_t bb_info;
911 bb_info = get_remat_bb_data_by_index (bb_index);
912 return cand_trans_fun (bb_index, &bb_info->pavin_cands,
913 &bb_info->pavout_cands);
916 /* The confluence function used by the DF equation solver to set up
917 cand_pav info for a block BB without predecessor. */
918 static void
919 cand_pav_con_fun_0 (basic_block bb)
921 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
924 /* The confluence function used by the DF equation solver to propagate
925 partial candidate availability info from predecessor to successor
926 on edge E (pred->bb) according to the following equation:
928 bb.pavin_cands = 0 for entry block | OR (pavout_cands of predecessors)
930 static bool
931 cand_pav_con_fun_n (edge e)
933 basic_block pred = e->src;
934 basic_block bb = e->dest;
935 remat_bb_data_t bb_info;
936 bitmap bb_pavin, pred_pavout;
938 bb_info = get_remat_bb_data (bb);
939 bb_pavin = &bb_info->pavin_cands;
940 pred_pavout = &get_remat_bb_data (pred)->pavout_cands;
941 return bitmap_ior_into (bb_pavin, pred_pavout);
946 /* The transfer function used by the DF equation solver to propagate
947 candidate availability info through block with BB_INDEX according
948 to the following equation:
950 bb.avout = ((bb.avin & bb.livein) - bb.killed) OR bb.gen
952 static bool
953 cand_av_trans_fun (int bb_index)
955 remat_bb_data_t bb_info;
957 bb_info = get_remat_bb_data_by_index (bb_index);
958 return cand_trans_fun (bb_index, &bb_info->avin_cands,
959 &bb_info->avout_cands);
962 /* The confluence function used by the DF equation solver to set up
963 cand_av info for a block BB without predecessor. */
964 static void
965 cand_av_con_fun_0 (basic_block bb)
967 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
970 /* The confluence function used by the DF equation solver to propagate
971 cand_av info from predecessor to successor on edge E (pred->bb)
972 according to the following equation:
974 bb.avin_cands = 0 for entry block | AND (avout_cands of predecessors)
976 static bool
977 cand_av_con_fun_n (edge e)
979 basic_block pred = e->src;
980 basic_block bb = e->dest;
981 remat_bb_data_t bb_info;
982 bitmap bb_avin, pred_avout;
984 bb_info = get_remat_bb_data (bb);
985 bb_avin = &bb_info->avin_cands;
986 pred_avout = &get_remat_bb_data (pred)->avout_cands;
987 return bitmap_and_into (bb_avin, pred_avout);
990 /* Calculate available candidates for each BB. */
991 static void
992 calculate_global_remat_bb_data (void)
994 basic_block bb;
996 df_simple_dataflow
997 (DF_FORWARD, NULL, cand_pav_con_fun_0, cand_pav_con_fun_n,
998 cand_pav_trans_fun, &all_blocks,
999 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1000 /* Initialize avin by pavin. */
1001 FOR_EACH_BB_FN (bb, cfun)
1002 bitmap_copy (&get_remat_bb_data (bb)->avin_cands,
1003 &get_remat_bb_data (bb)->pavin_cands);
1004 df_simple_dataflow
1005 (DF_FORWARD, NULL, cand_av_con_fun_0, cand_av_con_fun_n,
1006 cand_av_trans_fun, &all_blocks,
1007 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1012 /* Setup sp offset attribute to SP_OFFSET for all INSNS. */
1013 static void
1014 change_sp_offset (rtx_insn *insns, HOST_WIDE_INT sp_offset)
1016 for (rtx_insn *insn = insns; insn != NULL; insn = NEXT_INSN (insn))
1017 eliminate_regs_in_insn (insn, false, false, sp_offset);
1020 /* Return start hard register of REG (can be a hard or a pseudo reg)
1021 or -1 (if it is a spilled pseudo). Return number of hard registers
1022 occupied by REG through parameter NREGS if the start hard reg is
1023 not negative. */
1024 static int
1025 get_hard_regs (struct lra_insn_reg *reg, int &nregs)
1027 int regno = reg->regno;
1028 int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno];
1030 if (hard_regno >= 0)
1031 nregs = hard_regno_nregs[hard_regno][reg->biggest_mode];
1032 return hard_regno;
1035 /* Make copy of and register scratch pseudos in rematerialized insn
1036 REMAT_INSN. */
1037 static void
1038 update_scratch_ops (rtx_insn *remat_insn)
1040 lra_insn_recog_data_t id = lra_get_insn_recog_data (remat_insn);
1041 struct lra_static_insn_data *static_id = id->insn_static_data;
1042 for (int i = 0; i < static_id->n_operands; i++)
1044 rtx *loc = id->operand_loc[i];
1045 if (! REG_P (*loc))
1046 continue;
1047 int regno = REGNO (*loc);
1048 if (! lra_former_scratch_p (regno))
1049 continue;
1050 *loc = lra_create_new_reg (GET_MODE (*loc), *loc,
1051 lra_get_allocno_class (regno),
1052 "scratch pseudo copy");
1053 lra_register_new_scratch_op (remat_insn, i);
1058 /* Insert rematerialization insns using the data-flow data calculated
1059 earlier. */
1060 static bool
1061 do_remat (void)
1063 rtx_insn *insn;
1064 basic_block bb;
1065 bitmap_head avail_cands;
1066 bool changed_p = false;
1067 /* Living hard regs and hard registers of living pseudos. */
1068 HARD_REG_SET live_hard_regs;
1070 bitmap_initialize (&avail_cands, &reg_obstack);
1071 FOR_EACH_BB_FN (bb, cfun)
1073 REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb));
1074 bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands,
1075 &get_remat_bb_data (bb)->livein_cands);
1076 FOR_BB_INSNS (bb, insn)
1078 if (!NONDEBUG_INSN_P (insn))
1079 continue;
1081 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
1082 struct lra_static_insn_data *static_id = id->insn_static_data;
1083 struct lra_insn_reg *reg;
1084 cand_t cand;
1085 unsigned int cid;
1086 bitmap_iterator bi;
1087 rtx set;
1088 int iter;
1089 int src_regno = -1, dst_regno = -1;
1091 if ((set = single_set (insn)) != NULL
1092 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
1094 src_regno = REGNO (SET_SRC (set));
1095 dst_regno = REGNO (SET_DEST (set));
1098 cand = NULL;
1099 /* Check possibility of rematerialization (hard reg or
1100 unpsilled pseudo <- spilled pseudo): */
1101 if (dst_regno >= 0 && src_regno >= FIRST_PSEUDO_REGISTER
1102 && reg_renumber[src_regno] < 0
1103 && (dst_regno < FIRST_PSEUDO_REGISTER
1104 || reg_renumber[dst_regno] >= 0))
1106 for (cand = regno_cands[src_regno];
1107 cand != NULL;
1108 cand = cand->next_regno_cand)
1109 if (bitmap_bit_p (&avail_cands, cand->index))
1110 break;
1112 int i, hard_regno, nregs;
1113 rtx_insn *remat_insn = NULL;
1114 HOST_WIDE_INT cand_sp_offset = 0;
1115 if (cand != NULL)
1117 lra_insn_recog_data_t cand_id
1118 = lra_get_insn_recog_data (cand->insn);
1119 struct lra_static_insn_data *static_cand_id
1120 = cand_id->insn_static_data;
1121 rtx saved_op = *cand_id->operand_loc[cand->nop];
1123 /* Check clobbers do not kill something living. */
1124 gcc_assert (REG_P (saved_op));
1125 int ignore_regno = REGNO (saved_op);
1127 for (reg = cand_id->regs; reg != NULL; reg = reg->next)
1128 if (reg->type != OP_IN && reg->regno != ignore_regno)
1130 hard_regno = get_hard_regs (reg, nregs);
1131 gcc_assert (hard_regno >= 0);
1132 for (i = 0; i < nregs; i++)
1133 if (TEST_HARD_REG_BIT (live_hard_regs, hard_regno + i))
1134 break;
1135 if (i < nregs)
1136 break;
1139 if (reg == NULL)
1141 for (reg = static_cand_id->hard_regs;
1142 reg != NULL;
1143 reg = reg->next)
1144 if (reg->type != OP_IN
1145 && TEST_HARD_REG_BIT (live_hard_regs, reg->regno))
1146 break;
1149 if (reg == NULL)
1151 *cand_id->operand_loc[cand->nop] = SET_DEST (set);
1152 lra_update_insn_regno_info (cand->insn);
1153 bool ok_p = lra_constrain_insn (cand->insn);
1154 if (ok_p)
1156 rtx remat_pat = copy_insn (PATTERN (cand->insn));
1158 start_sequence ();
1159 emit_insn (remat_pat);
1160 remat_insn = get_insns ();
1161 end_sequence ();
1162 if (recog_memoized (remat_insn) < 0)
1163 remat_insn = NULL;
1164 cand_sp_offset = cand_id->sp_offset;
1166 *cand_id->operand_loc[cand->nop] = saved_op;
1167 lra_update_insn_regno_info (cand->insn);
1171 bitmap_clear (&temp_bitmap);
1172 /* Update avail_cands (see analogous code for
1173 calculate_gen_cands). */
1174 for (iter = 0; iter < 2; iter++)
1175 for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
1176 reg != NULL;
1177 reg = reg->next)
1178 if (reg->type != OP_IN
1179 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1180 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1182 cand = all_cands[cid];
1184 /* Ignore the reload insn. */
1185 if (src_regno == cand->reload_regno
1186 && dst_regno == cand->regno)
1187 continue;
1188 if (cand->regno == reg->regno
1189 || input_regno_present_p (cand->insn, reg->regno))
1190 bitmap_set_bit (&temp_bitmap, cand->index);
1193 if (CALL_P (insn))
1194 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1196 cand = all_cands[cid];
1198 if (call_used_input_regno_present_p (cand->insn))
1199 bitmap_set_bit (&temp_bitmap, cand->index);
1202 bitmap_and_compl_into (&avail_cands, &temp_bitmap);
1203 if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL)
1204 bitmap_set_bit (&avail_cands, cand->index);
1206 if (remat_insn != NULL)
1208 HOST_WIDE_INT sp_offset_change = cand_sp_offset - id->sp_offset;
1209 if (sp_offset_change != 0)
1210 change_sp_offset (remat_insn, sp_offset_change);
1211 update_scratch_ops (remat_insn);
1212 lra_process_new_insns (insn, remat_insn, NULL,
1213 "Inserting rematerialization insn");
1214 lra_set_insn_deleted (insn);
1215 changed_p = true;
1216 continue;
1219 /* Update live hard regs: */
1220 for (reg = id->regs; reg != NULL; reg = reg->next)
1221 if (reg->type == OP_IN
1222 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1224 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1225 continue;
1226 for (i = 0; i < nregs; i++)
1227 CLEAR_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1229 /* Process also hard regs (e.g. CC register) which are part
1230 of insn definition. */
1231 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1232 if (reg->type == OP_IN
1233 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1234 CLEAR_HARD_REG_BIT (live_hard_regs, reg->regno);
1235 /* Inputs have been processed, now process outputs. */
1236 for (reg = id->regs; reg != NULL; reg = reg->next)
1237 if (reg->type != OP_IN
1238 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1240 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1241 continue;
1242 for (i = 0; i < nregs; i++)
1243 SET_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1245 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1246 if (reg->type != OP_IN
1247 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1248 SET_HARD_REG_BIT (live_hard_regs, reg->regno);
1251 bitmap_clear (&avail_cands);
1252 return changed_p;
1257 /* Current number of rematerialization iteration. */
1258 int lra_rematerialization_iter;
1260 /* Entry point of the rematerialization sub-pass. Return true if we
1261 did any rematerialization. */
1262 bool
1263 lra_remat (void)
1265 basic_block bb;
1266 bool result;
1267 int max_regno = max_reg_num ();
1269 if (! flag_lra_remat)
1270 return false;
1271 lra_rematerialization_iter++;
1272 if (lra_rematerialization_iter > LRA_MAX_REMATERIALIZATION_PASSES)
1273 return false;
1274 if (lra_dump_file != NULL)
1275 fprintf (lra_dump_file,
1276 "\n******** Rematerialization #%d: ********\n\n",
1277 lra_rematerialization_iter);
1278 timevar_push (TV_LRA_REMAT);
1279 insn_to_cand = XCNEWVEC (cand_t, get_max_uid ());
1280 regno_cands = XCNEWVEC (cand_t, max_regno);
1281 all_cands.create (8000);
1282 call_used_regs_arr_len = 0;
1283 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1284 if (call_used_regs[i])
1285 call_used_regs_arr[call_used_regs_arr_len++] = i;
1286 initiate_cand_table ();
1287 create_cands ();
1288 create_remat_bb_data ();
1289 bitmap_initialize (&temp_bitmap, &reg_obstack);
1290 calculate_local_reg_remat_bb_data ();
1291 calculate_livein_cands ();
1292 calculate_gen_cands ();
1293 bitmap_initialize (&all_blocks, &reg_obstack);
1294 FOR_ALL_BB_FN (bb, cfun)
1295 bitmap_set_bit (&all_blocks, bb->index);
1296 calculate_global_remat_bb_data ();
1297 dump_candidates_and_remat_bb_data ();
1298 result = do_remat ();
1299 all_cands.release ();
1300 bitmap_clear (&temp_bitmap);
1301 finish_remat_bb_data ();
1302 finish_cand_table ();
1303 bitmap_clear (&all_blocks);
1304 free (regno_cands);
1305 free (insn_to_cand);
1306 timevar_pop (TV_LRA_REMAT);
1307 return result;