Check int_size_in_bytes in ix86_return_in_memory
[official-gcc.git] / gcc / lra-remat.c
blob1c259a3b76686d54c3c17ce080f0a1ab0ffd23ba
1 /* Rematerialize pseudos values.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov <vmakarov@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This code objective is to rematerialize spilled pseudo values. To
22 do this we calculate available insn candidates. The candidate is
23 available at some point if there is dominated set of insns with the
24 same pattern, the insn inputs are not dying or modified on any path
25 from the set, the outputs are not modified.
27 The insns containing memory or spilled pseudos (except for the
28 rematerialized pseudo) are not considered as such insns are not
29 profitable in comparison with regular loads of spilled pseudo
30 values. That simplifies the implementation as we don't need to
31 deal with memory aliasing.
33 To speed up available candidate calculation, we calculate partially
34 available candidates first and use them for initialization of the
35 availability. That is because (partial) availability sets are
36 sparse.
38 The rematerialization sub-pass could be improved further in the
39 following ways:
41 o We could make longer live ranges of inputs in the
42 rematerialization candidates if their hard registers are not used
43 for other purposes. This could be complicated if we need to
44 update BB live info information as LRA does not use
45 DF-infrastructure for compile-time reasons. This problem could
46 be overcome if constrain making live ranges longer only in BB/EBB
47 scope.
48 o We could use cost-based decision to choose rematerialization insn
49 (currently all insns without memory is can be used).
50 o We could use other free hard regs for unused output pseudos in
51 rematerialization candidates although such cases probably will
52 be very rare. */
55 #include "config.h"
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "tree.h"
60 #include "rtl.h"
61 #include "df.h"
62 #include "rtl-error.h"
63 #include "tm_p.h"
64 #include "target.h"
65 #include "insn-config.h"
66 #include "recog.h"
67 #include "output.h"
68 #include "regs.h"
69 #include "flags.h"
70 #include "alias.h"
71 #include "expmed.h"
72 #include "dojump.h"
73 #include "explow.h"
74 #include "calls.h"
75 #include "emit-rtl.h"
76 #include "varasm.h"
77 #include "stmt.h"
78 #include "expr.h"
79 #include "except.h"
80 #include "ira.h"
81 #include "sparseset.h"
82 #include "params.h"
83 #include "lra.h"
84 #include "insn-attr.h"
85 #include "insn-codes.h"
86 #include "lra-int.h"
88 /* Number of candidates for rematerialization. */
89 static unsigned int cands_num;
91 /* The following is used for representation of call_used_reg_set in
92 form array whose elements are hard register numbers with nonzero bit
93 in CALL_USED_REG_SET. */
94 static int call_used_regs_arr_len;
95 static int call_used_regs_arr[FIRST_PSEUDO_REGISTER];
97 /* Bitmap used for different calculations. */
98 static bitmap_head temp_bitmap;
100 typedef struct cand *cand_t;
101 typedef const struct cand *const_cand_t;
103 /* Insn candidates for rematerialization. The candidate insn should
104 have the following properies:
105 o no any memory (as access to memory is non-profitable)
106 o no INOUT regs (it means no non-paradoxical subreg of output reg)
107 o one output spilled pseudo (or reload pseudo of a spilled pseudo)
108 o all other pseudos are with assigned hard regs. */
109 struct cand
111 /* Index of the candidates in all_cands. */
112 int index;
113 /* The candidate insn. */
114 rtx_insn *insn;
115 /* Insn pseudo regno for rematerialization. */
116 int regno;
117 /* Non-negative if a reload pseudo is in the insn instead of the
118 pseudo for rematerialization. */
119 int reload_regno;
120 /* Number of the operand containing the regno or its reload
121 regno. */
122 int nop;
123 /* Next candidate for the same regno. */
124 cand_t next_regno_cand;
127 /* Vector containing all candidates. */
128 static vec<cand_t> all_cands;
129 /* Map: insn -> candidate representing it. It is null if the insn can
130 not be used for rematerialization. */
131 static cand_t *insn_to_cand;
133 /* Map regno -> candidates can be used for the regno
134 rematerialization. */
135 static cand_t *regno_cands;
137 /* Data about basic blocks used for the rematerialization
138 sub-pass. */
139 struct remat_bb_data
141 /* Basic block about which the below data are. */
142 basic_block bb;
143 /* Registers changed in the basic block: */
144 bitmap_head changed_regs;
145 /* Registers becoming dead in the BB. */
146 bitmap_head dead_regs;
147 /* Cands present in the BB whose in/out regs are not changed after
148 the cands occurence and are not dead (except the reload
149 regno). */
150 bitmap_head gen_cands;
151 bitmap_head livein_cands; /* cands whose inputs live at the BB start. */
152 bitmap_head pavin_cands; /* cands partially available at BB entry. */
153 bitmap_head pavout_cands; /* cands partially available at BB exit. */
154 bitmap_head avin_cands; /* cands available at the entry of the BB. */
155 bitmap_head avout_cands; /* cands available at the exit of the BB. */
158 /* Array for all BB data. Indexed by the corresponding BB index. */
159 typedef struct remat_bb_data *remat_bb_data_t;
161 /* Basic blocks for data flow problems -- all bocks except the special
162 ones. */
163 static bitmap_head all_blocks;
165 /* All basic block data are referred through the following array. */
166 static remat_bb_data_t remat_bb_data;
168 /* Two small functions for access to the bb data. */
169 static inline remat_bb_data_t
170 get_remat_bb_data (basic_block bb)
172 return &remat_bb_data[(bb)->index];
175 static inline remat_bb_data_t
176 get_remat_bb_data_by_index (int index)
178 return &remat_bb_data[index];
183 /* Recursive hash function for RTL X. */
184 static hashval_t
185 rtx_hash (rtx x)
187 int i, j;
188 enum rtx_code code;
189 const char *fmt;
190 hashval_t val = 0;
192 if (x == 0)
193 return val;
195 code = GET_CODE (x);
196 val += (int) code + 4095;
198 /* Some RTL can be compared nonrecursively. */
199 switch (code)
201 case REG:
202 return val + REGNO (x);
204 case LABEL_REF:
205 return iterative_hash_object (XEXP (x, 0), val);
207 case SYMBOL_REF:
208 return iterative_hash_object (XSTR (x, 0), val);
210 case SCRATCH:
211 case CONST_DOUBLE:
212 case CONST_INT:
213 case CONST_VECTOR:
214 return val;
216 default:
217 break;
220 /* Hash the elements. */
221 fmt = GET_RTX_FORMAT (code);
222 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
224 switch (fmt[i])
226 case 'w':
227 val += XWINT (x, i);
228 break;
230 case 'n':
231 case 'i':
232 val += XINT (x, i);
233 break;
235 case 'V':
236 case 'E':
237 val += XVECLEN (x, i);
239 for (j = 0; j < XVECLEN (x, i); j++)
240 val += rtx_hash (XVECEXP (x, i, j));
241 break;
243 case 'e':
244 val += rtx_hash (XEXP (x, i));
245 break;
247 case 'S':
248 case 's':
249 val += htab_hash_string (XSTR (x, i));
250 break;
252 case 'u':
253 case '0':
254 case 't':
255 break;
257 /* It is believed that rtx's at this level will never
258 contain anything but integers and other rtx's, except for
259 within LABEL_REFs and SYMBOL_REFs. */
260 default:
261 abort ();
264 return val;
269 /* Hash table for the candidates. Different insns (e.g. structurally
270 the same insns or even insns with different unused output regs) can
271 be represented by the same candidate in the table. */
272 static htab_t cand_table;
274 /* Hash function for candidate CAND. */
275 static hashval_t
276 cand_hash (const void *cand)
278 const_cand_t c = (const_cand_t) cand;
279 lra_insn_recog_data_t id = lra_get_insn_recog_data (c->insn);
280 struct lra_static_insn_data *static_id = id->insn_static_data;
281 int nops = static_id->n_operands;
282 hashval_t hash = 0;
284 for (int i = 0; i < nops; i++)
285 if (i == c->nop)
286 hash = iterative_hash_object (c->regno, hash);
287 else if (static_id->operand[i].type == OP_IN)
288 hash = iterative_hash_object (*id->operand_loc[i], hash);
289 return hash;
292 /* Equal function for candidates CAND1 and CAND2. They are equal if
293 the corresponding candidate insns have the same code, the same
294 regno for rematerialization, the same input operands. */
295 static int
296 cand_eq_p (const void *cand1, const void *cand2)
298 const_cand_t c1 = (const_cand_t) cand1;
299 const_cand_t c2 = (const_cand_t) cand2;
300 lra_insn_recog_data_t id1 = lra_get_insn_recog_data (c1->insn);
301 lra_insn_recog_data_t id2 = lra_get_insn_recog_data (c2->insn);
302 struct lra_static_insn_data *static_id1 = id1->insn_static_data;
303 int nops = static_id1->n_operands;
305 if (c1->regno != c2->regno
306 || INSN_CODE (c1->insn) < 0
307 || INSN_CODE (c1->insn) != INSN_CODE (c2->insn))
308 return false;
309 gcc_assert (c1->nop == c2->nop);
310 for (int i = 0; i < nops; i++)
311 if (i != c1->nop && static_id1->operand[i].type == OP_IN
312 && *id1->operand_loc[i] != *id2->operand_loc[i])
313 return false;
314 return true;
317 /* Insert candidate CAND into the table if it is not there yet.
318 Return candidate which is in the table. */
319 static cand_t
320 insert_cand (cand_t cand)
322 void **entry_ptr;
324 entry_ptr = htab_find_slot (cand_table, cand, INSERT);
325 if (*entry_ptr == NULL)
326 *entry_ptr = (void *) cand;
327 return (cand_t) *entry_ptr;
330 /* Free candidate CAND memory. */
331 static void
332 free_cand (void *cand)
334 free (cand);
337 /* Initiate the candidate table. */
338 static void
339 initiate_cand_table (void)
341 cand_table = htab_create (8000, cand_hash, cand_eq_p,
342 (htab_del) free_cand);
345 /* Finish the candidate table. */
346 static void
347 finish_cand_table (void)
349 htab_delete (cand_table);
354 /* Return true if X contains memory or some UNSPEC. We can not just
355 check insn operands as memory or unspec might be not an operand
356 itself but contain an operand. Insn with memory access is not
357 profitable for rematerialization. Rematerialization of UNSPEC
358 might result in wrong code generation as the UNPEC effect is
359 unknown (e.g. generating a label). */
360 static bool
361 bad_for_rematerialization_p (rtx x)
363 int i, j;
364 const char *fmt;
365 enum rtx_code code;
367 if (MEM_P (x) || GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE)
368 return true;
369 code = GET_CODE (x);
370 fmt = GET_RTX_FORMAT (code);
371 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
373 if (fmt[i] == 'e')
375 if (bad_for_rematerialization_p (XEXP (x, i)))
376 return true;
378 else if (fmt[i] == 'E')
380 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
381 if (bad_for_rematerialization_p (XVECEXP (x, i, j)))
382 return true;
385 return false;
388 /* If INSN can not be used for rematerialization, return negative
389 value. If INSN can be considered as a candidate for
390 rematerialization, return value which is the operand number of the
391 pseudo for which the insn can be used for rematerialization. Here
392 we consider the insns without any memory, spilled pseudo (except
393 for the rematerialization pseudo), or dying or unused regs. */
394 static int
395 operand_to_remat (rtx_insn *insn)
397 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
398 struct lra_static_insn_data *static_id = id->insn_static_data;
399 struct lra_insn_reg *reg, *found_reg = NULL;
401 /* Don't rematerialize insns which can change PC. */
402 if (JUMP_P (insn) || CALL_P (insn))
403 return -1;
404 /* First find a pseudo which can be rematerialized. */
405 for (reg = id->regs; reg != NULL; reg = reg->next)
406 /* True FRAME_POINTER_NEEDED might be because we can not follow
407 changing sp offsets, e.g. alloca is used. If the insn contains
408 stack pointer in such case, we can not rematerialize it as we
409 can not know sp offset at a rematerialization place. */
410 if (reg->regno == STACK_POINTER_REGNUM && frame_pointer_needed)
411 return -1;
412 else if (reg->type == OP_OUT && ! reg->subreg_p
413 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
415 /* We permits only one spilled reg. */
416 if (found_reg != NULL)
417 return -1;
418 found_reg = reg;
420 if (found_reg == NULL)
421 return -1;
422 if (found_reg->regno < FIRST_PSEUDO_REGISTER)
423 return -1;
424 if (bad_for_rematerialization_p (PATTERN (insn)))
425 return -1;
426 /* Check the other regs are not spilled. */
427 for (reg = id->regs; reg != NULL; reg = reg->next)
428 if (found_reg == reg)
429 continue;
430 else if (reg->type == OP_INOUT)
431 return -1;
432 else if (reg->regno >= FIRST_PSEUDO_REGISTER
433 && reg_renumber[reg->regno] < 0)
434 /* Another spilled reg. */
435 return -1;
436 else if (reg->type == OP_IN)
438 if (find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
439 /* We don't want to make live ranges longer. */
440 return -1;
441 /* Check that there is no output reg as the input one. */
442 for (struct lra_insn_reg *reg2 = id->regs;
443 reg2 != NULL;
444 reg2 = reg2->next)
445 if (reg2->type == OP_OUT && reg->regno == reg2->regno)
446 return -1;
447 if (reg->regno < FIRST_PSEUDO_REGISTER)
448 for (struct lra_insn_reg *reg2 = static_id->hard_regs;
449 reg2 != NULL;
450 reg2 = reg2->next)
451 if (reg2->type == OP_OUT
452 && reg->regno <= reg2->regno
453 && (reg2->regno
454 < (reg->regno
455 + hard_regno_nregs[reg->regno][reg->biggest_mode])))
456 return -1;
458 /* Find the rematerialization operand. */
459 int nop = static_id->n_operands;
460 for (int i = 0; i < nop; i++)
461 if (REG_P (*id->operand_loc[i])
462 && (int) REGNO (*id->operand_loc[i]) == found_reg->regno)
463 return i;
464 return -1;
467 /* Create candidate for INSN with rematerialization operand NOP and
468 REGNO. Insert the candidate into the table and set up the
469 corresponding INSN_TO_CAND element. */
470 static void
471 create_cand (rtx_insn *insn, int nop, int regno)
473 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
474 rtx reg = *id->operand_loc[nop];
475 gcc_assert (REG_P (reg));
476 int op_regno = REGNO (reg);
477 gcc_assert (op_regno >= FIRST_PSEUDO_REGISTER);
478 cand_t cand = XNEW (struct cand);
479 cand->insn = insn;
480 cand->nop = nop;
481 cand->regno = regno;
482 cand->reload_regno = op_regno == regno ? -1 : op_regno;
483 gcc_assert (cand->regno >= 0);
484 cand_t cand_in_table = insert_cand (cand);
485 insn_to_cand[INSN_UID (insn)] = cand_in_table;
486 if (cand != cand_in_table)
487 free (cand);
488 else
490 /* A new cand. */
491 cand->index = all_cands.length ();
492 all_cands.safe_push (cand);
493 cand->next_regno_cand = regno_cands[cand->regno];
494 regno_cands[cand->regno] = cand;
498 /* Create rematerialization candidates (inserting them into the
499 table). */
500 static void
501 create_cands (void)
503 rtx_insn *insn;
504 struct potential_cand
506 rtx_insn *insn;
507 int nop;
509 struct potential_cand *regno_potential_cand;
511 /* Create candidates. */
512 regno_potential_cand = XCNEWVEC (struct potential_cand, max_reg_num ());
513 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
514 if (INSN_P (insn))
516 rtx set;
517 int src_regno, dst_regno;
518 rtx_insn *insn2;
519 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
520 int nop = operand_to_remat (insn);
521 int regno = -1;
523 if ((set = single_set (insn)) != NULL
524 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
525 && ((src_regno = REGNO (SET_SRC (set)))
526 >= lra_constraint_new_regno_start)
527 && (dst_regno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
528 && reg_renumber[dst_regno] < 0
529 && (insn2 = regno_potential_cand[src_regno].insn) != NULL
530 && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn))
531 /* It is an output reload insn after insn can be
532 rematerialized (potential candidate). */
533 create_cand (insn2, regno_potential_cand[src_regno].nop, dst_regno);
534 if (nop < 0)
535 goto fail;
536 gcc_assert (REG_P (*id->operand_loc[nop]));
537 regno = REGNO (*id->operand_loc[nop]);
538 gcc_assert (regno >= FIRST_PSEUDO_REGISTER);
539 if (reg_renumber[regno] < 0)
540 create_cand (insn, nop, regno);
541 else
543 regno_potential_cand[regno].insn = insn;
544 regno_potential_cand[regno].nop = nop;
545 goto fail;
547 regno = -1;
548 fail:
549 for (struct lra_insn_reg *reg = id->regs; reg != NULL; reg = reg->next)
550 if (reg->type != OP_IN && reg->regno != regno
551 && reg->regno >= FIRST_PSEUDO_REGISTER)
552 regno_potential_cand[reg->regno].insn = NULL;
554 cands_num = all_cands.length ();
555 free (regno_potential_cand);
560 /* Create and initialize BB data. */
561 static void
562 create_remat_bb_data (void)
564 basic_block bb;
565 remat_bb_data_t bb_info;
567 remat_bb_data = XNEWVEC (struct remat_bb_data,
568 last_basic_block_for_fn (cfun));
569 FOR_ALL_BB_FN (bb, cfun)
571 #ifdef ENABLE_CHECKING
572 if (bb->index < 0 || bb->index >= last_basic_block_for_fn (cfun))
573 abort ();
574 #endif
575 bb_info = get_remat_bb_data (bb);
576 bb_info->bb = bb;
577 bitmap_initialize (&bb_info->changed_regs, &reg_obstack);
578 bitmap_initialize (&bb_info->dead_regs, &reg_obstack);
579 bitmap_initialize (&bb_info->gen_cands, &reg_obstack);
580 bitmap_initialize (&bb_info->livein_cands, &reg_obstack);
581 bitmap_initialize (&bb_info->pavin_cands, &reg_obstack);
582 bitmap_initialize (&bb_info->pavout_cands, &reg_obstack);
583 bitmap_initialize (&bb_info->avin_cands, &reg_obstack);
584 bitmap_initialize (&bb_info->avout_cands, &reg_obstack);
588 /* Dump all candidates to DUMP_FILE. */
589 static void
590 dump_cands (FILE *dump_file)
592 int i;
593 cand_t cand;
595 fprintf (dump_file, "\nCands:\n");
596 for (i = 0; i < (int) cands_num; i++)
598 cand = all_cands[i];
599 fprintf (dump_file, "%d (nop=%d, remat_regno=%d, reload_regno=%d):\n",
600 i, cand->nop, cand->regno, cand->reload_regno);
601 print_inline_rtx (dump_file, cand->insn, 6);
602 fprintf (dump_file, "\n");
606 /* Dump all candidates and BB data. */
607 static void
608 dump_candidates_and_remat_bb_data (void)
610 basic_block bb;
612 if (lra_dump_file == NULL)
613 return;
614 dump_cands (lra_dump_file);
615 FOR_EACH_BB_FN (bb, cfun)
617 fprintf (lra_dump_file, "\nBB %d:\n", bb->index);
618 /* Livein */
619 fprintf (lra_dump_file, " register live in:");
620 dump_regset (df_get_live_in (bb), lra_dump_file);
621 putc ('\n', lra_dump_file);
622 /* Liveout */
623 fprintf (lra_dump_file, " register live out:");
624 dump_regset (df_get_live_out (bb), lra_dump_file);
625 putc ('\n', lra_dump_file);
626 /* Changed/dead regs: */
627 fprintf (lra_dump_file, " changed regs:");
628 dump_regset (&get_remat_bb_data (bb)->changed_regs, lra_dump_file);
629 putc ('\n', lra_dump_file);
630 fprintf (lra_dump_file, " dead regs:");
631 dump_regset (&get_remat_bb_data (bb)->dead_regs, lra_dump_file);
632 putc ('\n', lra_dump_file);
633 lra_dump_bitmap_with_title ("cands generated in BB",
634 &get_remat_bb_data (bb)->gen_cands, bb->index);
635 lra_dump_bitmap_with_title ("livein cands in BB",
636 &get_remat_bb_data (bb)->livein_cands, bb->index);
637 lra_dump_bitmap_with_title ("pavin cands in BB",
638 &get_remat_bb_data (bb)->pavin_cands, bb->index);
639 lra_dump_bitmap_with_title ("pavout cands in BB",
640 &get_remat_bb_data (bb)->pavout_cands, bb->index);
641 lra_dump_bitmap_with_title ("avin cands in BB",
642 &get_remat_bb_data (bb)->avin_cands, bb->index);
643 lra_dump_bitmap_with_title ("avout cands in BB",
644 &get_remat_bb_data (bb)->avout_cands, bb->index);
648 /* Free all BB data. */
649 static void
650 finish_remat_bb_data (void)
652 basic_block bb;
654 FOR_EACH_BB_FN (bb, cfun)
656 bitmap_clear (&get_remat_bb_data (bb)->avout_cands);
657 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
658 bitmap_clear (&get_remat_bb_data (bb)->pavout_cands);
659 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
660 bitmap_clear (&get_remat_bb_data (bb)->livein_cands);
661 bitmap_clear (&get_remat_bb_data (bb)->gen_cands);
662 bitmap_clear (&get_remat_bb_data (bb)->dead_regs);
663 bitmap_clear (&get_remat_bb_data (bb)->changed_regs);
665 free (remat_bb_data);
670 /* Update changed_regs and dead_regs of BB from INSN. */
671 static void
672 set_bb_regs (basic_block bb, rtx_insn *insn)
674 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
675 struct lra_insn_reg *reg;
677 for (reg = id->regs; reg != NULL; reg = reg->next)
678 if (reg->type != OP_IN)
679 bitmap_set_bit (&get_remat_bb_data (bb)->changed_regs, reg->regno);
680 else
682 if (find_regno_note (insn, REG_DEAD, (unsigned) reg->regno) != NULL)
683 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, reg->regno);
685 if (CALL_P (insn))
686 for (int i = 0; i < call_used_regs_arr_len; i++)
687 bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs,
688 call_used_regs_arr[i]);
691 /* Calculate changed_regs and dead_regs for each BB. */
692 static void
693 calculate_local_reg_remat_bb_data (void)
695 basic_block bb;
696 rtx_insn *insn;
698 FOR_EACH_BB_FN (bb, cfun)
699 FOR_BB_INSNS (bb, insn)
700 if (INSN_P (insn))
701 set_bb_regs (bb, insn);
706 /* Return true if REGNO is an input operand of INSN. */
707 static bool
708 input_regno_present_p (rtx_insn *insn, int regno)
710 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
711 struct lra_insn_reg *reg;
713 for (reg = id->regs; reg != NULL; reg = reg->next)
714 if (reg->type == OP_IN && reg->regno == regno)
715 return true;
716 return false;
719 /* Return true if a call used register is an input operand of INSN. */
720 static bool
721 call_used_input_regno_present_p (rtx_insn *insn)
723 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
724 struct lra_insn_reg *reg;
726 for (reg = id->regs; reg != NULL; reg = reg->next)
727 if (reg->type == OP_IN && reg->regno <= FIRST_PSEUDO_REGISTER
728 && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno))
729 return true;
730 return false;
733 /* Calculate livein_cands for each BB. */
734 static void
735 calculate_livein_cands (void)
737 basic_block bb;
739 FOR_EACH_BB_FN (bb, cfun)
741 bitmap livein_regs = df_get_live_in (bb);
742 bitmap livein_cands = &get_remat_bb_data (bb)->livein_cands;
743 for (unsigned int i = 0; i < cands_num; i++)
745 cand_t cand = all_cands[i];
746 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
747 struct lra_insn_reg *reg;
749 for (reg = id->regs; reg != NULL; reg = reg->next)
750 if (reg->type == OP_IN && ! bitmap_bit_p (livein_regs, reg->regno))
751 break;
752 if (reg == NULL)
753 bitmap_set_bit (livein_cands, i);
758 /* Calculate gen_cands for each BB. */
759 static void
760 calculate_gen_cands (void)
762 basic_block bb;
763 bitmap gen_cands;
764 bitmap_head gen_insns;
765 rtx_insn *insn;
767 bitmap_initialize (&gen_insns, &reg_obstack);
768 FOR_EACH_BB_FN (bb, cfun)
770 gen_cands = &get_remat_bb_data (bb)->gen_cands;
771 bitmap_clear (&gen_insns);
772 FOR_BB_INSNS (bb, insn)
773 if (INSN_P (insn))
775 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
776 struct lra_insn_reg *reg;
777 unsigned int uid;
778 bitmap_iterator bi;
779 cand_t cand;
780 rtx set;
781 int src_regno = -1, dst_regno = -1;
783 if ((set = single_set (insn)) != NULL
784 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
786 src_regno = REGNO (SET_SRC (set));
787 dst_regno = REGNO (SET_DEST (set));
790 /* Update gen_cands: */
791 bitmap_clear (&temp_bitmap);
792 for (reg = id->regs; reg != NULL; reg = reg->next)
793 if (reg->type != OP_IN
794 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
795 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
797 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
799 cand = insn_to_cand[INSN_UID (insn2)];
800 gcc_assert (cand != NULL);
801 /* Ignore the reload insn. */
802 if (src_regno == cand->reload_regno
803 && dst_regno == cand->regno)
804 continue;
805 if (cand->regno == reg->regno
806 || input_regno_present_p (insn2, reg->regno))
808 bitmap_clear_bit (gen_cands, cand->index);
809 bitmap_set_bit (&temp_bitmap, uid);
813 if (CALL_P (insn))
814 EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
816 rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
818 cand = insn_to_cand[INSN_UID (insn2)];
819 gcc_assert (cand != NULL);
820 if (call_used_input_regno_present_p (insn2))
822 bitmap_clear_bit (gen_cands, cand->index);
823 bitmap_set_bit (&temp_bitmap, uid);
826 bitmap_and_compl_into (&gen_insns, &temp_bitmap);
828 cand = insn_to_cand[INSN_UID (insn)];
829 if (cand != NULL)
831 bitmap_set_bit (gen_cands, cand->index);
832 bitmap_set_bit (&gen_insns, INSN_UID (insn));
836 bitmap_clear (&gen_insns);
841 /* The common transfer function used by the DF equation solver to
842 propagate (partial) availability info BB_IN to BB_OUT through block
843 with BB_INDEX according to the following equation:
845 bb.out = ((bb.in & bb.livein) - bb.killed) OR bb.gen
847 static bool
848 cand_trans_fun (int bb_index, bitmap bb_in, bitmap bb_out)
850 remat_bb_data_t bb_info;
851 bitmap bb_livein, bb_changed_regs, bb_dead_regs;
852 unsigned int cid;
853 bitmap_iterator bi;
855 bb_info = get_remat_bb_data_by_index (bb_index);
856 bb_livein = &bb_info->livein_cands;
857 bb_changed_regs = &bb_info->changed_regs;
858 bb_dead_regs = &bb_info->dead_regs;
859 /* Calculate killed avin cands -- cands whose regs are changed or
860 becoming dead in the BB. We calculate it here as we hope that
861 repeated calculations are compensated by smaller size of BB_IN in
862 comparison with all candidates number. */
863 bitmap_clear (&temp_bitmap);
864 EXECUTE_IF_SET_IN_BITMAP (bb_in, 0, cid, bi)
866 cand_t cand = all_cands[cid];
867 lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
868 struct lra_insn_reg *reg;
870 if (! bitmap_bit_p (bb_livein, cid))
872 bitmap_set_bit (&temp_bitmap, cid);
873 continue;
875 for (reg = id->regs; reg != NULL; reg = reg->next)
876 /* Ignore all outputs which are not the regno for
877 rematerialization. */
878 if (reg->type == OP_OUT && reg->regno != cand->regno)
879 continue;
880 else if (bitmap_bit_p (bb_changed_regs, reg->regno)
881 || bitmap_bit_p (bb_dead_regs, reg->regno))
883 bitmap_set_bit (&temp_bitmap, cid);
884 break;
886 /* Check regno for rematerialization. */
887 if (bitmap_bit_p (bb_changed_regs, cand->regno)
888 || bitmap_bit_p (bb_dead_regs, cand->regno))
889 bitmap_set_bit (&temp_bitmap, cid);
891 return bitmap_ior_and_compl (bb_out,
892 &bb_info->gen_cands, bb_in, &temp_bitmap);
897 /* The transfer function used by the DF equation solver to propagate
898 partial candidate availability info through block with BB_INDEX
899 according to the following equation:
901 bb.pavout = ((bb.pavin & bb.livein) - bb.killed) OR bb.gen
903 static bool
904 cand_pav_trans_fun (int bb_index)
906 remat_bb_data_t bb_info;
908 bb_info = get_remat_bb_data_by_index (bb_index);
909 return cand_trans_fun (bb_index, &bb_info->pavin_cands,
910 &bb_info->pavout_cands);
913 /* The confluence function used by the DF equation solver to set up
914 cand_pav info for a block BB without predecessor. */
915 static void
916 cand_pav_con_fun_0 (basic_block bb)
918 bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
921 /* The confluence function used by the DF equation solver to propagate
922 partial candidate availability info from predecessor to successor
923 on edge E (pred->bb) according to the following equation:
925 bb.pavin_cands = 0 for entry block | OR (pavout_cands of predecessors)
927 static bool
928 cand_pav_con_fun_n (edge e)
930 basic_block pred = e->src;
931 basic_block bb = e->dest;
932 remat_bb_data_t bb_info;
933 bitmap bb_pavin, pred_pavout;
935 bb_info = get_remat_bb_data (bb);
936 bb_pavin = &bb_info->pavin_cands;
937 pred_pavout = &get_remat_bb_data (pred)->pavout_cands;
938 return bitmap_ior_into (bb_pavin, pred_pavout);
943 /* The transfer function used by the DF equation solver to propagate
944 candidate availability info through block with BB_INDEX according
945 to the following equation:
947 bb.avout = ((bb.avin & bb.livein) - bb.killed) OR bb.gen
949 static bool
950 cand_av_trans_fun (int bb_index)
952 remat_bb_data_t bb_info;
954 bb_info = get_remat_bb_data_by_index (bb_index);
955 return cand_trans_fun (bb_index, &bb_info->avin_cands,
956 &bb_info->avout_cands);
959 /* The confluence function used by the DF equation solver to set up
960 cand_av info for a block BB without predecessor. */
961 static void
962 cand_av_con_fun_0 (basic_block bb)
964 bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
967 /* The confluence function used by the DF equation solver to propagate
968 cand_av info from predecessor to successor on edge E (pred->bb)
969 according to the following equation:
971 bb.avin_cands = 0 for entry block | AND (avout_cands of predecessors)
973 static bool
974 cand_av_con_fun_n (edge e)
976 basic_block pred = e->src;
977 basic_block bb = e->dest;
978 remat_bb_data_t bb_info;
979 bitmap bb_avin, pred_avout;
981 bb_info = get_remat_bb_data (bb);
982 bb_avin = &bb_info->avin_cands;
983 pred_avout = &get_remat_bb_data (pred)->avout_cands;
984 return bitmap_and_into (bb_avin, pred_avout);
987 /* Calculate available candidates for each BB. */
988 static void
989 calculate_global_remat_bb_data (void)
991 basic_block bb;
993 df_simple_dataflow
994 (DF_FORWARD, NULL, cand_pav_con_fun_0, cand_pav_con_fun_n,
995 cand_pav_trans_fun, &all_blocks,
996 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
997 /* Initialize avin by pavin. */
998 FOR_EACH_BB_FN (bb, cfun)
999 bitmap_copy (&get_remat_bb_data (bb)->avin_cands,
1000 &get_remat_bb_data (bb)->pavin_cands);
1001 df_simple_dataflow
1002 (DF_FORWARD, NULL, cand_av_con_fun_0, cand_av_con_fun_n,
1003 cand_av_trans_fun, &all_blocks,
1004 df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
1009 /* Setup sp offset attribute to SP_OFFSET for all INSNS. */
1010 static void
1011 change_sp_offset (rtx_insn *insns, HOST_WIDE_INT sp_offset)
1013 for (rtx_insn *insn = insns; insn != NULL; insn = NEXT_INSN (insn))
1014 eliminate_regs_in_insn (insn, false, false, sp_offset);
1017 /* Return start hard register of REG (can be a hard or a pseudo reg)
1018 or -1 (if it is a spilled pseudo). Return number of hard registers
1019 occupied by REG through parameter NREGS if the start hard reg is
1020 not negative. */
1021 static int
1022 get_hard_regs (struct lra_insn_reg *reg, int &nregs)
1024 int regno = reg->regno;
1025 int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno];
1027 if (hard_regno >= 0)
1028 nregs = hard_regno_nregs[hard_regno][reg->biggest_mode];
1029 return hard_regno;
1032 /* Make copy of and register scratch pseudos in rematerialized insn
1033 REMAT_INSN. */
1034 static void
1035 update_scratch_ops (rtx_insn *remat_insn)
1037 lra_insn_recog_data_t id = lra_get_insn_recog_data (remat_insn);
1038 struct lra_static_insn_data *static_id = id->insn_static_data;
1039 for (int i = 0; i < static_id->n_operands; i++)
1041 rtx *loc = id->operand_loc[i];
1042 if (! REG_P (*loc))
1043 continue;
1044 int regno = REGNO (*loc);
1045 if (! lra_former_scratch_p (regno))
1046 continue;
1047 *loc = lra_create_new_reg (GET_MODE (*loc), *loc,
1048 lra_get_allocno_class (regno),
1049 "scratch pseudo copy");
1050 lra_register_new_scratch_op (remat_insn, i);
1055 /* Insert rematerialization insns using the data-flow data calculated
1056 earlier. */
1057 static bool
1058 do_remat (void)
1060 rtx_insn *insn;
1061 basic_block bb;
1062 bitmap_head avail_cands;
1063 bool changed_p = false;
1064 /* Living hard regs and hard registers of living pseudos. */
1065 HARD_REG_SET live_hard_regs;
1067 bitmap_initialize (&avail_cands, &reg_obstack);
1068 FOR_EACH_BB_FN (bb, cfun)
1070 REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb));
1071 bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands,
1072 &get_remat_bb_data (bb)->livein_cands);
1073 FOR_BB_INSNS (bb, insn)
1075 if (!NONDEBUG_INSN_P (insn))
1076 continue;
1078 lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
1079 struct lra_static_insn_data *static_id = id->insn_static_data;
1080 struct lra_insn_reg *reg;
1081 cand_t cand;
1082 unsigned int cid;
1083 bitmap_iterator bi;
1084 rtx set;
1085 int src_regno = -1, dst_regno = -1;
1087 if ((set = single_set (insn)) != NULL
1088 && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
1090 src_regno = REGNO (SET_SRC (set));
1091 dst_regno = REGNO (SET_DEST (set));
1094 cand = NULL;
1095 /* Check possibility of rematerialization (hard reg or
1096 unpsilled pseudo <- spilled pseudo): */
1097 if (dst_regno >= 0 && src_regno >= FIRST_PSEUDO_REGISTER
1098 && reg_renumber[src_regno] < 0
1099 && (dst_regno < FIRST_PSEUDO_REGISTER
1100 || reg_renumber[dst_regno] >= 0))
1102 for (cand = regno_cands[src_regno];
1103 cand != NULL;
1104 cand = cand->next_regno_cand)
1105 if (bitmap_bit_p (&avail_cands, cand->index))
1106 break;
1108 int i, hard_regno, nregs;
1109 rtx_insn *remat_insn = NULL;
1110 HOST_WIDE_INT cand_sp_offset = 0;
1111 if (cand != NULL)
1113 lra_insn_recog_data_t cand_id
1114 = lra_get_insn_recog_data (cand->insn);
1115 struct lra_static_insn_data *static_cand_id
1116 = cand_id->insn_static_data;
1117 rtx saved_op = *cand_id->operand_loc[cand->nop];
1119 /* Check clobbers do not kill something living. */
1120 gcc_assert (REG_P (saved_op));
1121 int ignore_regno = REGNO (saved_op);
1123 for (reg = cand_id->regs; reg != NULL; reg = reg->next)
1124 if (reg->type != OP_IN && reg->regno != ignore_regno)
1126 hard_regno = get_hard_regs (reg, nregs);
1127 gcc_assert (hard_regno >= 0);
1128 for (i = 0; i < nregs; i++)
1129 if (TEST_HARD_REG_BIT (live_hard_regs, hard_regno + i))
1130 break;
1131 if (i < nregs)
1132 break;
1135 if (reg == NULL)
1137 for (reg = static_cand_id->hard_regs;
1138 reg != NULL;
1139 reg = reg->next)
1140 if (reg->type != OP_IN
1141 && TEST_HARD_REG_BIT (live_hard_regs, reg->regno))
1142 break;
1145 if (reg == NULL)
1147 *cand_id->operand_loc[cand->nop] = SET_DEST (set);
1148 lra_update_insn_regno_info (cand->insn);
1149 bool ok_p = lra_constrain_insn (cand->insn);
1150 if (ok_p)
1152 rtx remat_pat = copy_insn (PATTERN (cand->insn));
1154 start_sequence ();
1155 emit_insn (remat_pat);
1156 remat_insn = get_insns ();
1157 end_sequence ();
1158 if (recog_memoized (remat_insn) < 0)
1159 remat_insn = NULL;
1160 cand_sp_offset = cand_id->sp_offset;
1162 *cand_id->operand_loc[cand->nop] = saved_op;
1163 lra_update_insn_regno_info (cand->insn);
1167 bitmap_clear (&temp_bitmap);
1168 /* Update avail_cands (see analogous code for
1169 calculate_gen_cands). */
1170 for (reg = id->regs; reg != NULL; reg = reg->next)
1171 if (reg->type != OP_IN
1172 || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1173 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1175 cand = all_cands[cid];
1177 /* Ignore the reload insn. */
1178 if (src_regno == cand->reload_regno
1179 && dst_regno == cand->regno)
1180 continue;
1181 if (cand->regno == reg->regno
1182 || input_regno_present_p (cand->insn, reg->regno))
1183 bitmap_set_bit (&temp_bitmap, cand->index);
1186 if (CALL_P (insn))
1187 EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
1189 cand = all_cands[cid];
1191 if (call_used_input_regno_present_p (cand->insn))
1192 bitmap_set_bit (&temp_bitmap, cand->index);
1195 bitmap_and_compl_into (&avail_cands, &temp_bitmap);
1196 if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL)
1197 bitmap_set_bit (&avail_cands, cand->index);
1199 if (remat_insn != NULL)
1201 HOST_WIDE_INT sp_offset_change = cand_sp_offset - id->sp_offset;
1202 if (sp_offset_change != 0)
1203 change_sp_offset (remat_insn, sp_offset_change);
1204 update_scratch_ops (remat_insn);
1205 lra_process_new_insns (insn, remat_insn, NULL,
1206 "Inserting rematerialization insn");
1207 lra_set_insn_deleted (insn);
1208 changed_p = true;
1209 continue;
1212 /* Update live hard regs: */
1213 for (reg = id->regs; reg != NULL; reg = reg->next)
1214 if (reg->type == OP_IN
1215 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1217 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1218 continue;
1219 for (i = 0; i < nregs; i++)
1220 CLEAR_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1222 /* Process also hard regs (e.g. CC register) which are part
1223 of insn definition. */
1224 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1225 if (reg->type == OP_IN
1226 && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
1227 CLEAR_HARD_REG_BIT (live_hard_regs, reg->regno);
1228 /* Inputs have been processed, now process outputs. */
1229 for (reg = id->regs; reg != NULL; reg = reg->next)
1230 if (reg->type != OP_IN
1231 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1233 if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
1234 continue;
1235 for (i = 0; i < nregs; i++)
1236 SET_HARD_REG_BIT (live_hard_regs, hard_regno + i);
1238 for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
1239 if (reg->type != OP_IN
1240 && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
1241 SET_HARD_REG_BIT (live_hard_regs, reg->regno);
1244 bitmap_clear (&avail_cands);
1245 return changed_p;
1250 /* Current number of rematerialization iteration. */
1251 int lra_rematerialization_iter;
1253 /* Entry point of the rematerialization sub-pass. Return true if we
1254 did any rematerialization. */
1255 bool
1256 lra_remat (void)
1258 basic_block bb;
1259 bool result;
1260 int max_regno = max_reg_num ();
1262 if (! flag_lra_remat)
1263 return false;
1264 lra_rematerialization_iter++;
1265 if (lra_rematerialization_iter > LRA_MAX_REMATERIALIZATION_PASSES)
1266 return false;
1267 if (lra_dump_file != NULL)
1268 fprintf (lra_dump_file,
1269 "\n******** Rematerialization #%d: ********\n\n",
1270 lra_rematerialization_iter);
1271 timevar_push (TV_LRA_REMAT);
1272 insn_to_cand = XCNEWVEC (cand_t, get_max_uid ());
1273 regno_cands = XCNEWVEC (cand_t, max_regno);
1274 all_cands.create (8000);
1275 call_used_regs_arr_len = 0;
1276 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1277 if (call_used_regs[i])
1278 call_used_regs_arr[call_used_regs_arr_len++] = i;
1279 initiate_cand_table ();
1280 create_cands ();
1281 create_remat_bb_data ();
1282 bitmap_initialize (&temp_bitmap, &reg_obstack);
1283 calculate_local_reg_remat_bb_data ();
1284 calculate_livein_cands ();
1285 calculate_gen_cands ();
1286 bitmap_initialize (&all_blocks, &reg_obstack);
1287 FOR_ALL_BB_FN (bb, cfun)
1288 bitmap_set_bit (&all_blocks, bb->index);
1289 calculate_global_remat_bb_data ();
1290 dump_candidates_and_remat_bb_data ();
1291 result = do_remat ();
1292 all_cands.release ();
1293 bitmap_clear (&temp_bitmap);
1294 finish_remat_bb_data ();
1295 finish_cand_table ();
1296 bitmap_clear (&all_blocks);
1297 free (regno_cands);
1298 free (insn_to_cand);
1299 timevar_pop (TV_LRA_REMAT);
1300 return result;