[26/77] Use is_a <scalar_int_mode> in subreg/extract simplifications
[official-gcc.git] / gcc / hsa-regalloc.c
blob2a17254c3b26be4ae53bae967c159b3c0433d123
1 /* HSAIL IL Register allocation and out-of-SSA.
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 Contributed by Michael Matz <matz@suse.de>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "is-a.h"
26 #include "vec.h"
27 #include "tree.h"
28 #include "dominance.h"
29 #include "basic-block.h"
30 #include "cfg.h"
31 #include "cfganal.h"
32 #include "function.h"
33 #include "bitmap.h"
34 #include "dumpfile.h"
35 #include "cgraph.h"
36 #include "print-tree.h"
37 #include "cfghooks.h"
38 #include "symbol-summary.h"
39 #include "hsa-common.h"
/* Process a PHI node PHI of basic block BB as a part of naive out-of-SSA.
   Each PHI argument is turned into a simple move appended to the
   corresponding predecessor block (splitting critical edges as needed).  */

static void
naive_process_phi (hsa_insn_phi *phi)
{
  unsigned count = phi->operand_count ();
  for (unsigned i = 0; i < count; i++)
    {
      gcc_checking_assert (phi->get_op (i));
      hsa_op_base *op = phi->get_op (i);
      hsa_bb *hbb;
      edge e;

      if (!op)
	break;

      /* The I-th PHI operand corresponds to the I-th predecessor edge.  */
      e = EDGE_PRED (phi->m_bb, i);
      if (single_succ_p (e->src))
	hbb = hsa_bb_for_bb (e->src);
      else
	{
	  /* Critical edge: the predecessor has other successors, so the
	     copy must go into a newly split block on this edge.  */
	  basic_block old_dest = e->dest;
	  hbb = hsa_init_new_bb (split_edge (e));

	  /* If switch insn used this edge, fix jump table.  */
	  hsa_bb *source = hsa_bb_for_bb (e->src);
	  hsa_insn_sbr *sbr;
	  if (source->m_last_insn
	      && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
	    sbr->replace_all_labels (old_dest, hbb->m_bb);
	}

      /* Materialize the PHI argument as a move into the PHI result.  */
      hsa_build_append_simple_mov (phi->m_dest, op, hbb);
    }
}
/* Naive out-of-SSA: rewrite every PHI node in the function into copies on
   the incoming edges, then drop the PHI nodes themselves.  */

static void
naive_outof_ssa (void)
{
  basic_block bb;

  hsa_cfun->m_in_ssa = false;

  FOR_ALL_BB_FN (bb, cfun)
    {
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_phi *phi;

      /* PHIs are chained through m_next; the chain stores the base insn
	 type, hence the cast on each step.  */
      for (phi = hbb->m_first_phi;
	   phi;
	   phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
	naive_process_phi (phi);

      /* Zap PHI nodes, they will be deallocated when everything else will.  */
      hbb->m_first_phi = NULL;
      hbb->m_last_phi = NULL;
    }
}
/* Return register class number for the given HSA TYPE.  0 means the 'c' one
   bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
   and 3 for 'q' 128 bit class.  */

static int
m_reg_class_for_type (BrigType16_t type)
{
  switch (type)
    {
    /* One-bit predicate values.  */
    case BRIG_TYPE_B1:
      return 0;

    /* Scalar and packed types up to 32 bits wide.  */
    case BRIG_TYPE_U8:
    case BRIG_TYPE_U16:
    case BRIG_TYPE_U32:
    case BRIG_TYPE_S8:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F16:
    case BRIG_TYPE_F32:
    case BRIG_TYPE_B8:
    case BRIG_TYPE_B16:
    case BRIG_TYPE_B32:
    case BRIG_TYPE_U8X4:
    case BRIG_TYPE_S8X4:
    case BRIG_TYPE_U16X2:
    case BRIG_TYPE_S16X2:
    case BRIG_TYPE_F16X2:
      return 1;

    /* 64-bit scalar and packed types.  */
    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
    case BRIG_TYPE_B64:
    case BRIG_TYPE_U8X8:
    case BRIG_TYPE_S8X8:
    case BRIG_TYPE_U16X4:
    case BRIG_TYPE_S16X4:
    case BRIG_TYPE_F16X4:
    case BRIG_TYPE_U32X2:
    case BRIG_TYPE_S32X2:
    case BRIG_TYPE_F32X2:
      return 2;

    /* 128-bit types.  */
    case BRIG_TYPE_B128:
    case BRIG_TYPE_U8X16:
    case BRIG_TYPE_S8X16:
    case BRIG_TYPE_U16X8:
    case BRIG_TYPE_S16X8:
    case BRIG_TYPE_F16X8:
    case BRIG_TYPE_U32X4:
    case BRIG_TYPE_U64X2:
    case BRIG_TYPE_S32X4:
    case BRIG_TYPE_S64X2:
    case BRIG_TYPE_F32X4:
    case BRIG_TYPE_F64X2:
      return 3;

    default:
      gcc_unreachable ();
    }
}
/* If the Ith operands of INSN is or contains a register (in an address),
   return the address of that register operand.  If not return NULL.  */

static hsa_op_reg **
insn_reg_addr (hsa_insn_basic *insn, int i)
{
  hsa_op_base *op = insn->get_op (i);
  if (!op)
    return NULL;
  /* Direct register operand.  */
  hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
  if (reg)
    return (hsa_op_reg **) insn->get_op_addr (i);
  /* Register used as the base of an address operand.  */
  hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
  if (addr && addr->m_reg)
    return &addr->m_reg;
  return NULL;
}
/* Allocation state of one register class ('c', 's', 'd' or 'q').  */

struct m_reg_class_desc
{
  /* next_avail: next register number to hand out; max_num: number of
     allocatable hard registers in this class.  */
  unsigned next_avail, max_num;
  /* used_num: hard registers currently live; max_used: high-water mark.  */
  unsigned used_num, max_used;
  /* Bitmap of occupied hard registers (up to 128 bits).  */
  uint64_t used[2];
  /* Class letter used in register names ('c', 's', 'd' or 'q').  */
  char cl_char;
};
/* Rewrite the instructions in BB to observe spilled live ranges.
   CLASSES is the global register class state.  */

static void
rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
{
  hsa_bb *hbb = hsa_bb_for_bb (bb);
  hsa_insn_basic *insn, *next_insn;

  for (insn = hbb->m_first_insn; insn; insn = next_insn)
    {
      /* Cache the successor first; spilling presumably inserts new
	 instructions around INSN -- hence the explicit next pointer.  */
      next_insn = insn->m_next;
      unsigned count = insn->operand_count ();
      for (unsigned i = 0; i < count; i++)
	{
	  gcc_checking_assert (insn->get_op (i));
	  hsa_op_reg **regaddr = insn_reg_addr (insn, i);

	  if (regaddr)
	    {
	      hsa_op_reg *reg = *regaddr;
	      /* Registers that received a hard register need no rewrite.  */
	      if (reg->m_reg_class)
		continue;
	      gcc_assert (reg->m_spill_sym);

	      int cl = m_reg_class_for_type (reg->m_type);
	      hsa_op_reg *tmp, *tmp2;
	      /* Reload before a use, store after a definition.  */
	      if (insn->op_output_p (i))
		tmp = hsa_spill_out (insn, reg, &tmp2);
	      else
		tmp = hsa_spill_in (insn, reg, &tmp2);

	      *regaddr = tmp;

	      /* Spill temporaries get scratch hard registers above
		 max_num; offsetting by the operand index I presumably
		 keeps temporaries of one insn distinct -- TODO confirm.  */
	      tmp->m_reg_class = classes[cl].cl_char;
	      tmp->m_hard_num = (char) (classes[cl].max_num + i);
	      if (tmp2)
		{
		  /* A secondary temporary only arises for the one-bit 'c'
		     class and lives in the 's' class.  */
		  gcc_assert (cl == 0);
		  tmp2->m_reg_class = classes[1].cl_char;
		  tmp2->m_hard_num = (char) (classes[1].max_num + i);
		}
	    }
	}
    }
}
239 /* Dump current function to dump file F, with info specific
240 to register allocation. */
242 void
243 dump_hsa_cfun_regalloc (FILE *f)
245 basic_block bb;
247 fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
249 FOR_ALL_BB_FN (bb, cfun)
251 hsa_bb *hbb = (struct hsa_bb *) bb->aux;
252 bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n");
253 dump_hsa_bb (f, hbb);
254 bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
/* Given the global register allocation state CLASSES and a
   register REG, try to give it a hardware register.  If successful,
   store that hardreg in REG and return it, otherwise return -1.
   Also changes CLASSES to accommodate for the allocated register.  */

static int
try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
{
  int cl = m_reg_class_for_type (reg->m_type);
  int ret = -1;
  /* Overall budget across classes, weighted by register width in 32-bit
     units ('s' counts 1, 'd' 2, 'q' 4); capped below 128 with a small
     reserve -- presumably kept free for spill temporaries, TODO confirm.  */
  if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
      >= 128 - 5)
    return -1;
  if (classes[cl].used_num < classes[cl].max_num)
    {
      unsigned int i;
      classes[cl].used_num++;
      if (classes[cl].used_num > classes[cl].max_used)
	classes[cl].max_used = classes[cl].used_num;
      /* Find the first free bit; since used_num was just incremented past
	 the current population, a free slot exists below used_num.  */
      for (i = 0; i < classes[cl].used_num; i++)
	if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
	  break;
      ret = i;
      classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
      reg->m_reg_class = classes[cl].cl_char;
      reg->m_hard_num = i;
    }
  return ret;
}
288 /* Free up hardregs used by REG, into allocation state CLASSES. */
290 static void
291 free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
293 int cl = m_reg_class_for_type (reg->m_type);
294 int ret = reg->m_hard_num;
295 gcc_assert (reg->m_reg_class == classes[cl].cl_char);
296 classes[cl].used_num--;
297 classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
300 /* Note that the live range for REG ends at least at END. */
302 static void
303 note_lr_end (hsa_op_reg *reg, int end)
305 if (reg->m_lr_end < end)
306 reg->m_lr_end = end;
309 /* Note that the live range for REG starts at least at BEGIN. */
311 static void
312 note_lr_begin (hsa_op_reg *reg, int begin)
314 if (reg->m_lr_begin > begin)
315 reg->m_lr_begin = begin;
318 /* Given two registers A and B, return -1, 0 or 1 if A's live range
319 starts before, at or after B's live range. */
321 static int
322 cmp_begin (const void *a, const void *b)
324 const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
325 const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
326 int ret;
327 if (rega == regb)
328 return 0;
329 ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
330 if (ret)
331 return ret;
332 return ((*rega)->m_order - (*regb)->m_order);
335 /* Given two registers REGA and REGB, return true if REGA's
336 live range ends after REGB's. This results in a sorting order
337 with earlier end points at the end. */
339 static bool
340 cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
342 int ret;
343 if (rega == regb)
344 return false;
345 ret = (regb)->m_lr_end - (rega)->m_lr_end;
346 if (ret)
347 return ret < 0;
348 return (((regb)->m_order - (rega)->m_order)) < 0;
351 /* Expire all old intervals in ACTIVE (a per-regclass vector),
352 that is, those that end before the interval REG starts. Give
353 back resources freed so into the state CLASSES. */
355 static void
356 expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
357 struct m_reg_class_desc *classes)
359 for (int i = 0; i < 4; i++)
360 while (!active[i].is_empty ())
362 hsa_op_reg *a = active[i].pop ();
363 if (a->m_lr_end > reg->m_lr_begin)
365 active[i].quick_push (a);
366 break;
368 free_reg (classes, a);
/* The interval REG didn't get a hardreg.  Spill it or one of those
   from ACTIVE (if the latter, then REG will become allocated to the
   hardreg that the spilled interval formerly used).  */

static void
spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
{
  int cl = m_reg_class_for_type (reg->m_type);
  gcc_assert (!active[cl].is_empty ());
  /* Per cmp_end, ACTIVE sorts the latest end point first, so element 0
     is the interval that lives the longest.  */
  hsa_op_reg *cand = active[cl][0];
  if (cand->m_lr_end > reg->m_lr_end)
    {
      /* The active interval ends after REG does: steal its hard register
	 for REG and spill the active interval instead.  */
      reg->m_reg_class = cand->m_reg_class;
      reg->m_hard_num = cand->m_hard_num;
      active[cl].ordered_remove (0);
      unsigned place = active[cl].lower_bound (reg, cmp_end);
      active[cl].quick_insert (place, reg);
    }
  else
    cand = reg;

  gcc_assert (!cand->m_spill_sym);
  BrigType16_t type = cand->m_type;
  /* Spill one-bit values as bytes -- presumably B1 has no direct memory
     form, TODO confirm.  */
  if (type == BRIG_TYPE_B1)
    type = BRIG_TYPE_U8;
  /* Zero register class marks CAND as spilled for rewrite_code_bb.  */
  cand->m_reg_class = 0;
  cand->m_spill_sym = hsa_get_spill_symbol (type);
  cand->m_spill_sym->m_name_number = cand->m_order;
}
/* Given the global register state CLASSES allocate all HSA virtual
   registers either to hardregs or to a spill symbol.  Uses classic
   linear-scan allocation over live ranges computed by iterative
   dataflow liveness analysis.  */

static void
linear_scan_regalloc (struct m_reg_class_desc *classes)
{
  /* Compute liveness.  */
  bool changed;
  int i, n;
  int insn_order;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  bitmap work = BITMAP_ALLOC (NULL);
  vec<hsa_op_reg*> ind2reg = vNULL;
  vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
  hsa_insn_basic *m_last_insn;

  /* We will need the reverse post order for linearization,
     and the post order for liveness analysis, which is the same
     backward.  */
  n = pre_and_rev_post_order_compute (NULL, bbs, true);
  ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);

  /* Give all instructions a linearized number, at the same time
     build a mapping from register index to register.  */
  insn_order = 1;
  for (i = 0; i < n; i++)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
	{
	  unsigned opi;
	  insn->m_number = insn_order++;
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		ind2reg[(*regaddr)->m_order] = *regaddr;
	    }
	}
    }

  /* Initialize all live ranges to [after-end, 0).  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;

  /* Classic liveness analysis, as long as something changes:
       m_liveout is union (m_livein of successors)
       m_livein is m_liveout minus defs plus uses.  */
  do
    {
      changed = false;
      /* Walk blocks in post order (reverse of BBS) so changes propagate
	 backward quickly.  */
      for (i = n - 1; i >= 0; i--)
	{
	  edge e;
	  edge_iterator ei;
	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
	  hsa_bb *hbb = hsa_bb_for_bb (bb);

	  /* Union of successors m_livein (or empty if none).  */
	  bool first = true;
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	      {
		hsa_bb *succ = hsa_bb_for_bb (e->dest);
		if (first)
		  {
		    bitmap_copy (work, succ->m_livein);
		    first = false;
		  }
		else
		  bitmap_ior_into (work, succ->m_livein);
	      }
	  if (first)
	    bitmap_clear (work);

	  bitmap_copy (hbb->m_liveout, work);

	  /* Remove defs, include uses in a backward insn walk.  */
	  hsa_insn_basic *insn;
	  for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	    {
	      unsigned opi;
	      unsigned ndefs = insn->input_count ();
	      for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_clear_bit (work, (*regaddr)->m_order);
		}
	      for (; opi < insn->operand_count (); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_set_bit (work, (*regaddr)->m_order);
		}
	    }

	  /* Note if that changed something.  */
	  if (bitmap_ior_into (hbb->m_livein, work))
	    changed = true;
	}
    }
  while (changed);

  /* Make one pass through all instructions in linear order,
     noting and merging possible live range start and end points.  */
  m_last_insn = NULL;
  for (i = n - 1; i >= 0; i--)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      int after_end_number;
      unsigned bit;
      bitmap_iterator bi;

      if (m_last_insn)
	after_end_number = m_last_insn->m_number;
      else
	after_end_number = insn_order;
      /* Everything live-out in this BB has at least an end point
	 after us.  */
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
	note_lr_end (ind2reg[bit], after_end_number);

      for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	{
	  unsigned opi;
	  unsigned ndefs = insn->input_count ();
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		{
		  hsa_op_reg *reg = *regaddr;
		  /* Definitions open a live range, uses extend it.  */
		  if (opi < ndefs)
		    note_lr_begin (reg, insn->m_number);
		  else
		    note_lr_end (reg, insn->m_number);
		}
	    }
	}

      /* Everything live-in in this BB has a start point before
	 our first insn.  */
      int before_start_number;
      if (hbb->m_first_insn)
	before_start_number = hbb->m_first_insn->m_number;
      else
	before_start_number = after_end_number;
      before_start_number--;
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
	note_lr_begin (ind2reg[bit], before_start_number);

      if (hbb->m_first_insn)
	m_last_insn = hbb->m_first_insn;
    }

  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      {
	/* All regs that have still their start at after all code actually
	   are defined at the start of the routine (prologue).  */
	if (ind2reg[i]->m_lr_begin == insn_order)
	  ind2reg[i]->m_lr_begin = 0;
	/* All regs that have no use but a def will have lr_end == 0,
	   they are actually live from def until after the insn they are
	   defined in.  */
	if (ind2reg[i]->m_lr_end == 0)
	  ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
      }

  /* Sort all intervals by increasing start point.  */
  gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);

  if (flag_checking)
    for (unsigned i = 0; i < ind2reg.length (); i++)
      gcc_assert (ind2reg[i]);

  ind2reg.qsort (cmp_begin);
  for (i = 0; i < 4; i++)
    active[i].reserve_exact (hsa_cfun->m_reg_count);

  /* Now comes the linear scan allocation.  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    {
      hsa_op_reg *reg = ind2reg[i];
      if (!reg)
	continue;
      expire_old_intervals (reg, active, classes);
      int cl = m_reg_class_for_type (reg->m_type);
      if (try_alloc_reg (classes, reg) >= 0)
	{
	  /* Keep the per-class active list sorted per cmp_end.  */
	  unsigned place = active[cl].lower_bound (reg, cmp_end);
	  active[cl].quick_insert (place, reg);
	}
      else
	spill_at_interval (reg, active);

      /* Some interesting dumping as we go.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " reg%d: [%5d, %5d)->",
		   reg->m_order, reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	  for (int cl = 0; cl < 4; cl++)
	    {
	      bool first = true;
	      hsa_op_reg *r;
	      fprintf (dump_file, " {");
	      for (int j = 0; active[cl].iterate (j, &r); j++)
		if (first)
		  {
		    fprintf (dump_file, "%d", r->m_order);
		    first = false;
		  }
		else
		  fprintf (dump_file, ", %d", r->m_order);
	      fprintf (dump_file, "}");
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (work);
  free (bbs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "------- After liveness: -------\n");
      dump_hsa_cfun_regalloc (dump_file);
      fprintf (dump_file, " ----- Intervals:\n");
      for (i = 0; i < hsa_cfun->m_reg_count; i++)
	{
	  hsa_op_reg *reg = ind2reg[i];
	  if (!reg)
	    continue;
	  fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order,
		   reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]\n",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	}
    }

  for (i = 0; i < 4; i++)
    active[i].release ();
  ind2reg.release ();
}
667 /* Entry point for register allocation. */
669 static void
670 regalloc (void)
672 basic_block bb;
673 m_reg_class_desc classes[4];
675 /* If there are no registers used in the function, exit right away. */
676 if (hsa_cfun->m_reg_count == 0)
677 return;
679 memset (classes, 0, sizeof (classes));
680 classes[0].next_avail = 0;
681 classes[0].max_num = 7;
682 classes[0].cl_char = 'c';
683 classes[1].cl_char = 's';
684 classes[2].cl_char = 'd';
685 classes[3].cl_char = 'q';
687 for (int i = 1; i < 4; i++)
689 classes[i].next_avail = 0;
690 classes[i].max_num = 20;
693 linear_scan_regalloc (classes);
695 FOR_ALL_BB_FN (bb, cfun)
696 rewrite_code_bb (bb, classes);
699 /* Out of SSA and register allocation on HSAIL IL. */
701 void
702 hsa_regalloc (void)
704 hsa_cfun->update_dominance ();
705 naive_outof_ssa ();
707 if (dump_file && (dump_flags & TDF_DETAILS))
709 fprintf (dump_file, "------- After out-of-SSA: -------\n");
710 dump_hsa_cfun (dump_file);
713 regalloc ();
715 if (dump_file && (dump_flags & TDF_DETAILS))
717 fprintf (dump_file, "------- After register allocation: -------\n");
718 dump_hsa_cfun (dump_file);