Relocation (= move+destroy)
[official-gcc.git] / gcc / hsa-regalloc.c
blob819f680d1bcae34530f98de99c01a978964e310c
1 /* HSAIL IL Register allocation and out-of-SSA.
2 Copyright (C) 2013-2018 Free Software Foundation, Inc.
3 Contributed by Michael Matz <matz@suse.de>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "is-a.h"
26 #include "vec.h"
27 #include "tree.h"
28 #include "dominance.h"
29 #include "basic-block.h"
30 #include "function.h"
31 #include "cfganal.h"
32 #include "cfg.h"
33 #include "bitmap.h"
34 #include "dumpfile.h"
35 #include "cgraph.h"
36 #include "print-tree.h"
37 #include "cfghooks.h"
38 #include "symbol-summary.h"
39 #include "hsa-common.h"
42 /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */
44 static void
45 naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
47 unsigned count = phi->operand_count ();
48 for (unsigned i = 0; i < count; i++)
50 gcc_checking_assert (phi->get_op (i));
51 hsa_op_base *op = phi->get_op (i);
52 hsa_bb *hbb;
53 edge e;
55 if (!op)
56 break;
58 e = predecessors[i];
59 if (single_succ_p (e->src))
60 hbb = hsa_bb_for_bb (e->src);
61 else
63 basic_block old_dest = e->dest;
64 hbb = hsa_init_new_bb (split_edge (e));
66 /* If switch insn used this edge, fix jump table. */
67 hsa_bb *source = hsa_bb_for_bb (e->src);
68 hsa_insn_sbr *sbr;
69 if (source->m_last_insn
70 && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
71 sbr->replace_all_labels (old_dest, hbb->m_bb);
74 hsa_build_append_simple_mov (phi->m_dest, op, hbb);
78 /* Naive out-of SSA. */
80 static void
81 naive_outof_ssa (void)
83 basic_block bb;
85 hsa_cfun->m_in_ssa = false;
87 FOR_ALL_BB_FN (bb, cfun)
89 hsa_bb *hbb = hsa_bb_for_bb (bb);
90 hsa_insn_phi *phi;
92 /* naive_process_phi can call split_edge on an incoming edge which order if
93 the incoming edges to the basic block and thus make it inconsistent with
94 the ordering of PHI arguments, so we collect them in advance. */
95 auto_vec<edge, 8> predecessors;
96 unsigned pred_count = EDGE_COUNT (bb->preds);
97 for (unsigned i = 0; i < pred_count; i++)
98 predecessors.safe_push (EDGE_PRED (bb, i));
100 for (phi = hbb->m_first_phi;
101 phi;
102 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
103 naive_process_phi (phi, predecessors);
105 /* Zap PHI nodes, they will be deallocated when everything else will. */
106 hbb->m_first_phi = NULL;
107 hbb->m_last_phi = NULL;
111 /* Return register class number for the given HSA TYPE. 0 means the 'c' one
112 bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
113 and 3 for 'q' 128 bit class. */
115 static int
116 m_reg_class_for_type (BrigType16_t type)
118 switch (type)
120 case BRIG_TYPE_B1:
121 return 0;
123 case BRIG_TYPE_U8:
124 case BRIG_TYPE_U16:
125 case BRIG_TYPE_U32:
126 case BRIG_TYPE_S8:
127 case BRIG_TYPE_S16:
128 case BRIG_TYPE_S32:
129 case BRIG_TYPE_F16:
130 case BRIG_TYPE_F32:
131 case BRIG_TYPE_B8:
132 case BRIG_TYPE_B16:
133 case BRIG_TYPE_B32:
134 case BRIG_TYPE_U8X4:
135 case BRIG_TYPE_S8X4:
136 case BRIG_TYPE_U16X2:
137 case BRIG_TYPE_S16X2:
138 case BRIG_TYPE_F16X2:
139 return 1;
141 case BRIG_TYPE_U64:
142 case BRIG_TYPE_S64:
143 case BRIG_TYPE_F64:
144 case BRIG_TYPE_B64:
145 case BRIG_TYPE_U8X8:
146 case BRIG_TYPE_S8X8:
147 case BRIG_TYPE_U16X4:
148 case BRIG_TYPE_S16X4:
149 case BRIG_TYPE_F16X4:
150 case BRIG_TYPE_U32X2:
151 case BRIG_TYPE_S32X2:
152 case BRIG_TYPE_F32X2:
153 return 2;
155 case BRIG_TYPE_B128:
156 case BRIG_TYPE_U8X16:
157 case BRIG_TYPE_S8X16:
158 case BRIG_TYPE_U16X8:
159 case BRIG_TYPE_S16X8:
160 case BRIG_TYPE_F16X8:
161 case BRIG_TYPE_U32X4:
162 case BRIG_TYPE_U64X2:
163 case BRIG_TYPE_S32X4:
164 case BRIG_TYPE_S64X2:
165 case BRIG_TYPE_F32X4:
166 case BRIG_TYPE_F64X2:
167 return 3;
169 default:
170 gcc_unreachable ();
174 /* If the Ith operands of INSN is or contains a register (in an address),
175 return the address of that register operand. If not return NULL. */
177 static hsa_op_reg **
178 insn_reg_addr (hsa_insn_basic *insn, int i)
180 hsa_op_base *op = insn->get_op (i);
181 if (!op)
182 return NULL;
183 hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
184 if (reg)
185 return (hsa_op_reg **) insn->get_op_addr (i);
186 hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
187 if (addr && addr->m_reg)
188 return &addr->m_reg;
189 return NULL;
192 struct m_reg_class_desc
194 unsigned next_avail, max_num;
195 unsigned used_num, max_used;
196 uint64_t used[2];
197 char cl_char;
200 /* Rewrite the instructions in BB to observe spilled live ranges.
201 CLASSES is the global register class state. */
203 static void
204 rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
206 hsa_bb *hbb = hsa_bb_for_bb (bb);
207 hsa_insn_basic *insn, *next_insn;
209 for (insn = hbb->m_first_insn; insn; insn = next_insn)
211 next_insn = insn->m_next;
212 unsigned count = insn->operand_count ();
213 for (unsigned i = 0; i < count; i++)
215 gcc_checking_assert (insn->get_op (i));
216 hsa_op_reg **regaddr = insn_reg_addr (insn, i);
218 if (regaddr)
220 hsa_op_reg *reg = *regaddr;
221 if (reg->m_reg_class)
222 continue;
223 gcc_assert (reg->m_spill_sym);
225 int cl = m_reg_class_for_type (reg->m_type);
226 hsa_op_reg *tmp, *tmp2;
227 if (insn->op_output_p (i))
228 tmp = hsa_spill_out (insn, reg, &tmp2);
229 else
230 tmp = hsa_spill_in (insn, reg, &tmp2);
232 *regaddr = tmp;
234 tmp->m_reg_class = classes[cl].cl_char;
235 tmp->m_hard_num = (char) (classes[cl].max_num + i);
236 if (tmp2)
238 gcc_assert (cl == 0);
239 tmp2->m_reg_class = classes[1].cl_char;
240 tmp2->m_hard_num = (char) (classes[1].max_num + i);
247 /* Dump current function to dump file F, with info specific
248 to register allocation. */
250 void
251 dump_hsa_cfun_regalloc (FILE *f)
253 basic_block bb;
255 fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
257 FOR_ALL_BB_FN (bb, cfun)
259 hsa_bb *hbb = (struct hsa_bb *) bb->aux;
260 bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n");
261 dump_hsa_bb (f, hbb);
262 bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
266 /* Given the global register allocation state CLASSES and a
267 register REG, try to give it a hardware register. If successful,
268 store that hardreg in REG and return it, otherwise return -1.
269 Also changes CLASSES to accommodate for the allocated register. */
271 static int
272 try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
274 int cl = m_reg_class_for_type (reg->m_type);
275 int ret = -1;
276 if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
277 >= 128 - 5)
278 return -1;
279 if (classes[cl].used_num < classes[cl].max_num)
281 unsigned int i;
282 classes[cl].used_num++;
283 if (classes[cl].used_num > classes[cl].max_used)
284 classes[cl].max_used = classes[cl].used_num;
285 for (i = 0; i < classes[cl].used_num; i++)
286 if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
287 break;
288 ret = i;
289 classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
290 reg->m_reg_class = classes[cl].cl_char;
291 reg->m_hard_num = i;
293 return ret;
296 /* Free up hardregs used by REG, into allocation state CLASSES. */
298 static void
299 free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
301 int cl = m_reg_class_for_type (reg->m_type);
302 int ret = reg->m_hard_num;
303 gcc_assert (reg->m_reg_class == classes[cl].cl_char);
304 classes[cl].used_num--;
305 classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
308 /* Note that the live range for REG ends at least at END. */
310 static void
311 note_lr_end (hsa_op_reg *reg, int end)
313 if (reg->m_lr_end < end)
314 reg->m_lr_end = end;
317 /* Note that the live range for REG starts at least at BEGIN. */
319 static void
320 note_lr_begin (hsa_op_reg *reg, int begin)
322 if (reg->m_lr_begin > begin)
323 reg->m_lr_begin = begin;
326 /* Given two registers A and B, return -1, 0 or 1 if A's live range
327 starts before, at or after B's live range. */
329 static int
330 cmp_begin (const void *a, const void *b)
332 const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
333 const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
334 int ret;
335 if (rega == regb)
336 return 0;
337 ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
338 if (ret)
339 return ret;
340 return ((*rega)->m_order - (*regb)->m_order);
343 /* Given two registers REGA and REGB, return true if REGA's
344 live range ends after REGB's. This results in a sorting order
345 with earlier end points at the end. */
347 static bool
348 cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
350 int ret;
351 if (rega == regb)
352 return false;
353 ret = (regb)->m_lr_end - (rega)->m_lr_end;
354 if (ret)
355 return ret < 0;
356 return (((regb)->m_order - (rega)->m_order)) < 0;
359 /* Expire all old intervals in ACTIVE (a per-regclass vector),
360 that is, those that end before the interval REG starts. Give
361 back resources freed so into the state CLASSES. */
363 static void
364 expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
365 struct m_reg_class_desc *classes)
367 for (int i = 0; i < 4; i++)
368 while (!active[i].is_empty ())
370 hsa_op_reg *a = active[i].pop ();
371 if (a->m_lr_end > reg->m_lr_begin)
373 active[i].quick_push (a);
374 break;
376 free_reg (classes, a);
380 /* The interval REG didn't get a hardreg. Spill it or one of those
381 from ACTIVE (if the latter, then REG will become allocated to the
382 hardreg that formerly was used by it). */
384 static void
385 spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
387 int cl = m_reg_class_for_type (reg->m_type);
388 gcc_assert (!active[cl].is_empty ());
389 hsa_op_reg *cand = active[cl][0];
390 if (cand->m_lr_end > reg->m_lr_end)
392 reg->m_reg_class = cand->m_reg_class;
393 reg->m_hard_num = cand->m_hard_num;
394 active[cl].ordered_remove (0);
395 unsigned place = active[cl].lower_bound (reg, cmp_end);
396 active[cl].quick_insert (place, reg);
398 else
399 cand = reg;
401 gcc_assert (!cand->m_spill_sym);
402 BrigType16_t type = cand->m_type;
403 if (type == BRIG_TYPE_B1)
404 type = BRIG_TYPE_U8;
405 cand->m_reg_class = 0;
406 cand->m_spill_sym = hsa_get_spill_symbol (type);
407 cand->m_spill_sym->m_name_number = cand->m_order;
410 /* Given the global register state CLASSES allocate all HSA virtual
411 registers either to hardregs or to a spill symbol. */
413 static void
414 linear_scan_regalloc (struct m_reg_class_desc *classes)
416 /* Compute liveness. */
417 bool changed;
418 int i, n;
419 int insn_order;
420 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
421 bitmap work = BITMAP_ALLOC (NULL);
422 vec<hsa_op_reg*> ind2reg = vNULL;
423 vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
424 hsa_insn_basic *m_last_insn;
426 /* We will need the reverse post order for linearization,
427 and the post order for liveness analysis, which is the same
428 backward. */
429 n = pre_and_rev_post_order_compute (NULL, bbs, true);
430 ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);
432 /* Give all instructions a linearized number, at the same time
433 build a mapping from register index to register. */
434 insn_order = 1;
435 for (i = 0; i < n; i++)
437 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
438 hsa_bb *hbb = hsa_bb_for_bb (bb);
439 hsa_insn_basic *insn;
440 for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
442 unsigned opi;
443 insn->m_number = insn_order++;
444 for (opi = 0; opi < insn->operand_count (); opi++)
446 gcc_checking_assert (insn->get_op (opi));
447 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
448 if (regaddr)
449 ind2reg[(*regaddr)->m_order] = *regaddr;
454 /* Initialize all live ranges to [after-end, 0). */
455 for (i = 0; i < hsa_cfun->m_reg_count; i++)
456 if (ind2reg[i])
457 ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;
459 /* Classic liveness analysis, as long as something changes:
460 m_liveout is union (m_livein of successors)
461 m_livein is m_liveout minus defs plus uses. */
464 changed = false;
465 for (i = n - 1; i >= 0; i--)
467 edge e;
468 edge_iterator ei;
469 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
470 hsa_bb *hbb = hsa_bb_for_bb (bb);
472 /* Union of successors m_livein (or empty if none). */
473 bool first = true;
474 FOR_EACH_EDGE (e, ei, bb->succs)
475 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
477 hsa_bb *succ = hsa_bb_for_bb (e->dest);
478 if (first)
480 bitmap_copy (work, succ->m_livein);
481 first = false;
483 else
484 bitmap_ior_into (work, succ->m_livein);
486 if (first)
487 bitmap_clear (work);
489 bitmap_copy (hbb->m_liveout, work);
491 /* Remove defs, include uses in a backward insn walk. */
492 hsa_insn_basic *insn;
493 for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
495 unsigned opi;
496 unsigned ndefs = insn->input_count ();
497 for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
499 gcc_checking_assert (insn->get_op (opi));
500 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
501 if (regaddr)
502 bitmap_clear_bit (work, (*regaddr)->m_order);
504 for (; opi < insn->operand_count (); opi++)
506 gcc_checking_assert (insn->get_op (opi));
507 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
508 if (regaddr)
509 bitmap_set_bit (work, (*regaddr)->m_order);
513 /* Note if that changed something. */
514 if (bitmap_ior_into (hbb->m_livein, work))
515 changed = true;
518 while (changed);
520 /* Make one pass through all instructions in linear order,
521 noting and merging possible live range start and end points. */
522 m_last_insn = NULL;
523 for (i = n - 1; i >= 0; i--)
525 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
526 hsa_bb *hbb = hsa_bb_for_bb (bb);
527 hsa_insn_basic *insn;
528 int after_end_number;
529 unsigned bit;
530 bitmap_iterator bi;
532 if (m_last_insn)
533 after_end_number = m_last_insn->m_number;
534 else
535 after_end_number = insn_order;
536 /* Everything live-out in this BB has at least an end point
537 after us. */
538 EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
539 note_lr_end (ind2reg[bit], after_end_number);
541 for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
543 unsigned opi;
544 unsigned ndefs = insn->input_count ();
545 for (opi = 0; opi < insn->operand_count (); opi++)
547 gcc_checking_assert (insn->get_op (opi));
548 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
549 if (regaddr)
551 hsa_op_reg *reg = *regaddr;
552 if (opi < ndefs)
553 note_lr_begin (reg, insn->m_number);
554 else
555 note_lr_end (reg, insn->m_number);
560 /* Everything live-in in this BB has a start point before
561 our first insn. */
562 int before_start_number;
563 if (hbb->m_first_insn)
564 before_start_number = hbb->m_first_insn->m_number;
565 else
566 before_start_number = after_end_number;
567 before_start_number--;
568 EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
569 note_lr_begin (ind2reg[bit], before_start_number);
571 if (hbb->m_first_insn)
572 m_last_insn = hbb->m_first_insn;
575 for (i = 0; i < hsa_cfun->m_reg_count; i++)
576 if (ind2reg[i])
578 /* All regs that have still their start at after all code actually
579 are defined at the start of the routine (prologue). */
580 if (ind2reg[i]->m_lr_begin == insn_order)
581 ind2reg[i]->m_lr_begin = 0;
582 /* All regs that have no use but a def will have lr_end == 0,
583 they are actually live from def until after the insn they are
584 defined in. */
585 if (ind2reg[i]->m_lr_end == 0)
586 ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
589 /* Sort all intervals by increasing start point. */
590 gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);
592 if (flag_checking)
593 for (unsigned i = 0; i < ind2reg.length (); i++)
594 gcc_assert (ind2reg[i]);
596 ind2reg.qsort (cmp_begin);
597 for (i = 0; i < 4; i++)
598 active[i].reserve_exact (hsa_cfun->m_reg_count);
600 /* Now comes the linear scan allocation. */
601 for (i = 0; i < hsa_cfun->m_reg_count; i++)
603 hsa_op_reg *reg = ind2reg[i];
604 if (!reg)
605 continue;
606 expire_old_intervals (reg, active, classes);
607 int cl = m_reg_class_for_type (reg->m_type);
608 if (try_alloc_reg (classes, reg) >= 0)
610 unsigned place = active[cl].lower_bound (reg, cmp_end);
611 active[cl].quick_insert (place, reg);
613 else
614 spill_at_interval (reg, active);
616 /* Some interesting dumping as we go. */
617 if (dump_file && (dump_flags & TDF_DETAILS))
619 fprintf (dump_file, " reg%d: [%5d, %5d)->",
620 reg->m_order, reg->m_lr_begin, reg->m_lr_end);
621 if (reg->m_reg_class)
622 fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
623 else
624 fprintf (dump_file, "[%%__%s_%i]",
625 hsa_seg_name (reg->m_spill_sym->m_segment),
626 reg->m_spill_sym->m_name_number);
627 for (int cl = 0; cl < 4; cl++)
629 bool first = true;
630 hsa_op_reg *r;
631 fprintf (dump_file, " {");
632 for (int j = 0; active[cl].iterate (j, &r); j++)
633 if (first)
635 fprintf (dump_file, "%d", r->m_order);
636 first = false;
638 else
639 fprintf (dump_file, ", %d", r->m_order);
640 fprintf (dump_file, "}");
642 fprintf (dump_file, "\n");
646 BITMAP_FREE (work);
647 free (bbs);
649 if (dump_file && (dump_flags & TDF_DETAILS))
651 fprintf (dump_file, "------- After liveness: -------\n");
652 dump_hsa_cfun_regalloc (dump_file);
653 fprintf (dump_file, " ----- Intervals:\n");
654 for (i = 0; i < hsa_cfun->m_reg_count; i++)
656 hsa_op_reg *reg = ind2reg[i];
657 if (!reg)
658 continue;
659 fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order,
660 reg->m_lr_begin, reg->m_lr_end);
661 if (reg->m_reg_class)
662 fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
663 else
664 fprintf (dump_file, "[%%__%s_%i]\n",
665 hsa_seg_name (reg->m_spill_sym->m_segment),
666 reg->m_spill_sym->m_name_number);
670 for (i = 0; i < 4; i++)
671 active[i].release ();
672 ind2reg.release ();
675 /* Entry point for register allocation. */
677 static void
678 regalloc (void)
680 basic_block bb;
681 m_reg_class_desc classes[4];
683 /* If there are no registers used in the function, exit right away. */
684 if (hsa_cfun->m_reg_count == 0)
685 return;
687 memset (classes, 0, sizeof (classes));
688 classes[0].next_avail = 0;
689 classes[0].max_num = 7;
690 classes[0].cl_char = 'c';
691 classes[1].cl_char = 's';
692 classes[2].cl_char = 'd';
693 classes[3].cl_char = 'q';
695 for (int i = 1; i < 4; i++)
697 classes[i].next_avail = 0;
698 classes[i].max_num = 20;
701 linear_scan_regalloc (classes);
703 FOR_ALL_BB_FN (bb, cfun)
704 rewrite_code_bb (bb, classes);
707 /* Out of SSA and register allocation on HSAIL IL. */
709 void
710 hsa_regalloc (void)
712 hsa_cfun->update_dominance ();
713 naive_outof_ssa ();
715 if (dump_file && (dump_flags & TDF_DETAILS))
717 fprintf (dump_file, "------- After out-of-SSA: -------\n");
718 dump_hsa_cfun (dump_file);
721 regalloc ();
723 if (dump_file && (dump_flags & TDF_DETAILS))
725 fprintf (dump_file, "------- After register allocation: -------\n");
726 dump_hsa_cfun (dump_file);