[4/7] Add a gather_scatter_info structure
[official-gcc.git] / gcc / hsa-regalloc.c
blob50bf51155dfe52174f5c4e147c536a28c481df89
1 /* HSAIL IL Register allocation and out-of-SSA.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Michael Matz <matz@suse.de>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "is-a.h"
26 #include "vec.h"
27 #include "tree.h"
28 #include "dominance.h"
29 #include "cfg.h"
30 #include "cfganal.h"
31 #include "function.h"
32 #include "bitmap.h"
33 #include "dumpfile.h"
34 #include "cgraph.h"
35 #include "print-tree.h"
36 #include "cfghooks.h"
37 #include "symbol-summary.h"
38 #include "hsa.h"
41 /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */
43 static void
44 naive_process_phi (hsa_insn_phi *phi)
46 unsigned count = phi->operand_count ();
47 for (unsigned i = 0; i < count; i++)
49 gcc_checking_assert (phi->get_op (i));
50 hsa_op_base *op = phi->get_op (i);
51 hsa_bb *hbb;
52 edge e;
54 if (!op)
55 break;
57 e = EDGE_PRED (phi->m_bb, i);
58 if (single_succ_p (e->src))
59 hbb = hsa_bb_for_bb (e->src);
60 else
62 basic_block old_dest = e->dest;
63 hbb = hsa_init_new_bb (split_edge (e));
65 /* If switch insn used this edge, fix jump table. */
66 hsa_bb *source = hsa_bb_for_bb (e->src);
67 hsa_insn_sbr *sbr;
68 if (source->m_last_insn
69 && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
70 sbr->replace_all_labels (old_dest, hbb->m_bb);
73 hsa_build_append_simple_mov (phi->m_dest, op, hbb);
77 /* Naive out-of SSA. */
79 static void
80 naive_outof_ssa (void)
82 basic_block bb;
84 hsa_cfun->m_in_ssa = false;
86 FOR_ALL_BB_FN (bb, cfun)
88 hsa_bb *hbb = hsa_bb_for_bb (bb);
89 hsa_insn_phi *phi;
91 for (phi = hbb->m_first_phi;
92 phi;
93 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
94 naive_process_phi (phi);
96 /* Zap PHI nodes, they will be deallocated when everything else will. */
97 hbb->m_first_phi = NULL;
98 hbb->m_last_phi = NULL;
102 /* Return register class number for the given HSA TYPE. 0 means the 'c' one
103 bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
104 and 3 for 'q' 128 bit class. */
106 static int
107 m_reg_class_for_type (BrigType16_t type)
109 switch (type)
111 case BRIG_TYPE_B1:
112 return 0;
114 case BRIG_TYPE_U8:
115 case BRIG_TYPE_U16:
116 case BRIG_TYPE_U32:
117 case BRIG_TYPE_S8:
118 case BRIG_TYPE_S16:
119 case BRIG_TYPE_S32:
120 case BRIG_TYPE_F16:
121 case BRIG_TYPE_F32:
122 case BRIG_TYPE_B8:
123 case BRIG_TYPE_B16:
124 case BRIG_TYPE_B32:
125 case BRIG_TYPE_U8X4:
126 case BRIG_TYPE_S8X4:
127 case BRIG_TYPE_U16X2:
128 case BRIG_TYPE_S16X2:
129 case BRIG_TYPE_F16X2:
130 return 1;
132 case BRIG_TYPE_U64:
133 case BRIG_TYPE_S64:
134 case BRIG_TYPE_F64:
135 case BRIG_TYPE_B64:
136 case BRIG_TYPE_U8X8:
137 case BRIG_TYPE_S8X8:
138 case BRIG_TYPE_U16X4:
139 case BRIG_TYPE_S16X4:
140 case BRIG_TYPE_F16X4:
141 case BRIG_TYPE_U32X2:
142 case BRIG_TYPE_S32X2:
143 case BRIG_TYPE_F32X2:
144 return 2;
146 case BRIG_TYPE_B128:
147 case BRIG_TYPE_U8X16:
148 case BRIG_TYPE_S8X16:
149 case BRIG_TYPE_U16X8:
150 case BRIG_TYPE_S16X8:
151 case BRIG_TYPE_F16X8:
152 case BRIG_TYPE_U32X4:
153 case BRIG_TYPE_U64X2:
154 case BRIG_TYPE_S32X4:
155 case BRIG_TYPE_S64X2:
156 case BRIG_TYPE_F32X4:
157 case BRIG_TYPE_F64X2:
158 return 3;
160 default:
161 gcc_unreachable ();
165 /* If the Ith operands of INSN is or contains a register (in an address),
166 return the address of that register operand. If not return NULL. */
168 static hsa_op_reg **
169 insn_reg_addr (hsa_insn_basic *insn, int i)
171 hsa_op_base *op = insn->get_op (i);
172 if (!op)
173 return NULL;
174 hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
175 if (reg)
176 return (hsa_op_reg **) insn->get_op_addr (i);
177 hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
178 if (addr && addr->m_reg)
179 return &addr->m_reg;
180 return NULL;
183 struct m_reg_class_desc
185 unsigned next_avail, max_num;
186 unsigned used_num, max_used;
187 uint64_t used[2];
188 char cl_char;
191 /* Rewrite the instructions in BB to observe spilled live ranges.
192 CLASSES is the global register class state. */
194 static void
195 rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
197 hsa_bb *hbb = hsa_bb_for_bb (bb);
198 hsa_insn_basic *insn, *next_insn;
200 for (insn = hbb->m_first_insn; insn; insn = next_insn)
202 next_insn = insn->m_next;
203 unsigned count = insn->operand_count ();
204 for (unsigned i = 0; i < count; i++)
206 gcc_checking_assert (insn->get_op (i));
207 hsa_op_reg **regaddr = insn_reg_addr (insn, i);
209 if (regaddr)
211 hsa_op_reg *reg = *regaddr;
212 if (reg->m_reg_class)
213 continue;
214 gcc_assert (reg->m_spill_sym);
216 int cl = m_reg_class_for_type (reg->m_type);
217 hsa_op_reg *tmp, *tmp2;
218 if (insn->op_output_p (i))
219 tmp = hsa_spill_out (insn, reg, &tmp2);
220 else
221 tmp = hsa_spill_in (insn, reg, &tmp2);
223 *regaddr = tmp;
225 tmp->m_reg_class = classes[cl].cl_char;
226 tmp->m_hard_num = (char) (classes[cl].max_num + i);
227 if (tmp2)
229 gcc_assert (cl == 0);
230 tmp2->m_reg_class = classes[1].cl_char;
231 tmp2->m_hard_num = (char) (classes[1].max_num + i);
238 /* Dump current function to dump file F, with info specific
239 to register allocation. */
241 void
242 dump_hsa_cfun_regalloc (FILE *f)
244 basic_block bb;
246 fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
248 FOR_ALL_BB_FN (bb, cfun)
250 hsa_bb *hbb = (struct hsa_bb *) bb->aux;
251 bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n");
252 dump_hsa_bb (f, hbb);
253 bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
257 /* Given the global register allocation state CLASSES and a
258 register REG, try to give it a hardware register. If successful,
259 store that hardreg in REG and return it, otherwise return -1.
260 Also changes CLASSES to accommodate for the allocated register. */
262 static int
263 try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
265 int cl = m_reg_class_for_type (reg->m_type);
266 int ret = -1;
267 if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
268 >= 128 - 5)
269 return -1;
270 if (classes[cl].used_num < classes[cl].max_num)
272 unsigned int i;
273 classes[cl].used_num++;
274 if (classes[cl].used_num > classes[cl].max_used)
275 classes[cl].max_used = classes[cl].used_num;
276 for (i = 0; i < classes[cl].used_num; i++)
277 if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
278 break;
279 ret = i;
280 classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
281 reg->m_reg_class = classes[cl].cl_char;
282 reg->m_hard_num = i;
284 return ret;
287 /* Free up hardregs used by REG, into allocation state CLASSES. */
289 static void
290 free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
292 int cl = m_reg_class_for_type (reg->m_type);
293 int ret = reg->m_hard_num;
294 gcc_assert (reg->m_reg_class == classes[cl].cl_char);
295 classes[cl].used_num--;
296 classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
299 /* Note that the live range for REG ends at least at END. */
301 static void
302 note_lr_end (hsa_op_reg *reg, int end)
304 if (reg->m_lr_end < end)
305 reg->m_lr_end = end;
308 /* Note that the live range for REG starts at least at BEGIN. */
310 static void
311 note_lr_begin (hsa_op_reg *reg, int begin)
313 if (reg->m_lr_begin > begin)
314 reg->m_lr_begin = begin;
317 /* Given two registers A and B, return -1, 0 or 1 if A's live range
318 starts before, at or after B's live range. */
320 static int
321 cmp_begin (const void *a, const void *b)
323 const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
324 const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
325 int ret;
326 if (rega == regb)
327 return 0;
328 ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
329 if (ret)
330 return ret;
331 return ((*rega)->m_order - (*regb)->m_order);
334 /* Given two registers REGA and REGB, return true if REGA's
335 live range ends after REGB's. This results in a sorting order
336 with earlier end points at the end. */
338 static bool
339 cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
341 int ret;
342 if (rega == regb)
343 return false;
344 ret = (regb)->m_lr_end - (rega)->m_lr_end;
345 if (ret)
346 return ret < 0;
347 return (((regb)->m_order - (rega)->m_order)) < 0;
350 /* Expire all old intervals in ACTIVE (a per-regclass vector),
351 that is, those that end before the interval REG starts. Give
352 back resources freed so into the state CLASSES. */
354 static void
355 expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
356 struct m_reg_class_desc *classes)
358 for (int i = 0; i < 4; i++)
359 while (!active[i].is_empty ())
361 hsa_op_reg *a = active[i].pop ();
362 if (a->m_lr_end > reg->m_lr_begin)
364 active[i].quick_push (a);
365 break;
367 free_reg (classes, a);
371 /* The interval REG didn't get a hardreg. Spill it or one of those
372 from ACTIVE (if the latter, then REG will become allocated to the
373 hardreg that formerly was used by it). */
375 static void
376 spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
378 int cl = m_reg_class_for_type (reg->m_type);
379 gcc_assert (!active[cl].is_empty ());
380 hsa_op_reg *cand = active[cl][0];
381 if (cand->m_lr_end > reg->m_lr_end)
383 reg->m_reg_class = cand->m_reg_class;
384 reg->m_hard_num = cand->m_hard_num;
385 active[cl].ordered_remove (0);
386 unsigned place = active[cl].lower_bound (reg, cmp_end);
387 active[cl].quick_insert (place, reg);
389 else
390 cand = reg;
392 gcc_assert (!cand->m_spill_sym);
393 BrigType16_t type = cand->m_type;
394 if (type == BRIG_TYPE_B1)
395 type = BRIG_TYPE_U8;
396 cand->m_reg_class = 0;
397 cand->m_spill_sym = hsa_get_spill_symbol (type);
398 cand->m_spill_sym->m_name_number = cand->m_order;
401 /* Given the global register state CLASSES allocate all HSA virtual
402 registers either to hardregs or to a spill symbol. */
404 static void
405 linear_scan_regalloc (struct m_reg_class_desc *classes)
407 /* Compute liveness. */
408 bool changed;
409 int i, n;
410 int insn_order;
411 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
412 bitmap work = BITMAP_ALLOC (NULL);
413 vec<hsa_op_reg*> ind2reg = vNULL;
414 vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
415 hsa_insn_basic *m_last_insn;
417 /* We will need the reverse post order for linearization,
418 and the post order for liveness analysis, which is the same
419 backward. */
420 n = pre_and_rev_post_order_compute (NULL, bbs, true);
421 ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);
423 /* Give all instructions a linearized number, at the same time
424 build a mapping from register index to register. */
425 insn_order = 1;
426 for (i = 0; i < n; i++)
428 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
429 hsa_bb *hbb = hsa_bb_for_bb (bb);
430 hsa_insn_basic *insn;
431 for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
433 unsigned opi;
434 insn->m_number = insn_order++;
435 for (opi = 0; opi < insn->operand_count (); opi++)
437 gcc_checking_assert (insn->get_op (opi));
438 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
439 if (regaddr)
440 ind2reg[(*regaddr)->m_order] = *regaddr;
445 /* Initialize all live ranges to [after-end, 0). */
446 for (i = 0; i < hsa_cfun->m_reg_count; i++)
447 if (ind2reg[i])
448 ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;
450 /* Classic liveness analysis, as long as something changes:
451 m_liveout is union (m_livein of successors)
452 m_livein is m_liveout minus defs plus uses. */
455 changed = false;
456 for (i = n - 1; i >= 0; i--)
458 edge e;
459 edge_iterator ei;
460 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
461 hsa_bb *hbb = hsa_bb_for_bb (bb);
463 /* Union of successors m_livein (or empty if none). */
464 bool first = true;
465 FOR_EACH_EDGE (e, ei, bb->succs)
466 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
468 hsa_bb *succ = hsa_bb_for_bb (e->dest);
469 if (first)
471 bitmap_copy (work, succ->m_livein);
472 first = false;
474 else
475 bitmap_ior_into (work, succ->m_livein);
477 if (first)
478 bitmap_clear (work);
480 bitmap_copy (hbb->m_liveout, work);
482 /* Remove defs, include uses in a backward insn walk. */
483 hsa_insn_basic *insn;
484 for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
486 unsigned opi;
487 unsigned ndefs = insn->input_count ();
488 for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
490 gcc_checking_assert (insn->get_op (opi));
491 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
492 if (regaddr)
493 bitmap_clear_bit (work, (*regaddr)->m_order);
495 for (; opi < insn->operand_count (); opi++)
497 gcc_checking_assert (insn->get_op (opi));
498 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
499 if (regaddr)
500 bitmap_set_bit (work, (*regaddr)->m_order);
504 /* Note if that changed something. */
505 if (bitmap_ior_into (hbb->m_livein, work))
506 changed = true;
509 while (changed);
511 /* Make one pass through all instructions in linear order,
512 noting and merging possible live range start and end points. */
513 m_last_insn = NULL;
514 for (i = n - 1; i >= 0; i--)
516 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
517 hsa_bb *hbb = hsa_bb_for_bb (bb);
518 hsa_insn_basic *insn;
519 int after_end_number;
520 unsigned bit;
521 bitmap_iterator bi;
523 if (m_last_insn)
524 after_end_number = m_last_insn->m_number;
525 else
526 after_end_number = insn_order;
527 /* Everything live-out in this BB has at least an end point
528 after us. */
529 EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
530 note_lr_end (ind2reg[bit], after_end_number);
532 for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
534 unsigned opi;
535 unsigned ndefs = insn->input_count ();
536 for (opi = 0; opi < insn->operand_count (); opi++)
538 gcc_checking_assert (insn->get_op (opi));
539 hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
540 if (regaddr)
542 hsa_op_reg *reg = *regaddr;
543 if (opi < ndefs)
544 note_lr_begin (reg, insn->m_number);
545 else
546 note_lr_end (reg, insn->m_number);
551 /* Everything live-in in this BB has a start point before
552 our first insn. */
553 int before_start_number;
554 if (hbb->m_first_insn)
555 before_start_number = hbb->m_first_insn->m_number;
556 else
557 before_start_number = after_end_number;
558 before_start_number--;
559 EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
560 note_lr_begin (ind2reg[bit], before_start_number);
562 if (hbb->m_first_insn)
563 m_last_insn = hbb->m_first_insn;
566 for (i = 0; i < hsa_cfun->m_reg_count; i++)
567 if (ind2reg[i])
569 /* All regs that have still their start at after all code actually
570 are defined at the start of the routine (prologue). */
571 if (ind2reg[i]->m_lr_begin == insn_order)
572 ind2reg[i]->m_lr_begin = 0;
573 /* All regs that have no use but a def will have lr_end == 0,
574 they are actually live from def until after the insn they are
575 defined in. */
576 if (ind2reg[i]->m_lr_end == 0)
577 ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
580 /* Sort all intervals by increasing start point. */
581 gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);
583 if (flag_checking)
584 for (unsigned i = 0; i < ind2reg.length (); i++)
585 gcc_assert (ind2reg[i]);
587 ind2reg.qsort (cmp_begin);
588 for (i = 0; i < 4; i++)
589 active[i].reserve_exact (hsa_cfun->m_reg_count);
591 /* Now comes the linear scan allocation. */
592 for (i = 0; i < hsa_cfun->m_reg_count; i++)
594 hsa_op_reg *reg = ind2reg[i];
595 if (!reg)
596 continue;
597 expire_old_intervals (reg, active, classes);
598 int cl = m_reg_class_for_type (reg->m_type);
599 if (try_alloc_reg (classes, reg) >= 0)
601 unsigned place = active[cl].lower_bound (reg, cmp_end);
602 active[cl].quick_insert (place, reg);
604 else
605 spill_at_interval (reg, active);
607 /* Some interesting dumping as we go. */
608 if (dump_file && (dump_flags & TDF_DETAILS))
610 fprintf (dump_file, " reg%d: [%5d, %5d)->",
611 reg->m_order, reg->m_lr_begin, reg->m_lr_end);
612 if (reg->m_reg_class)
613 fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
614 else
615 fprintf (dump_file, "[%%__%s_%i]",
616 hsa_seg_name (reg->m_spill_sym->m_segment),
617 reg->m_spill_sym->m_name_number);
618 for (int cl = 0; cl < 4; cl++)
620 bool first = true;
621 hsa_op_reg *r;
622 fprintf (dump_file, " {");
623 for (int j = 0; active[cl].iterate (j, &r); j++)
624 if (first)
626 fprintf (dump_file, "%d", r->m_order);
627 first = false;
629 else
630 fprintf (dump_file, ", %d", r->m_order);
631 fprintf (dump_file, "}");
633 fprintf (dump_file, "\n");
637 BITMAP_FREE (work);
638 free (bbs);
640 if (dump_file && (dump_flags & TDF_DETAILS))
642 fprintf (dump_file, "------- After liveness: -------\n");
643 dump_hsa_cfun_regalloc (dump_file);
644 fprintf (dump_file, " ----- Intervals:\n");
645 for (i = 0; i < hsa_cfun->m_reg_count; i++)
647 hsa_op_reg *reg = ind2reg[i];
648 if (!reg)
649 continue;
650 fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order,
651 reg->m_lr_begin, reg->m_lr_end);
652 if (reg->m_reg_class)
653 fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
654 else
655 fprintf (dump_file, "[%%__%s_%i]\n",
656 hsa_seg_name (reg->m_spill_sym->m_segment),
657 reg->m_spill_sym->m_name_number);
661 for (i = 0; i < 4; i++)
662 active[i].release ();
663 ind2reg.release ();
666 /* Entry point for register allocation. */
668 static void
669 regalloc (void)
671 basic_block bb;
672 m_reg_class_desc classes[4];
674 /* If there are no registers used in the function, exit right away. */
675 if (hsa_cfun->m_reg_count == 0)
676 return;
678 memset (classes, 0, sizeof (classes));
679 classes[0].next_avail = 0;
680 classes[0].max_num = 7;
681 classes[0].cl_char = 'c';
682 classes[1].cl_char = 's';
683 classes[2].cl_char = 'd';
684 classes[3].cl_char = 'q';
686 for (int i = 1; i < 4; i++)
688 classes[i].next_avail = 0;
689 classes[i].max_num = 20;
692 linear_scan_regalloc (classes);
694 FOR_ALL_BB_FN (bb, cfun)
695 rewrite_code_bb (bb, classes);
698 /* Out of SSA and register allocation on HSAIL IL. */
700 void
701 hsa_regalloc (void)
703 hsa_cfun->update_dominance ();
704 naive_outof_ssa ();
706 if (dump_file && (dump_flags & TDF_DETAILS))
708 fprintf (dump_file, "------- After out-of-SSA: -------\n");
709 dump_hsa_cfun (dump_file);
712 regalloc ();
714 if (dump_file && (dump_flags & TDF_DETAILS))
716 fprintf (dump_file, "------- After register allocation: -------\n");
717 dump_hsa_cfun (dump_file);