1 /* HSAIL IL Register allocation and out-of-SSA.
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 Contributed by Michael Matz <matz@suse.de>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "dominance.h"
29 #include "basic-block.h"
36 #include "print-tree.h"
38 #include "symbol-summary.h"
39 #include "hsa-common.h"
42 /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */
45 naive_process_phi (hsa_insn_phi
*phi
)
47 unsigned count
= phi
->operand_count ();
48 for (unsigned i
= 0; i
< count
; i
++)
50 gcc_checking_assert (phi
->get_op (i
));
51 hsa_op_base
*op
= phi
->get_op (i
);
58 e
= EDGE_PRED (phi
->m_bb
, i
);
59 if (single_succ_p (e
->src
))
60 hbb
= hsa_bb_for_bb (e
->src
);
63 basic_block old_dest
= e
->dest
;
64 hbb
= hsa_init_new_bb (split_edge (e
));
66 /* If switch insn used this edge, fix jump table. */
67 hsa_bb
*source
= hsa_bb_for_bb (e
->src
);
69 if (source
->m_last_insn
70 && (sbr
= dyn_cast
<hsa_insn_sbr
*> (source
->m_last_insn
)))
71 sbr
->replace_all_labels (old_dest
, hbb
->m_bb
);
74 hsa_build_append_simple_mov (phi
->m_dest
, op
, hbb
);
78 /* Naive out-of SSA. */
81 naive_outof_ssa (void)
85 hsa_cfun
->m_in_ssa
= false;
87 FOR_ALL_BB_FN (bb
, cfun
)
89 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
92 for (phi
= hbb
->m_first_phi
;
94 phi
= phi
->m_next
? as_a
<hsa_insn_phi
*> (phi
->m_next
) : NULL
)
95 naive_process_phi (phi
);
97 /* Zap PHI nodes, they will be deallocated when everything else will. */
98 hbb
->m_first_phi
= NULL
;
99 hbb
->m_last_phi
= NULL
;
103 /* Return register class number for the given HSA TYPE. 0 means the 'c' one
104 bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
105 and 3 for 'q' 128 bit class. */
108 m_reg_class_for_type (BrigType16_t type
)
128 case BRIG_TYPE_U16X2
:
129 case BRIG_TYPE_S16X2
:
130 case BRIG_TYPE_F16X2
:
139 case BRIG_TYPE_U16X4
:
140 case BRIG_TYPE_S16X4
:
141 case BRIG_TYPE_F16X4
:
142 case BRIG_TYPE_U32X2
:
143 case BRIG_TYPE_S32X2
:
144 case BRIG_TYPE_F32X2
:
148 case BRIG_TYPE_U8X16
:
149 case BRIG_TYPE_S8X16
:
150 case BRIG_TYPE_U16X8
:
151 case BRIG_TYPE_S16X8
:
152 case BRIG_TYPE_F16X8
:
153 case BRIG_TYPE_U32X4
:
154 case BRIG_TYPE_U64X2
:
155 case BRIG_TYPE_S32X4
:
156 case BRIG_TYPE_S64X2
:
157 case BRIG_TYPE_F32X4
:
158 case BRIG_TYPE_F64X2
:
166 /* If the Ith operands of INSN is or contains a register (in an address),
167 return the address of that register operand. If not return NULL. */
170 insn_reg_addr (hsa_insn_basic
*insn
, int i
)
172 hsa_op_base
*op
= insn
->get_op (i
);
175 hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
);
177 return (hsa_op_reg
**) insn
->get_op_addr (i
);
178 hsa_op_address
*addr
= dyn_cast
<hsa_op_address
*> (op
);
179 if (addr
&& addr
->m_reg
)
184 struct m_reg_class_desc
186 unsigned next_avail
, max_num
;
187 unsigned used_num
, max_used
;
192 /* Rewrite the instructions in BB to observe spilled live ranges.
193 CLASSES is the global register class state. */
196 rewrite_code_bb (basic_block bb
, struct m_reg_class_desc
*classes
)
198 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
199 hsa_insn_basic
*insn
, *next_insn
;
201 for (insn
= hbb
->m_first_insn
; insn
; insn
= next_insn
)
203 next_insn
= insn
->m_next
;
204 unsigned count
= insn
->operand_count ();
205 for (unsigned i
= 0; i
< count
; i
++)
207 gcc_checking_assert (insn
->get_op (i
));
208 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, i
);
212 hsa_op_reg
*reg
= *regaddr
;
213 if (reg
->m_reg_class
)
215 gcc_assert (reg
->m_spill_sym
);
217 int cl
= m_reg_class_for_type (reg
->m_type
);
218 hsa_op_reg
*tmp
, *tmp2
;
219 if (insn
->op_output_p (i
))
220 tmp
= hsa_spill_out (insn
, reg
, &tmp2
);
222 tmp
= hsa_spill_in (insn
, reg
, &tmp2
);
226 tmp
->m_reg_class
= classes
[cl
].cl_char
;
227 tmp
->m_hard_num
= (char) (classes
[cl
].max_num
+ i
);
230 gcc_assert (cl
== 0);
231 tmp2
->m_reg_class
= classes
[1].cl_char
;
232 tmp2
->m_hard_num
= (char) (classes
[1].max_num
+ i
);
239 /* Dump current function to dump file F, with info specific
240 to register allocation. */
243 dump_hsa_cfun_regalloc (FILE *f
)
247 fprintf (f
, "\nHSAIL IL for %s\n", hsa_cfun
->m_name
);
249 FOR_ALL_BB_FN (bb
, cfun
)
251 hsa_bb
*hbb
= (struct hsa_bb
*) bb
->aux
;
252 bitmap_print (dump_file
, hbb
->m_livein
, "m_livein ", "\n");
253 dump_hsa_bb (f
, hbb
);
254 bitmap_print (dump_file
, hbb
->m_liveout
, "m_liveout ", "\n");
258 /* Given the global register allocation state CLASSES and a
259 register REG, try to give it a hardware register. If successful,
260 store that hardreg in REG and return it, otherwise return -1.
261 Also changes CLASSES to accommodate for the allocated register. */
264 try_alloc_reg (struct m_reg_class_desc
*classes
, hsa_op_reg
*reg
)
266 int cl
= m_reg_class_for_type (reg
->m_type
);
268 if (classes
[1].used_num
+ classes
[2].used_num
* 2 + classes
[3].used_num
* 4
271 if (classes
[cl
].used_num
< classes
[cl
].max_num
)
274 classes
[cl
].used_num
++;
275 if (classes
[cl
].used_num
> classes
[cl
].max_used
)
276 classes
[cl
].max_used
= classes
[cl
].used_num
;
277 for (i
= 0; i
< classes
[cl
].used_num
; i
++)
278 if (! (classes
[cl
].used
[i
/ 64] & (((uint64_t)1) << (i
& 63))))
281 classes
[cl
].used
[i
/ 64] |= (((uint64_t)1) << (i
& 63));
282 reg
->m_reg_class
= classes
[cl
].cl_char
;
288 /* Free up hardregs used by REG, into allocation state CLASSES. */
291 free_reg (struct m_reg_class_desc
*classes
, hsa_op_reg
*reg
)
293 int cl
= m_reg_class_for_type (reg
->m_type
);
294 int ret
= reg
->m_hard_num
;
295 gcc_assert (reg
->m_reg_class
== classes
[cl
].cl_char
);
296 classes
[cl
].used_num
--;
297 classes
[cl
].used
[ret
/ 64] &= ~(((uint64_t)1) << (ret
& 63));
300 /* Note that the live range for REG ends at least at END. */
303 note_lr_end (hsa_op_reg
*reg
, int end
)
305 if (reg
->m_lr_end
< end
)
309 /* Note that the live range for REG starts at least at BEGIN. */
312 note_lr_begin (hsa_op_reg
*reg
, int begin
)
314 if (reg
->m_lr_begin
> begin
)
315 reg
->m_lr_begin
= begin
;
318 /* Given two registers A and B, return -1, 0 or 1 if A's live range
319 starts before, at or after B's live range. */
322 cmp_begin (const void *a
, const void *b
)
324 const hsa_op_reg
* const *rega
= (const hsa_op_reg
* const *)a
;
325 const hsa_op_reg
* const *regb
= (const hsa_op_reg
* const *)b
;
329 ret
= (*rega
)->m_lr_begin
- (*regb
)->m_lr_begin
;
332 return ((*rega
)->m_order
- (*regb
)->m_order
);
335 /* Given two registers REGA and REGB, return true if REGA's
336 live range ends after REGB's. This results in a sorting order
337 with earlier end points at the end. */
340 cmp_end (hsa_op_reg
* const ®a
, hsa_op_reg
* const ®b
)
345 ret
= (regb
)->m_lr_end
- (rega
)->m_lr_end
;
348 return (((regb
)->m_order
- (rega
)->m_order
)) < 0;
351 /* Expire all old intervals in ACTIVE (a per-regclass vector),
352 that is, those that end before the interval REG starts. Give
353 back resources freed so into the state CLASSES. */
356 expire_old_intervals (hsa_op_reg
*reg
, vec
<hsa_op_reg
*> *active
,
357 struct m_reg_class_desc
*classes
)
359 for (int i
= 0; i
< 4; i
++)
360 while (!active
[i
].is_empty ())
362 hsa_op_reg
*a
= active
[i
].pop ();
363 if (a
->m_lr_end
> reg
->m_lr_begin
)
365 active
[i
].quick_push (a
);
368 free_reg (classes
, a
);
372 /* The interval REG didn't get a hardreg. Spill it or one of those
373 from ACTIVE (if the latter, then REG will become allocated to the
374 hardreg that formerly was used by it). */
377 spill_at_interval (hsa_op_reg
*reg
, vec
<hsa_op_reg
*> *active
)
379 int cl
= m_reg_class_for_type (reg
->m_type
);
380 gcc_assert (!active
[cl
].is_empty ());
381 hsa_op_reg
*cand
= active
[cl
][0];
382 if (cand
->m_lr_end
> reg
->m_lr_end
)
384 reg
->m_reg_class
= cand
->m_reg_class
;
385 reg
->m_hard_num
= cand
->m_hard_num
;
386 active
[cl
].ordered_remove (0);
387 unsigned place
= active
[cl
].lower_bound (reg
, cmp_end
);
388 active
[cl
].quick_insert (place
, reg
);
393 gcc_assert (!cand
->m_spill_sym
);
394 BrigType16_t type
= cand
->m_type
;
395 if (type
== BRIG_TYPE_B1
)
397 cand
->m_reg_class
= 0;
398 cand
->m_spill_sym
= hsa_get_spill_symbol (type
);
399 cand
->m_spill_sym
->m_name_number
= cand
->m_order
;
402 /* Given the global register state CLASSES allocate all HSA virtual
403 registers either to hardregs or to a spill symbol. */
406 linear_scan_regalloc (struct m_reg_class_desc
*classes
)
408 /* Compute liveness. */
412 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
413 bitmap work
= BITMAP_ALLOC (NULL
);
414 vec
<hsa_op_reg
*> ind2reg
= vNULL
;
415 vec
<hsa_op_reg
*> active
[4] = {vNULL
, vNULL
, vNULL
, vNULL
};
416 hsa_insn_basic
*m_last_insn
;
418 /* We will need the reverse post order for linearization,
419 and the post order for liveness analysis, which is the same
421 n
= pre_and_rev_post_order_compute (NULL
, bbs
, true);
422 ind2reg
.safe_grow_cleared (hsa_cfun
->m_reg_count
);
424 /* Give all instructions a linearized number, at the same time
425 build a mapping from register index to register. */
427 for (i
= 0; i
< n
; i
++)
429 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, bbs
[i
]);
430 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
431 hsa_insn_basic
*insn
;
432 for (insn
= hbb
->m_first_insn
; insn
; insn
= insn
->m_next
)
435 insn
->m_number
= insn_order
++;
436 for (opi
= 0; opi
< insn
->operand_count (); opi
++)
438 gcc_checking_assert (insn
->get_op (opi
));
439 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
441 ind2reg
[(*regaddr
)->m_order
] = *regaddr
;
446 /* Initialize all live ranges to [after-end, 0). */
447 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
449 ind2reg
[i
]->m_lr_begin
= insn_order
, ind2reg
[i
]->m_lr_end
= 0;
451 /* Classic liveness analysis, as long as something changes:
452 m_liveout is union (m_livein of successors)
453 m_livein is m_liveout minus defs plus uses. */
457 for (i
= n
- 1; i
>= 0; i
--)
461 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, bbs
[i
]);
462 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
464 /* Union of successors m_livein (or empty if none). */
466 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
467 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
469 hsa_bb
*succ
= hsa_bb_for_bb (e
->dest
);
472 bitmap_copy (work
, succ
->m_livein
);
476 bitmap_ior_into (work
, succ
->m_livein
);
481 bitmap_copy (hbb
->m_liveout
, work
);
483 /* Remove defs, include uses in a backward insn walk. */
484 hsa_insn_basic
*insn
;
485 for (insn
= hbb
->m_last_insn
; insn
; insn
= insn
->m_prev
)
488 unsigned ndefs
= insn
->input_count ();
489 for (opi
= 0; opi
< ndefs
&& insn
->get_op (opi
); opi
++)
491 gcc_checking_assert (insn
->get_op (opi
));
492 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
494 bitmap_clear_bit (work
, (*regaddr
)->m_order
);
496 for (; opi
< insn
->operand_count (); opi
++)
498 gcc_checking_assert (insn
->get_op (opi
));
499 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
501 bitmap_set_bit (work
, (*regaddr
)->m_order
);
505 /* Note if that changed something. */
506 if (bitmap_ior_into (hbb
->m_livein
, work
))
512 /* Make one pass through all instructions in linear order,
513 noting and merging possible live range start and end points. */
515 for (i
= n
- 1; i
>= 0; i
--)
517 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, bbs
[i
]);
518 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
519 hsa_insn_basic
*insn
;
520 int after_end_number
;
525 after_end_number
= m_last_insn
->m_number
;
527 after_end_number
= insn_order
;
528 /* Everything live-out in this BB has at least an end point
530 EXECUTE_IF_SET_IN_BITMAP (hbb
->m_liveout
, 0, bit
, bi
)
531 note_lr_end (ind2reg
[bit
], after_end_number
);
533 for (insn
= hbb
->m_last_insn
; insn
; insn
= insn
->m_prev
)
536 unsigned ndefs
= insn
->input_count ();
537 for (opi
= 0; opi
< insn
->operand_count (); opi
++)
539 gcc_checking_assert (insn
->get_op (opi
));
540 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
543 hsa_op_reg
*reg
= *regaddr
;
545 note_lr_begin (reg
, insn
->m_number
);
547 note_lr_end (reg
, insn
->m_number
);
552 /* Everything live-in in this BB has a start point before
554 int before_start_number
;
555 if (hbb
->m_first_insn
)
556 before_start_number
= hbb
->m_first_insn
->m_number
;
558 before_start_number
= after_end_number
;
559 before_start_number
--;
560 EXECUTE_IF_SET_IN_BITMAP (hbb
->m_livein
, 0, bit
, bi
)
561 note_lr_begin (ind2reg
[bit
], before_start_number
);
563 if (hbb
->m_first_insn
)
564 m_last_insn
= hbb
->m_first_insn
;
567 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
570 /* All regs that have still their start at after all code actually
571 are defined at the start of the routine (prologue). */
572 if (ind2reg
[i
]->m_lr_begin
== insn_order
)
573 ind2reg
[i
]->m_lr_begin
= 0;
574 /* All regs that have no use but a def will have lr_end == 0,
575 they are actually live from def until after the insn they are
577 if (ind2reg
[i
]->m_lr_end
== 0)
578 ind2reg
[i
]->m_lr_end
= ind2reg
[i
]->m_lr_begin
+ 1;
581 /* Sort all intervals by increasing start point. */
582 gcc_assert (ind2reg
.length () == (size_t) hsa_cfun
->m_reg_count
);
585 for (unsigned i
= 0; i
< ind2reg
.length (); i
++)
586 gcc_assert (ind2reg
[i
]);
588 ind2reg
.qsort (cmp_begin
);
589 for (i
= 0; i
< 4; i
++)
590 active
[i
].reserve_exact (hsa_cfun
->m_reg_count
);
592 /* Now comes the linear scan allocation. */
593 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
595 hsa_op_reg
*reg
= ind2reg
[i
];
598 expire_old_intervals (reg
, active
, classes
);
599 int cl
= m_reg_class_for_type (reg
->m_type
);
600 if (try_alloc_reg (classes
, reg
) >= 0)
602 unsigned place
= active
[cl
].lower_bound (reg
, cmp_end
);
603 active
[cl
].quick_insert (place
, reg
);
606 spill_at_interval (reg
, active
);
608 /* Some interesting dumping as we go. */
609 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
611 fprintf (dump_file
, " reg%d: [%5d, %5d)->",
612 reg
->m_order
, reg
->m_lr_begin
, reg
->m_lr_end
);
613 if (reg
->m_reg_class
)
614 fprintf (dump_file
, "$%c%i", reg
->m_reg_class
, reg
->m_hard_num
);
616 fprintf (dump_file
, "[%%__%s_%i]",
617 hsa_seg_name (reg
->m_spill_sym
->m_segment
),
618 reg
->m_spill_sym
->m_name_number
);
619 for (int cl
= 0; cl
< 4; cl
++)
623 fprintf (dump_file
, " {");
624 for (int j
= 0; active
[cl
].iterate (j
, &r
); j
++)
627 fprintf (dump_file
, "%d", r
->m_order
);
631 fprintf (dump_file
, ", %d", r
->m_order
);
632 fprintf (dump_file
, "}");
634 fprintf (dump_file
, "\n");
641 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
643 fprintf (dump_file
, "------- After liveness: -------\n");
644 dump_hsa_cfun_regalloc (dump_file
);
645 fprintf (dump_file
, " ----- Intervals:\n");
646 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
648 hsa_op_reg
*reg
= ind2reg
[i
];
651 fprintf (dump_file
, " reg%d: [%5d, %5d)->", reg
->m_order
,
652 reg
->m_lr_begin
, reg
->m_lr_end
);
653 if (reg
->m_reg_class
)
654 fprintf (dump_file
, "$%c%i\n", reg
->m_reg_class
, reg
->m_hard_num
);
656 fprintf (dump_file
, "[%%__%s_%i]\n",
657 hsa_seg_name (reg
->m_spill_sym
->m_segment
),
658 reg
->m_spill_sym
->m_name_number
);
662 for (i
= 0; i
< 4; i
++)
663 active
[i
].release ();
667 /* Entry point for register allocation. */
673 m_reg_class_desc classes
[4];
675 /* If there are no registers used in the function, exit right away. */
676 if (hsa_cfun
->m_reg_count
== 0)
679 memset (classes
, 0, sizeof (classes
));
680 classes
[0].next_avail
= 0;
681 classes
[0].max_num
= 7;
682 classes
[0].cl_char
= 'c';
683 classes
[1].cl_char
= 's';
684 classes
[2].cl_char
= 'd';
685 classes
[3].cl_char
= 'q';
687 for (int i
= 1; i
< 4; i
++)
689 classes
[i
].next_avail
= 0;
690 classes
[i
].max_num
= 20;
693 linear_scan_regalloc (classes
);
695 FOR_ALL_BB_FN (bb
, cfun
)
696 rewrite_code_bb (bb
, classes
);
699 /* Out of SSA and register allocation on HSAIL IL. */
704 hsa_cfun
->update_dominance ();
707 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
709 fprintf (dump_file
, "------- After out-of-SSA: -------\n");
710 dump_hsa_cfun (dump_file
);
715 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
717 fprintf (dump_file
, "------- After register allocation: -------\n");
718 dump_hsa_cfun (dump_file
);