1 /* HSAIL IL Register allocation and out-of-SSA.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Michael Matz <matz@suse.de>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "dominance.h"
35 #include "print-tree.h"
37 #include "symbol-summary.h"
41 /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */
44 naive_process_phi (hsa_insn_phi
*phi
)
46 unsigned count
= phi
->operand_count ();
47 for (unsigned i
= 0; i
< count
; i
++)
49 gcc_checking_assert (phi
->get_op (i
));
50 hsa_op_base
*op
= phi
->get_op (i
);
57 e
= EDGE_PRED (phi
->m_bb
, i
);
58 if (single_succ_p (e
->src
))
59 hbb
= hsa_bb_for_bb (e
->src
);
62 basic_block old_dest
= e
->dest
;
63 hbb
= hsa_init_new_bb (split_edge (e
));
65 /* If switch insn used this edge, fix jump table. */
66 hsa_bb
*source
= hsa_bb_for_bb (e
->src
);
68 if (source
->m_last_insn
69 && (sbr
= dyn_cast
<hsa_insn_sbr
*> (source
->m_last_insn
)))
70 sbr
->replace_all_labels (old_dest
, hbb
->m_bb
);
73 hsa_build_append_simple_mov (phi
->m_dest
, op
, hbb
);
77 /* Naive out-of SSA. */
80 naive_outof_ssa (void)
84 hsa_cfun
->m_in_ssa
= false;
86 FOR_ALL_BB_FN (bb
, cfun
)
88 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
91 for (phi
= hbb
->m_first_phi
;
93 phi
= phi
->m_next
? as_a
<hsa_insn_phi
*> (phi
->m_next
) : NULL
)
94 naive_process_phi (phi
);
96 /* Zap PHI nodes, they will be deallocated when everything else will. */
97 hbb
->m_first_phi
= NULL
;
98 hbb
->m_last_phi
= NULL
;
102 /* Return register class number for the given HSA TYPE. 0 means the 'c' one
103 bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
104 and 3 for 'q' 128 bit class. */
107 m_reg_class_for_type (BrigType16_t type
)
127 case BRIG_TYPE_U16X2
:
128 case BRIG_TYPE_S16X2
:
129 case BRIG_TYPE_F16X2
:
138 case BRIG_TYPE_U16X4
:
139 case BRIG_TYPE_S16X4
:
140 case BRIG_TYPE_F16X4
:
141 case BRIG_TYPE_U32X2
:
142 case BRIG_TYPE_S32X2
:
143 case BRIG_TYPE_F32X2
:
147 case BRIG_TYPE_U8X16
:
148 case BRIG_TYPE_S8X16
:
149 case BRIG_TYPE_U16X8
:
150 case BRIG_TYPE_S16X8
:
151 case BRIG_TYPE_F16X8
:
152 case BRIG_TYPE_U32X4
:
153 case BRIG_TYPE_U64X2
:
154 case BRIG_TYPE_S32X4
:
155 case BRIG_TYPE_S64X2
:
156 case BRIG_TYPE_F32X4
:
157 case BRIG_TYPE_F64X2
:
165 /* If the Ith operands of INSN is or contains a register (in an address),
166 return the address of that register operand. If not return NULL. */
169 insn_reg_addr (hsa_insn_basic
*insn
, int i
)
171 hsa_op_base
*op
= insn
->get_op (i
);
174 hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
);
176 return (hsa_op_reg
**) insn
->get_op_addr (i
);
177 hsa_op_address
*addr
= dyn_cast
<hsa_op_address
*> (op
);
178 if (addr
&& addr
->m_reg
)
183 struct m_reg_class_desc
185 unsigned next_avail
, max_num
;
186 unsigned used_num
, max_used
;
191 /* Rewrite the instructions in BB to observe spilled live ranges.
192 CLASSES is the global register class state. */
195 rewrite_code_bb (basic_block bb
, struct m_reg_class_desc
*classes
)
197 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
198 hsa_insn_basic
*insn
, *next_insn
;
200 for (insn
= hbb
->m_first_insn
; insn
; insn
= next_insn
)
202 next_insn
= insn
->m_next
;
203 unsigned count
= insn
->operand_count ();
204 for (unsigned i
= 0; i
< count
; i
++)
206 gcc_checking_assert (insn
->get_op (i
));
207 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, i
);
211 hsa_op_reg
*reg
= *regaddr
;
212 if (reg
->m_reg_class
)
214 gcc_assert (reg
->m_spill_sym
);
216 int cl
= m_reg_class_for_type (reg
->m_type
);
217 hsa_op_reg
*tmp
, *tmp2
;
218 if (insn
->op_output_p (i
))
219 tmp
= hsa_spill_out (insn
, reg
, &tmp2
);
221 tmp
= hsa_spill_in (insn
, reg
, &tmp2
);
225 tmp
->m_reg_class
= classes
[cl
].cl_char
;
226 tmp
->m_hard_num
= (char) (classes
[cl
].max_num
+ i
);
229 gcc_assert (cl
== 0);
230 tmp2
->m_reg_class
= classes
[1].cl_char
;
231 tmp2
->m_hard_num
= (char) (classes
[1].max_num
+ i
);
238 /* Dump current function to dump file F, with info specific
239 to register allocation. */
242 dump_hsa_cfun_regalloc (FILE *f
)
246 fprintf (f
, "\nHSAIL IL for %s\n", hsa_cfun
->m_name
);
248 FOR_ALL_BB_FN (bb
, cfun
)
250 hsa_bb
*hbb
= (struct hsa_bb
*) bb
->aux
;
251 bitmap_print (dump_file
, hbb
->m_livein
, "m_livein ", "\n");
252 dump_hsa_bb (f
, hbb
);
253 bitmap_print (dump_file
, hbb
->m_liveout
, "m_liveout ", "\n");
257 /* Given the global register allocation state CLASSES and a
258 register REG, try to give it a hardware register. If successful,
259 store that hardreg in REG and return it, otherwise return -1.
260 Also changes CLASSES to accommodate for the allocated register. */
263 try_alloc_reg (struct m_reg_class_desc
*classes
, hsa_op_reg
*reg
)
265 int cl
= m_reg_class_for_type (reg
->m_type
);
267 if (classes
[1].used_num
+ classes
[2].used_num
* 2 + classes
[3].used_num
* 4
270 if (classes
[cl
].used_num
< classes
[cl
].max_num
)
273 classes
[cl
].used_num
++;
274 if (classes
[cl
].used_num
> classes
[cl
].max_used
)
275 classes
[cl
].max_used
= classes
[cl
].used_num
;
276 for (i
= 0; i
< classes
[cl
].used_num
; i
++)
277 if (! (classes
[cl
].used
[i
/ 64] & (((uint64_t)1) << (i
& 63))))
280 classes
[cl
].used
[i
/ 64] |= (((uint64_t)1) << (i
& 63));
281 reg
->m_reg_class
= classes
[cl
].cl_char
;
287 /* Free up hardregs used by REG, into allocation state CLASSES. */
290 free_reg (struct m_reg_class_desc
*classes
, hsa_op_reg
*reg
)
292 int cl
= m_reg_class_for_type (reg
->m_type
);
293 int ret
= reg
->m_hard_num
;
294 gcc_assert (reg
->m_reg_class
== classes
[cl
].cl_char
);
295 classes
[cl
].used_num
--;
296 classes
[cl
].used
[ret
/ 64] &= ~(((uint64_t)1) << (ret
& 63));
299 /* Note that the live range for REG ends at least at END. */
302 note_lr_end (hsa_op_reg
*reg
, int end
)
304 if (reg
->m_lr_end
< end
)
308 /* Note that the live range for REG starts at least at BEGIN. */
311 note_lr_begin (hsa_op_reg
*reg
, int begin
)
313 if (reg
->m_lr_begin
> begin
)
314 reg
->m_lr_begin
= begin
;
317 /* Given two registers A and B, return -1, 0 or 1 if A's live range
318 starts before, at or after B's live range. */
321 cmp_begin (const void *a
, const void *b
)
323 const hsa_op_reg
* const *rega
= (const hsa_op_reg
* const *)a
;
324 const hsa_op_reg
* const *regb
= (const hsa_op_reg
* const *)b
;
328 ret
= (*rega
)->m_lr_begin
- (*regb
)->m_lr_begin
;
331 return ((*rega
)->m_order
- (*regb
)->m_order
);
334 /* Given two registers REGA and REGB, return true if REGA's
335 live range ends after REGB's. This results in a sorting order
336 with earlier end points at the end. */
339 cmp_end (hsa_op_reg
* const ®a
, hsa_op_reg
* const ®b
)
344 ret
= (regb
)->m_lr_end
- (rega
)->m_lr_end
;
347 return (((regb
)->m_order
- (rega
)->m_order
)) < 0;
350 /* Expire all old intervals in ACTIVE (a per-regclass vector),
351 that is, those that end before the interval REG starts. Give
352 back resources freed so into the state CLASSES. */
355 expire_old_intervals (hsa_op_reg
*reg
, vec
<hsa_op_reg
*> *active
,
356 struct m_reg_class_desc
*classes
)
358 for (int i
= 0; i
< 4; i
++)
359 while (!active
[i
].is_empty ())
361 hsa_op_reg
*a
= active
[i
].pop ();
362 if (a
->m_lr_end
> reg
->m_lr_begin
)
364 active
[i
].quick_push (a
);
367 free_reg (classes
, a
);
371 /* The interval REG didn't get a hardreg. Spill it or one of those
372 from ACTIVE (if the latter, then REG will become allocated to the
373 hardreg that formerly was used by it). */
376 spill_at_interval (hsa_op_reg
*reg
, vec
<hsa_op_reg
*> *active
)
378 int cl
= m_reg_class_for_type (reg
->m_type
);
379 gcc_assert (!active
[cl
].is_empty ());
380 hsa_op_reg
*cand
= active
[cl
][0];
381 if (cand
->m_lr_end
> reg
->m_lr_end
)
383 reg
->m_reg_class
= cand
->m_reg_class
;
384 reg
->m_hard_num
= cand
->m_hard_num
;
385 active
[cl
].ordered_remove (0);
386 unsigned place
= active
[cl
].lower_bound (reg
, cmp_end
);
387 active
[cl
].quick_insert (place
, reg
);
392 gcc_assert (!cand
->m_spill_sym
);
393 BrigType16_t type
= cand
->m_type
;
394 if (type
== BRIG_TYPE_B1
)
396 cand
->m_reg_class
= 0;
397 cand
->m_spill_sym
= hsa_get_spill_symbol (type
);
398 cand
->m_spill_sym
->m_name_number
= cand
->m_order
;
401 /* Given the global register state CLASSES allocate all HSA virtual
402 registers either to hardregs or to a spill symbol. */
405 linear_scan_regalloc (struct m_reg_class_desc
*classes
)
407 /* Compute liveness. */
411 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
412 bitmap work
= BITMAP_ALLOC (NULL
);
413 vec
<hsa_op_reg
*> ind2reg
= vNULL
;
414 vec
<hsa_op_reg
*> active
[4] = {vNULL
, vNULL
, vNULL
, vNULL
};
415 hsa_insn_basic
*m_last_insn
;
417 /* We will need the reverse post order for linearization,
418 and the post order for liveness analysis, which is the same
420 n
= pre_and_rev_post_order_compute (NULL
, bbs
, true);
421 ind2reg
.safe_grow_cleared (hsa_cfun
->m_reg_count
);
423 /* Give all instructions a linearized number, at the same time
424 build a mapping from register index to register. */
426 for (i
= 0; i
< n
; i
++)
428 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, bbs
[i
]);
429 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
430 hsa_insn_basic
*insn
;
431 for (insn
= hbb
->m_first_insn
; insn
; insn
= insn
->m_next
)
434 insn
->m_number
= insn_order
++;
435 for (opi
= 0; opi
< insn
->operand_count (); opi
++)
437 gcc_checking_assert (insn
->get_op (opi
));
438 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
440 ind2reg
[(*regaddr
)->m_order
] = *regaddr
;
445 /* Initialize all live ranges to [after-end, 0). */
446 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
448 ind2reg
[i
]->m_lr_begin
= insn_order
, ind2reg
[i
]->m_lr_end
= 0;
450 /* Classic liveness analysis, as long as something changes:
451 m_liveout is union (m_livein of successors)
452 m_livein is m_liveout minus defs plus uses. */
456 for (i
= n
- 1; i
>= 0; i
--)
460 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, bbs
[i
]);
461 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
463 /* Union of successors m_livein (or empty if none). */
465 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
466 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
468 hsa_bb
*succ
= hsa_bb_for_bb (e
->dest
);
471 bitmap_copy (work
, succ
->m_livein
);
475 bitmap_ior_into (work
, succ
->m_livein
);
480 bitmap_copy (hbb
->m_liveout
, work
);
482 /* Remove defs, include uses in a backward insn walk. */
483 hsa_insn_basic
*insn
;
484 for (insn
= hbb
->m_last_insn
; insn
; insn
= insn
->m_prev
)
487 unsigned ndefs
= insn
->input_count ();
488 for (opi
= 0; opi
< ndefs
&& insn
->get_op (opi
); opi
++)
490 gcc_checking_assert (insn
->get_op (opi
));
491 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
493 bitmap_clear_bit (work
, (*regaddr
)->m_order
);
495 for (; opi
< insn
->operand_count (); opi
++)
497 gcc_checking_assert (insn
->get_op (opi
));
498 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
500 bitmap_set_bit (work
, (*regaddr
)->m_order
);
504 /* Note if that changed something. */
505 if (bitmap_ior_into (hbb
->m_livein
, work
))
511 /* Make one pass through all instructions in linear order,
512 noting and merging possible live range start and end points. */
514 for (i
= n
- 1; i
>= 0; i
--)
516 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, bbs
[i
]);
517 hsa_bb
*hbb
= hsa_bb_for_bb (bb
);
518 hsa_insn_basic
*insn
;
519 int after_end_number
;
524 after_end_number
= m_last_insn
->m_number
;
526 after_end_number
= insn_order
;
527 /* Everything live-out in this BB has at least an end point
529 EXECUTE_IF_SET_IN_BITMAP (hbb
->m_liveout
, 0, bit
, bi
)
530 note_lr_end (ind2reg
[bit
], after_end_number
);
532 for (insn
= hbb
->m_last_insn
; insn
; insn
= insn
->m_prev
)
535 unsigned ndefs
= insn
->input_count ();
536 for (opi
= 0; opi
< insn
->operand_count (); opi
++)
538 gcc_checking_assert (insn
->get_op (opi
));
539 hsa_op_reg
**regaddr
= insn_reg_addr (insn
, opi
);
542 hsa_op_reg
*reg
= *regaddr
;
544 note_lr_begin (reg
, insn
->m_number
);
546 note_lr_end (reg
, insn
->m_number
);
551 /* Everything live-in in this BB has a start point before
553 int before_start_number
;
554 if (hbb
->m_first_insn
)
555 before_start_number
= hbb
->m_first_insn
->m_number
;
557 before_start_number
= after_end_number
;
558 before_start_number
--;
559 EXECUTE_IF_SET_IN_BITMAP (hbb
->m_livein
, 0, bit
, bi
)
560 note_lr_begin (ind2reg
[bit
], before_start_number
);
562 if (hbb
->m_first_insn
)
563 m_last_insn
= hbb
->m_first_insn
;
566 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
569 /* All regs that have still their start at after all code actually
570 are defined at the start of the routine (prologue). */
571 if (ind2reg
[i
]->m_lr_begin
== insn_order
)
572 ind2reg
[i
]->m_lr_begin
= 0;
573 /* All regs that have no use but a def will have lr_end == 0,
574 they are actually live from def until after the insn they are
576 if (ind2reg
[i
]->m_lr_end
== 0)
577 ind2reg
[i
]->m_lr_end
= ind2reg
[i
]->m_lr_begin
+ 1;
580 /* Sort all intervals by increasing start point. */
581 gcc_assert (ind2reg
.length () == (size_t) hsa_cfun
->m_reg_count
);
584 for (unsigned i
= 0; i
< ind2reg
.length (); i
++)
585 gcc_assert (ind2reg
[i
]);
587 ind2reg
.qsort (cmp_begin
);
588 for (i
= 0; i
< 4; i
++)
589 active
[i
].reserve_exact (hsa_cfun
->m_reg_count
);
591 /* Now comes the linear scan allocation. */
592 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
594 hsa_op_reg
*reg
= ind2reg
[i
];
597 expire_old_intervals (reg
, active
, classes
);
598 int cl
= m_reg_class_for_type (reg
->m_type
);
599 if (try_alloc_reg (classes
, reg
) >= 0)
601 unsigned place
= active
[cl
].lower_bound (reg
, cmp_end
);
602 active
[cl
].quick_insert (place
, reg
);
605 spill_at_interval (reg
, active
);
607 /* Some interesting dumping as we go. */
608 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
610 fprintf (dump_file
, " reg%d: [%5d, %5d)->",
611 reg
->m_order
, reg
->m_lr_begin
, reg
->m_lr_end
);
612 if (reg
->m_reg_class
)
613 fprintf (dump_file
, "$%c%i", reg
->m_reg_class
, reg
->m_hard_num
);
615 fprintf (dump_file
, "[%%__%s_%i]",
616 hsa_seg_name (reg
->m_spill_sym
->m_segment
),
617 reg
->m_spill_sym
->m_name_number
);
618 for (int cl
= 0; cl
< 4; cl
++)
622 fprintf (dump_file
, " {");
623 for (int j
= 0; active
[cl
].iterate (j
, &r
); j
++)
626 fprintf (dump_file
, "%d", r
->m_order
);
630 fprintf (dump_file
, ", %d", r
->m_order
);
631 fprintf (dump_file
, "}");
633 fprintf (dump_file
, "\n");
640 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
642 fprintf (dump_file
, "------- After liveness: -------\n");
643 dump_hsa_cfun_regalloc (dump_file
);
644 fprintf (dump_file
, " ----- Intervals:\n");
645 for (i
= 0; i
< hsa_cfun
->m_reg_count
; i
++)
647 hsa_op_reg
*reg
= ind2reg
[i
];
650 fprintf (dump_file
, " reg%d: [%5d, %5d)->", reg
->m_order
,
651 reg
->m_lr_begin
, reg
->m_lr_end
);
652 if (reg
->m_reg_class
)
653 fprintf (dump_file
, "$%c%i\n", reg
->m_reg_class
, reg
->m_hard_num
);
655 fprintf (dump_file
, "[%%__%s_%i]\n",
656 hsa_seg_name (reg
->m_spill_sym
->m_segment
),
657 reg
->m_spill_sym
->m_name_number
);
661 for (i
= 0; i
< 4; i
++)
662 active
[i
].release ();
666 /* Entry point for register allocation. */
672 m_reg_class_desc classes
[4];
674 /* If there are no registers used in the function, exit right away. */
675 if (hsa_cfun
->m_reg_count
== 0)
678 memset (classes
, 0, sizeof (classes
));
679 classes
[0].next_avail
= 0;
680 classes
[0].max_num
= 7;
681 classes
[0].cl_char
= 'c';
682 classes
[1].cl_char
= 's';
683 classes
[2].cl_char
= 'd';
684 classes
[3].cl_char
= 'q';
686 for (int i
= 1; i
< 4; i
++)
688 classes
[i
].next_avail
= 0;
689 classes
[i
].max_num
= 20;
692 linear_scan_regalloc (classes
);
694 FOR_ALL_BB_FN (bb
, cfun
)
695 rewrite_code_bb (bb
, classes
);
698 /* Out of SSA and register allocation on HSAIL IL. */
703 hsa_cfun
->update_dominance ();
706 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
708 fprintf (dump_file
, "------- After out-of-SSA: -------\n");
709 dump_hsa_cfun (dump_file
);
714 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
716 fprintf (dump_file
, "------- After register allocation: -------\n");
717 dump_hsa_cfun (dump_file
);