1 /* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
2 Copyright (C) 2022-2023 Free Software Foundation, Inc.
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass is to Set VL/VTYPE global status for RVV instructions
22 that depend on VL and VTYPE registers by Lazy code motion (LCM).
26 - Backward demanded info fusion within block.
28 - Lazy code motion (LCM) based demanded info backward propagation.
30 - RTL_SSA framework for def-use, PHI analysis.
32 - Lazy code motion (LCM) for global VL/VTYPE optimization.
36 - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
38 This pass consists of 6 phases:
40 - Phase 1 - compute VL/VTYPE demanded information within each block
41 by backward data-flow analysis.
43 - Phase 2 - Emit vsetvl instructions within each basic block according to
44 demand, compute and save ANTLOC && AVLOC of each block.
46 - Phase 3 - LCM Earliest-edge based VSETVL demand fusion.
48 - Phase 4 - Lazy code motion including: compute local properties,
49 pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
51 - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
52 used any more and VL operand of VSETVL instruction if it is not used by
53 any non-debug instructions.
55 - Phase 6 - DF based post VSETVL optimizations.
59 - The subroutine of optimize == 0 is simple_vsetvl.
60 This function simply performs vsetvl insertion for each RVV
61 instruction. No optimization.
63 - The subroutine of optimize > 0 is lazy_vsetvl.
64 This function optimizes the vsetvl insertion process by
65 lazy code motion (LCM) layering on RTL_SSA.
67 - get_avl (), get_insn (), get_avl_source ():
69 1. get_insn () is the current instruction, find_access (get_insn
70 ())->def is the same as get_avl_source () if get_insn () demand VL.
71 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source
73 3. get_avl_source ()->regno () is the REGNO that we backward propagate.
76 #define IN_TARGET_CODE 1
77 #define INCLUDE_ALGORITHM
78 #define INCLUDE_FUNCTIONAL
82 #include "coretypes.h"
87 #include "tree-pass.h"
90 #include "cfgcleanup.h"
91 #include "insn-config.h"
92 #include "insn-attr.h"
93 #include "insn-opinit.h"
94 #include "tm-constrs.h"
99 #include "profile-count.h"
101 #include "riscv-vsetvl.h"
103 using namespace rtl_ssa
;
104 using namespace riscv_vector
;
106 static CONSTEXPR
const unsigned ALL_SEW
[] = {8, 16, 32, 64};
107 static CONSTEXPR
const vlmul_type ALL_LMUL
[]
108 = {LMUL_1
, LMUL_2
, LMUL_4
, LMUL_8
, LMUL_F8
, LMUL_F4
, LMUL_F2
};
111 debug (const vector_insn_info
*info
)
117 debug (const vector_infos_manager
*info
)
125 return x
&& rtx_equal_p (x
, RVV_VLMAX
);
129 vlmax_avl_insn_p (rtx_insn
*rinsn
)
131 return (INSN_CODE (rinsn
) == CODE_FOR_vlmax_avlsi
132 || INSN_CODE (rinsn
) == CODE_FOR_vlmax_avldi
);
135 /* Return true if the block is a loop itself:
145 loop_basic_block_p (const basic_block cfg_bb
)
147 if (JUMP_P (BB_END (cfg_bb
)) && any_condjump_p (BB_END (cfg_bb
)))
151 FOR_EACH_EDGE (e
, ei
, cfg_bb
->succs
)
152 if (e
->dest
->index
== cfg_bb
->index
)
158 /* Return true if it is an RVV instruction that depends on VTYPE global
161 has_vtype_op (rtx_insn
*rinsn
)
163 return recog_memoized (rinsn
) >= 0 && get_attr_has_vtype_op (rinsn
);
166 /* Return true if it is an RVV instruction that depends on VL global
169 has_vl_op (rtx_insn
*rinsn
)
171 return recog_memoized (rinsn
) >= 0 && get_attr_has_vl_op (rinsn
);
174 /* Is this a SEW value that can be encoded into the VTYPE format. */
176 valid_sew_p (size_t sew
)
178 return exact_log2 (sew
) && sew
>= 8 && sew
<= 64;
181 /* Return true if the instruction ignores VLMUL field of VTYPE. */
183 ignore_vlmul_insn_p (rtx_insn
*rinsn
)
185 return get_attr_type (rinsn
) == TYPE_VIMOVVX
186 || get_attr_type (rinsn
) == TYPE_VFMOVVF
187 || get_attr_type (rinsn
) == TYPE_VIMOVXV
188 || get_attr_type (rinsn
) == TYPE_VFMOVFV
;
191 /* Return true if the instruction is scalar move instruction. */
193 scalar_move_insn_p (rtx_insn
*rinsn
)
195 return get_attr_type (rinsn
) == TYPE_VIMOVXV
196 || get_attr_type (rinsn
) == TYPE_VFMOVFV
;
199 /* Return true if the instruction is fault first load instruction. */
201 fault_first_load_p (rtx_insn
*rinsn
)
203 return recog_memoized (rinsn
) >= 0
204 && (get_attr_type (rinsn
) == TYPE_VLDFF
205 || get_attr_type (rinsn
) == TYPE_VLSEGDFF
);
208 /* Return true if the instruction is read vl instruction. */
210 read_vl_insn_p (rtx_insn
*rinsn
)
212 return recog_memoized (rinsn
) >= 0 && get_attr_type (rinsn
) == TYPE_RDVL
;
215 /* Return true if it is a vsetvl instruction. */
217 vector_config_insn_p (rtx_insn
*rinsn
)
219 return recog_memoized (rinsn
) >= 0 && get_attr_type (rinsn
) == TYPE_VSETVL
;
222 /* Return true if it is vsetvldi or vsetvlsi. */
224 vsetvl_insn_p (rtx_insn
*rinsn
)
226 if (!vector_config_insn_p (rinsn
))
228 return (INSN_CODE (rinsn
) == CODE_FOR_vsetvldi
229 || INSN_CODE (rinsn
) == CODE_FOR_vsetvlsi
);
232 /* Return true if it is vsetvl zero, rs1. */
234 vsetvl_discard_result_insn_p (rtx_insn
*rinsn
)
236 if (!vector_config_insn_p (rinsn
))
238 return (INSN_CODE (rinsn
) == CODE_FOR_vsetvl_discard_resultdi
239 || INSN_CODE (rinsn
) == CODE_FOR_vsetvl_discard_resultsi
);
242 /* Return true if it is vsetvl zero, zero. */
244 vsetvl_vtype_change_only_p (rtx_insn
*rinsn
)
246 if (!vector_config_insn_p (rinsn
))
248 return (INSN_CODE (rinsn
) == CODE_FOR_vsetvl_vtype_change_only
);
252 after_or_same_p (const insn_info
*insn1
, const insn_info
*insn2
)
254 return insn1
->compare_with (insn2
) >= 0;
258 real_insn_and_same_bb_p (const insn_info
*insn
, const bb_info
*bb
)
260 return insn
!= nullptr && insn
->is_real () && insn
->bb () == bb
;
264 before_p (const insn_info
*insn1
, const insn_info
*insn2
)
266 return insn1
->compare_with (insn2
) < 0;
269 /* Helper function to get VL operand. */
271 get_vl (rtx_insn
*rinsn
)
273 if (has_vl_op (rinsn
))
275 extract_insn_cached (rinsn
);
276 return recog_data
.operand
[get_attr_vl_op_idx (rinsn
)];
278 return SET_DEST (XVECEXP (PATTERN (rinsn
), 0, 0));
281 /* An "anticipatable occurrence" is one that is the first occurrence in the
282 basic block, the operands are not modified in the basic block prior
283 to the occurrence and the output is not used between the start of
284 the block and the occurrence.
286 For VSETVL instruction, we have these following formats:
291 So based on these circumstances, a DEM is considered as a local anticipatable
292 occurrence should satisfy these following conditions:
294 1). rs1 (avl) are not modified in the basic block prior to the VSETVL.
295 2). rd (vl) are not modified in the basic block prior to the VSETVL.
296 3). rd (vl) is not used between the start of the block and the occurrence.
298 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
299 is modified prior to the occurrence. This case is already considered as
300 a non-local anticipatable occurrence.
303 anticipatable_occurrence_p (const bb_info
*bb
, const vector_insn_info dem
)
305 insn_info
*insn
= dem
.get_insn ();
306 /* The only possible operand we care of VSETVL is AVL. */
307 if (dem
.has_avl_reg ())
309 /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */
311 = has_vl_op (insn
->rtl ()) ? get_vl (insn
->rtl ()) : dem
.get_avl ();
314 gcc_assert (!vsetvl_insn_p (insn
->rtl ()));
316 /* Earliest VSETVL will be inserted at the end of the block. */
317 for (const insn_info
*i
: bb
->real_nondebug_insns ())
319 /* rs1 (avl) are not modified in the basic block prior to the
321 if (find_access (i
->defs (), REGNO (avl
)))
323 if (vlmax_avl_p (dem
.get_avl ()))
325 /* rd (avl) is not used between the start of the block and
326 the occurrence. Note: Only for Dirty and VLMAX-avl. */
327 if (find_access (i
->uses (), REGNO (avl
)))
334 else if (!vlmax_avl_p (avl
))
336 set_info
*set
= dem
.get_avl_source ();
337 /* If it's undefined, it's not anticipatable conservatively. */
340 if (real_insn_and_same_bb_p (set
->insn (), bb
)
341 && before_p (set
->insn (), insn
))
343 for (insn_info
*i
= insn
->prev_nondebug_insn ();
344 real_insn_and_same_bb_p (i
, bb
); i
= i
->prev_nondebug_insn ())
346 /* rs1 (avl) are not modified in the basic block prior to the
348 if (find_access (i
->defs (), REGNO (avl
)))
354 /* rd (vl) is not used between the start of the block and the occurrence. */
355 if (vsetvl_insn_p (insn
->rtl ()))
357 rtx dest
= get_vl (insn
->rtl ());
358 for (insn_info
*i
= insn
->prev_nondebug_insn ();
359 real_insn_and_same_bb_p (i
, bb
); i
= i
->prev_nondebug_insn ())
361 /* rd (vl) is not used between the start of the block and the
363 if (find_access (i
->uses (), REGNO (dest
)))
365 /* rd (vl) are not modified in the basic block prior to the VSETVL. */
366 if (find_access (i
->defs (), REGNO (dest
)))
374 /* An "available occurrence" is one that is the last occurrence in the
375 basic block and the operands are not modified by following statements in
376 the basic block [including this insn].
378 For VSETVL instruction, we have these following formats:
383 So based on these circumstances, a DEM is considered as a local available
384 occurrence should satisfy these following conditions:
386 1). rs1 (avl) are not modified by following statements in
388 2). rd (vl) are not modified by following statements in
391 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
392 is modified prior to the occurrence. This case is already considered as
393 a non-local available occurrence.
396 available_occurrence_p (const bb_info
*bb
, const vector_insn_info dem
)
398 insn_info
*insn
= dem
.get_insn ();
399 /* The only possible operand we care of VSETVL is AVL. */
400 if (dem
.has_avl_reg ())
402 if (!vlmax_avl_p (dem
.get_avl ()))
406 if (vsetvl_insn_p (insn
->rtl ()))
408 dest
= get_vl (insn
->rtl ());
409 /* For user vsetvl a2, a2 instruction, we consider it as
410 available even though it modifies "a2". */
411 i
= i
->next_nondebug_insn ();
413 for (; real_insn_and_same_bb_p (i
, bb
); i
= i
->next_nondebug_insn ())
415 if (read_vl_insn_p (i
->rtl ()))
417 /* rs1 (avl) are not modified by following statements in
419 if (find_access (i
->defs (), REGNO (dem
.get_avl ())))
421 /* rd (vl) are not modified by following statements in
423 if (dest
&& find_access (i
->defs (), REGNO (dest
)))
432 insn_should_be_added_p (const insn_info
*insn
, unsigned int types
)
434 if (insn
->is_real () && (types
& REAL_SET
))
436 if (insn
->is_phi () && (types
& PHI_SET
))
438 if (insn
->is_bb_head () && (types
& BB_HEAD_SET
))
440 if (insn
->is_bb_end () && (types
& BB_END_SET
))
445 /* Recursively find all define instructions. The kind of instruction is
446 specified by the DEF_TYPE. */
447 static hash_set
<set_info
*>
448 get_all_sets (phi_info
*phi
, unsigned int types
)
450 hash_set
<set_info
*> insns
;
451 auto_vec
<phi_info
*> work_list
;
452 hash_set
<phi_info
*> visited_list
;
454 return hash_set
<set_info
*> ();
455 work_list
.safe_push (phi
);
457 while (!work_list
.is_empty ())
459 phi_info
*phi
= work_list
.pop ();
460 visited_list
.add (phi
);
461 for (use_info
*use
: phi
->inputs ())
463 def_info
*def
= use
->def ();
464 set_info
*set
= safe_dyn_cast
<set_info
*> (def
);
466 return hash_set
<set_info
*> ();
468 gcc_assert (!set
->insn ()->is_debug_insn ());
470 if (insn_should_be_added_p (set
->insn (), types
))
472 if (set
->insn ()->is_phi ())
474 phi_info
*new_phi
= as_a
<phi_info
*> (set
);
475 if (!visited_list
.contains (new_phi
))
476 work_list
.safe_push (new_phi
);
483 static hash_set
<set_info
*>
484 get_all_sets (set_info
*set
, bool /* get_real_inst */ real_p
,
485 bool /*get_phi*/ phi_p
, bool /* get_function_parameter*/ param_p
)
487 if (real_p
&& phi_p
&& param_p
)
488 return get_all_sets (safe_dyn_cast
<phi_info
*> (set
),
489 REAL_SET
| PHI_SET
| BB_HEAD_SET
| BB_END_SET
);
491 else if (real_p
&& param_p
)
492 return get_all_sets (safe_dyn_cast
<phi_info
*> (set
),
493 REAL_SET
| BB_HEAD_SET
| BB_END_SET
);
496 return get_all_sets (safe_dyn_cast
<phi_info
*> (set
), REAL_SET
);
497 return hash_set
<set_info
*> ();
500 /* Helper function to get AVL operand. */
502 get_avl (rtx_insn
*rinsn
)
504 if (vsetvl_insn_p (rinsn
) || vsetvl_discard_result_insn_p (rinsn
))
505 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn
), 0, 0)), 0, 0);
507 if (!has_vl_op (rinsn
))
509 if (get_attr_avl_type (rinsn
) == VLMAX
)
511 extract_insn_cached (rinsn
);
512 return recog_data
.operand
[get_attr_vl_op_idx (rinsn
)];
516 get_same_bb_set (hash_set
<set_info
*> &sets
, const basic_block cfg_bb
)
518 for (set_info
*set
: sets
)
519 if (set
->bb ()->cfg_bb () == cfg_bb
)
524 /* Helper function to get SEW operand. We always have SEW value for
525 all RVV instructions that have VTYPE OP. */
527 get_sew (rtx_insn
*rinsn
)
529 return get_attr_sew (rinsn
);
532 /* Helper function to get VLMUL operand. We always have VLMUL value for
533 all RVV instructions that have VTYPE OP. */
534 static enum vlmul_type
535 get_vlmul (rtx_insn
*rinsn
)
537 return (enum vlmul_type
) get_attr_vlmul (rinsn
);
540 /* Get default tail policy. */
544 /* For the instruction that doesn't require TA, we still need a default value
545 to emit vsetvl. We pick up the default value according to prefer policy. */
546 return (bool) (get_prefer_tail_policy () & 0x1
547 || (get_prefer_tail_policy () >> 1 & 0x1));
550 /* Get default mask policy. */
554 /* For the instruction that doesn't require MA, we still need a default value
555 to emit vsetvl. We pick up the default value according to prefer policy. */
556 return (bool) (get_prefer_mask_policy () & 0x1
557 || (get_prefer_mask_policy () >> 1 & 0x1));
560 /* Helper function to get TA operand. */
562 tail_agnostic_p (rtx_insn
*rinsn
)
564 /* If it doesn't have TA, we return agnostic by default. */
565 extract_insn_cached (rinsn
);
566 int ta
= get_attr_ta (rinsn
);
567 return ta
== INVALID_ATTRIBUTE
? get_default_ta () : IS_AGNOSTIC (ta
);
570 /* Helper function to get MA operand. */
572 mask_agnostic_p (rtx_insn
*rinsn
)
574 /* If it doesn't have MA, we return agnostic by default. */
575 extract_insn_cached (rinsn
);
576 int ma
= get_attr_ma (rinsn
);
577 return ma
== INVALID_ATTRIBUTE
? get_default_ma () : IS_AGNOSTIC (ma
);
580 /* Return true if FN has a vector instruction that use VL/VTYPE. */
582 has_vector_insn (function
*fn
)
586 FOR_ALL_BB_FN (cfg_bb
, fn
)
587 FOR_BB_INSNS (cfg_bb
, rinsn
)
588 if (NONDEBUG_INSN_P (rinsn
) && has_vtype_op (rinsn
))
593 /* Emit vsetvl instruction. */
595 gen_vsetvl_pat (enum vsetvl_type insn_type
, const vl_vtype_info
&info
, rtx vl
)
597 rtx avl
= info
.get_avl ();
598 /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
599 set the value of avl to (const_int 0) so that VSETVL PASS will
600 insert vsetvl correctly.*/
601 if (info
.has_avl_no_reg ())
603 rtx sew
= gen_int_mode (info
.get_sew (), Pmode
);
604 rtx vlmul
= gen_int_mode (info
.get_vlmul (), Pmode
);
605 rtx ta
= gen_int_mode (info
.get_ta (), Pmode
);
606 rtx ma
= gen_int_mode (info
.get_ma (), Pmode
);
608 if (insn_type
== VSETVL_NORMAL
)
610 gcc_assert (vl
!= NULL_RTX
);
611 return gen_vsetvl (Pmode
, vl
, avl
, sew
, vlmul
, ta
, ma
);
613 else if (insn_type
== VSETVL_VTYPE_CHANGE_ONLY
)
614 return gen_vsetvl_vtype_change_only (sew
, vlmul
, ta
, ma
);
616 return gen_vsetvl_discard_result (Pmode
, avl
, sew
, vlmul
, ta
, ma
);
620 gen_vsetvl_pat (rtx_insn
*rinsn
, const vector_insn_info
&info
,
624 vl_vtype_info new_info
= info
;
625 if (info
.get_insn () && info
.get_insn ()->rtl ()
626 && fault_first_load_p (info
.get_insn ()->rtl ()))
627 new_info
.set_avl_info (
628 avl_info (get_avl (info
.get_insn ()->rtl ()), nullptr));
630 new_pat
= gen_vsetvl_pat (VSETVL_NORMAL
, new_info
, vl
);
633 if (vsetvl_insn_p (rinsn
))
634 new_pat
= gen_vsetvl_pat (VSETVL_NORMAL
, new_info
, get_vl (rinsn
));
635 else if (INSN_CODE (rinsn
) == CODE_FOR_vsetvl_vtype_change_only
)
636 new_pat
= gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY
, new_info
, NULL_RTX
);
638 new_pat
= gen_vsetvl_pat (VSETVL_DISCARD_RESULT
, new_info
, NULL_RTX
);
644 emit_vsetvl_insn (enum vsetvl_type insn_type
, enum emit_type emit_type
,
645 const vl_vtype_info
&info
, rtx vl
, rtx_insn
*rinsn
)
647 rtx pat
= gen_vsetvl_pat (insn_type
, info
, vl
);
650 fprintf (dump_file
, "\nInsert vsetvl insn PATTERN:\n");
651 print_rtl_single (dump_file
, pat
);
652 fprintf (dump_file
, "\nfor insn:\n");
653 print_rtl_single (dump_file
, rinsn
);
656 if (emit_type
== EMIT_DIRECT
)
658 else if (emit_type
== EMIT_BEFORE
)
659 emit_insn_before (pat
, rinsn
);
661 emit_insn_after (pat
, rinsn
);
665 eliminate_insn (rtx_insn
*rinsn
)
669 fprintf (dump_file
, "\nEliminate insn %d:\n", INSN_UID (rinsn
));
670 print_rtl_single (dump_file
, rinsn
);
672 if (in_sequence_p ())
679 insert_vsetvl (enum emit_type emit_type
, rtx_insn
*rinsn
,
680 const vector_insn_info
&info
, const vector_insn_info
&prev_info
)
682 /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
684 if (prev_info
.valid_or_dirty_p () && !prev_info
.unknown_p ()
685 && info
.compatible_avl_p (prev_info
) && info
.same_vlmax_p (prev_info
))
687 emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY
, emit_type
, info
, NULL_RTX
,
689 return VSETVL_VTYPE_CHANGE_ONLY
;
692 if (info
.has_avl_imm ())
694 emit_vsetvl_insn (VSETVL_DISCARD_RESULT
, emit_type
, info
, NULL_RTX
,
696 return VSETVL_DISCARD_RESULT
;
699 if (info
.has_avl_no_reg ())
701 /* We can only use x0, x0 if there's no chance of the vtype change causing
702 the previous vl to become invalid. */
703 if (prev_info
.valid_or_dirty_p () && !prev_info
.unknown_p ()
704 && info
.same_vlmax_p (prev_info
))
706 emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY
, emit_type
, info
, NULL_RTX
,
708 return VSETVL_VTYPE_CHANGE_ONLY
;
710 /* Otherwise use an AVL of 0 to avoid depending on previous vl. */
711 vl_vtype_info new_info
= info
;
712 new_info
.set_avl_info (avl_info (const0_rtx
, nullptr));
713 emit_vsetvl_insn (VSETVL_DISCARD_RESULT
, emit_type
, new_info
, NULL_RTX
,
715 return VSETVL_DISCARD_RESULT
;
718 /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
719 opcode if the AVLReg is X0 as they have different register classes for
721 if (vlmax_avl_p (info
.get_avl ()))
723 gcc_assert (has_vtype_op (rinsn
) || vsetvl_insn_p (rinsn
));
724 /* For user vsetvli a5, zero, we should use get_vl to get the VL
726 rtx vl_op
= info
.get_avl_or_vl_reg ();
727 gcc_assert (!vlmax_avl_p (vl_op
));
728 emit_vsetvl_insn (VSETVL_NORMAL
, emit_type
, info
, vl_op
, rinsn
);
729 return VSETVL_NORMAL
;
732 emit_vsetvl_insn (VSETVL_DISCARD_RESULT
, emit_type
, info
, NULL_RTX
, rinsn
);
736 fprintf (dump_file
, "Update VL/VTYPE info, previous info=");
737 prev_info
.dump (dump_file
);
739 return VSETVL_DISCARD_RESULT
;
742 /* Get VL/VTYPE information for INSN. */
744 get_vl_vtype_info (const insn_info
*insn
)
746 set_info
*set
= nullptr;
747 rtx avl
= ::get_avl (insn
->rtl ());
748 if (avl
&& REG_P (avl
))
750 if (vlmax_avl_p (avl
) && has_vl_op (insn
->rtl ()))
752 = find_access (insn
->uses (), REGNO (get_vl (insn
->rtl ())))->def ();
753 else if (!vlmax_avl_p (avl
))
754 set
= find_access (insn
->uses (), REGNO (avl
))->def ();
759 uint8_t sew
= get_sew (insn
->rtl ());
760 enum vlmul_type vlmul
= get_vlmul (insn
->rtl ());
761 uint8_t ratio
= get_attr_ratio (insn
->rtl ());
762 /* when get_attr_ratio is invalid, this kind of instructions
763 doesn't care about ratio. However, we still need this value
764 in demand info backward analysis. */
765 if (ratio
== INVALID_ATTRIBUTE
)
766 ratio
= calculate_ratio (sew
, vlmul
);
767 bool ta
= tail_agnostic_p (insn
->rtl ());
768 bool ma
= mask_agnostic_p (insn
->rtl ());
770 /* If merge operand is undef value, we prefer agnostic. */
771 int merge_op_idx
= get_attr_merge_op_idx (insn
->rtl ());
772 if (merge_op_idx
!= INVALID_ATTRIBUTE
773 && satisfies_constraint_vu (recog_data
.operand
[merge_op_idx
]))
779 vl_vtype_info
info (avl_info (avl
, set
), sew
, vlmul
, ratio
, ta
, ma
);
783 /* Change insn and Assert the change always happens. */
785 validate_change_or_fail (rtx object
, rtx
*loc
, rtx new_rtx
, bool in_group
)
787 bool change_p
= validate_change (object
, loc
, new_rtx
, in_group
);
788 gcc_assert (change_p
);
792 change_insn (rtx_insn
*rinsn
, rtx new_pat
)
794 /* We don't apply change on RTL_SSA here since it's possible a
795 new INSN we add in the PASS before which doesn't have RTL_SSA
799 fprintf (dump_file
, "\nChange PATTERN of insn %d from:\n",
801 print_rtl_single (dump_file
, PATTERN (rinsn
));
804 validate_change_or_fail (rinsn
, &PATTERN (rinsn
), new_pat
, false);
808 fprintf (dump_file
, "\nto:\n");
809 print_rtl_single (dump_file
, PATTERN (rinsn
));
813 static const insn_info
*
814 get_forward_read_vl_insn (const insn_info
*insn
)
816 const bb_info
*bb
= insn
->bb ();
817 for (const insn_info
*i
= insn
->next_nondebug_insn ();
818 real_insn_and_same_bb_p (i
, bb
); i
= i
->next_nondebug_insn ())
820 if (find_access (i
->defs (), VL_REGNUM
))
822 if (read_vl_insn_p (i
->rtl ()))
828 static const insn_info
*
829 get_backward_fault_first_load_insn (const insn_info
*insn
)
831 const bb_info
*bb
= insn
->bb ();
832 for (const insn_info
*i
= insn
->prev_nondebug_insn ();
833 real_insn_and_same_bb_p (i
, bb
); i
= i
->prev_nondebug_insn ())
835 if (fault_first_load_p (i
->rtl ()))
837 if (find_access (i
->defs (), VL_REGNUM
))
844 change_insn (function_info
*ssa
, insn_change change
, insn_info
*insn
,
847 rtx_insn
*rinsn
= insn
->rtl ();
848 auto attempt
= ssa
->new_change_attempt ();
849 if (!restrict_movement (change
))
854 fprintf (dump_file
, "\nChange PATTERN of insn %d from:\n",
856 print_rtl_single (dump_file
, PATTERN (rinsn
));
859 insn_change_watermark watermark
;
860 validate_change_or_fail (rinsn
, &PATTERN (rinsn
), new_pat
, true);
862 /* These routines report failures themselves. */
863 if (!recog (attempt
, change
) || !change_is_worthwhile (change
, false))
867 (insn 12 34 13 2 (set (reg:RVVM4DI 120 v24 [orig:134 _1 ] [134])
868 (if_then_else:RVVM4DI (unspec:RVVMF8BI [
869 (const_vector:RVVMF8BI repeat [
873 (const_int 2 [0x2]) repeated x2
878 (plus:RVVM4DI (reg/v:RVVM4DI 104 v8 [orig:137 op1 ] [137])
879 (sign_extend:RVVM4DI (vec_duplicate:RVVM4SI (reg:SI 15 a5
880 [140])))) (unspec:RVVM4DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF)))
881 "rvv.c":8:12 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV
882 (mem/c:RVVM4DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128])
883 (expr_list:REG_EQUAL (if_then_else:RVVM4DI (unspec:RVVMF8BI [
884 (const_vector:RVVMF8BI repeat [
887 (reg/v:DI 13 a3 [orig:139 vl ] [139])
888 (const_int 2 [0x2]) repeated x2
893 (plus:RVVM4DI (reg/v:RVVM4DI 104 v8 [orig:137 op1 ] [137])
894 (const_vector:RVVM4DI repeat [
895 (const_int 2730 [0xaaa])
901 Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use
902 "a3" which made us fail in change_insn. We reference to the
903 'aarch64-cc-fusion.cc' and add this method. */
904 remove_reg_equal_equiv_notes (rinsn
);
905 confirm_change_group ();
906 ssa
->change_insn (change
);
910 fprintf (dump_file
, "\nto:\n");
911 print_rtl_single (dump_file
, PATTERN (rinsn
));
917 change_vsetvl_insn (const insn_info
*insn
, const vector_insn_info
&info
,
921 if (vector_config_insn_p (insn
->rtl ()))
923 rinsn
= insn
->rtl ();
924 gcc_assert (vsetvl_insn_p (rinsn
) && "Can't handle X0, rs1 vsetvli yet");
928 gcc_assert (has_vtype_op (insn
->rtl ()));
929 rinsn
= PREV_INSN (insn
->rtl ());
930 gcc_assert (vector_config_insn_p (rinsn
));
932 rtx new_pat
= gen_vsetvl_pat (rinsn
, info
, vl
);
933 change_insn (rinsn
, new_pat
);
937 avl_source_has_vsetvl_p (set_info
*avl_source
)
941 if (!avl_source
->insn ())
943 if (avl_source
->insn ()->is_real ())
944 return vsetvl_insn_p (avl_source
->insn ()->rtl ());
945 hash_set
<set_info
*> sets
= get_all_sets (avl_source
, true, false, true);
946 for (const auto set
: sets
)
948 if (set
->insn ()->is_real () && vsetvl_insn_p (set
->insn ()->rtl ()))
955 source_equal_p (insn_info
*insn1
, insn_info
*insn2
)
957 if (!insn1
|| !insn2
)
959 rtx_insn
*rinsn1
= insn1
->rtl ();
960 rtx_insn
*rinsn2
= insn2
->rtl ();
961 if (!rinsn1
|| !rinsn2
)
963 rtx note1
= find_reg_equal_equiv_note (rinsn1
);
964 rtx note2
= find_reg_equal_equiv_note (rinsn2
);
965 rtx single_set1
= single_set (rinsn1
);
966 rtx single_set2
= single_set (rinsn2
);
967 if (read_vl_insn_p (rinsn1
) && read_vl_insn_p (rinsn2
))
969 const insn_info
*load1
= get_backward_fault_first_load_insn (insn1
);
970 const insn_info
*load2
= get_backward_fault_first_load_insn (insn2
);
971 return load1
&& load2
&& load1
== load2
;
974 if (note1
&& note2
&& rtx_equal_p (note1
, note2
))
977 /* Since vsetvl instruction is not single SET.
978 We handle this case specially here. */
979 if (vsetvl_insn_p (insn1
->rtl ()) && vsetvl_insn_p (insn2
->rtl ()))
983 RVV 1 (use a6 as AVL)
985 RVV 2 (use a5 as AVL)
986 We consider AVL of RVV 1 and RVV 2 are same so that we can
987 gain more optimization opportunities.
989 Note: insn1_info.compatible_avl_p (insn2_info)
990 will make sure there is no instruction between vsetvl1 and vsetvl2
991 modify a5 since their def will be different if there is instruction
992 modify a5 and compatible_avl_p will return false. */
993 vector_insn_info insn1_info
, insn2_info
;
994 insn1_info
.parse_insn (insn1
);
995 insn2_info
.parse_insn (insn2
);
997 /* To avoid dead loop, we don't optimize a vsetvli def has vsetvli
998 instructions which will complicate the situation. */
999 if (avl_source_has_vsetvl_p (insn1_info
.get_avl_source ())
1000 || avl_source_has_vsetvl_p (insn2_info
.get_avl_source ()))
1003 if (insn1_info
.same_vlmax_p (insn2_info
)
1004 && insn1_info
.compatible_avl_p (insn2_info
))
1008 /* We only handle AVL is set by instructions with no side effects. */
1009 if (!single_set1
|| !single_set2
)
1011 if (!rtx_equal_p (SET_SRC (single_set1
), SET_SRC (single_set2
)))
1013 /* RTL_SSA uses include REG_NOTE. Consider this following case:
1016 (insn 41 39 42 4 (set (reg:DI 26 s10 [orig:159 loop_len_46 ] [159])
1017 (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201])
1018 (reg:DI 14 a4 [276]))) 408 {*umindi3}
1019 (expr_list:REG_EQUAL (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201])
1020 (const_int 2 [0x2]))
1022 The RTL_SSA uses of this instruction has 2 uses:
1023 1. (reg:DI 15 a5 [orig:201 _149 ] [201]) - twice.
1024 2. (reg:DI 14 a4 [276]) - once.
1027 (insn 38 353 351 4 (set (reg:DI 27 s11 [orig:160 loop_len_47 ] [160])
1028 (umin:DI (reg:DI 15 a5 [orig:199 _146 ] [199])
1029 (reg:DI 14 a4 [276]))) 408 {*umindi3}
1030 (expr_list:REG_EQUAL (umin:DI (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200])
1031 (const_int 2 [0x2]))
1033 The RTL_SSA uses of this instruction has 3 uses:
1034 1. (reg:DI 15 a5 [orig:199 _146 ] [199]) - once
1035 2. (reg:DI 14 a4 [276]) - once
1036 3. (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200]) - once
1038 Return false when insn1->uses ().size () != insn2->uses ().size ()
1040 if (insn1
->uses ().size () != insn2
->uses ().size ())
1042 for (size_t i
= 0; i
< insn1
->uses ().size (); i
++)
1043 if (insn1
->uses ()[i
] != insn2
->uses ()[i
])
1048 /* Helper function to get single same real RTL source.
1049 return NULL if it is not a single real RTL source. */
1051 extract_single_source (set_info
*set
)
1055 if (set
->insn ()->is_real ())
1056 return set
->insn ();
1057 if (!set
->insn ()->is_phi ())
1059 hash_set
<set_info
*> sets
= get_all_sets (set
, true, false, true);
1061 insn_info
*first_insn
= (*sets
.begin ())->insn ();
1062 if (first_insn
->is_artificial ())
1064 for (const set_info
*set
: sets
)
1066 /* If there is a head or end insn, we conservatively return
1067 NULL so that VSETVL PASS will insert vsetvl directly. */
1068 if (set
->insn ()->is_artificial ())
1070 if (!source_equal_p (set
->insn (), first_insn
))
1078 calculate_sew (vlmul_type vlmul
, unsigned int ratio
)
1080 for (const unsigned sew
: ALL_SEW
)
1081 if (calculate_ratio (sew
, vlmul
) == ratio
)
1087 calculate_vlmul (unsigned int sew
, unsigned int ratio
)
1089 for (const vlmul_type vlmul
: ALL_LMUL
)
1090 if (calculate_ratio (sew
, vlmul
) == ratio
)
1092 return LMUL_RESERVED
;
1096 incompatible_avl_p (const vector_insn_info
&info1
,
1097 const vector_insn_info
&info2
)
1099 return !info1
.compatible_avl_p (info2
) && !info2
.compatible_avl_p (info1
);
1103 different_sew_p (const vector_insn_info
&info1
, const vector_insn_info
&info2
)
1105 return info1
.get_sew () != info2
.get_sew ();
1109 different_lmul_p (const vector_insn_info
&info1
, const vector_insn_info
&info2
)
1111 return info1
.get_vlmul () != info2
.get_vlmul ();
1115 different_ratio_p (const vector_insn_info
&info1
, const vector_insn_info
&info2
)
1117 return info1
.get_ratio () != info2
.get_ratio ();
1121 different_tail_policy_p (const vector_insn_info
&info1
,
1122 const vector_insn_info
&info2
)
1124 return info1
.get_ta () != info2
.get_ta ();
1128 different_mask_policy_p (const vector_insn_info
&info1
,
1129 const vector_insn_info
&info2
)
1131 return info1
.get_ma () != info2
.get_ma ();
1135 possible_zero_avl_p (const vector_insn_info
&info1
,
1136 const vector_insn_info
&info2
)
1138 return !info1
.has_non_zero_avl () || !info2
.has_non_zero_avl ();
1142 second_ratio_invalid_for_first_sew_p (const vector_insn_info
&info1
,
1143 const vector_insn_info
&info2
)
1145 return calculate_vlmul (info1
.get_sew (), info2
.get_ratio ())
1150 second_ratio_invalid_for_first_lmul_p (const vector_insn_info
&info1
,
1151 const vector_insn_info
&info2
)
1153 return calculate_sew (info1
.get_vlmul (), info2
.get_ratio ()) == 0;
1157 float_insn_valid_sew_p (const vector_insn_info
&info
, unsigned int sew
)
1159 if (info
.get_insn () && info
.get_insn ()->is_real ()
1160 && get_attr_type (info
.get_insn ()->rtl ()) == TYPE_VFMOVFV
)
1163 return TARGET_VECTOR_ELEN_FP_16
;
1165 return TARGET_VECTOR_ELEN_FP_32
;
1167 return TARGET_VECTOR_ELEN_FP_64
;
1173 second_sew_less_than_first_sew_p (const vector_insn_info
&info1
,
1174 const vector_insn_info
&info2
)
1176 return info2
.get_sew () < info1
.get_sew ()
1177 || !float_insn_valid_sew_p (info1
, info2
.get_sew ());
1181 first_sew_less_than_second_sew_p (const vector_insn_info
&info1
,
1182 const vector_insn_info
&info2
)
1184 return info1
.get_sew () < info2
.get_sew ()
1185 || !float_insn_valid_sew_p (info2
, info1
.get_sew ());
1188 /* return 0 if LMUL1 == LMUL2.
1189 return -1 if LMUL1 < LMUL2.
1190 return 1 if LMUL1 > LMUL2. */
1192 compare_lmul (vlmul_type vlmul1
, vlmul_type vlmul2
)
1194 if (vlmul1
== vlmul2
)
1200 if (vlmul2
== LMUL_2
|| vlmul2
== LMUL_4
|| vlmul2
== LMUL_8
)
1205 if (vlmul2
== LMUL_4
|| vlmul2
== LMUL_8
)
1210 if (vlmul2
== LMUL_8
)
1217 if (vlmul2
== LMUL_1
|| vlmul2
== LMUL_2
|| vlmul2
== LMUL_4
1218 || vlmul2
== LMUL_8
)
1223 if (vlmul2
== LMUL_F2
|| vlmul2
== LMUL_1
|| vlmul2
== LMUL_2
1224 || vlmul2
== LMUL_4
|| vlmul2
== LMUL_8
)
1236 second_lmul_less_than_first_lmul_p (const vector_insn_info
&info1
,
1237 const vector_insn_info
&info2
)
1239 return compare_lmul (info2
.get_vlmul (), info1
.get_vlmul ()) == -1;
1243 second_ratio_less_than_first_ratio_p (const vector_insn_info
&info1
,
1244 const vector_insn_info
&info2
)
1246 return info2
.get_ratio () < info1
.get_ratio ();
1249 static CONSTEXPR
const demands_cond incompatible_conds
[] = {
1250 #define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \
1251 GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \
1252 SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \
1253 TAIL_POLICTY2, MASK_POLICY2, COND) \
1254 {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
1256 {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1259 #include "riscv-vsetvl.def"
1263 greatest_sew (const vector_insn_info
&info1
, const vector_insn_info
&info2
)
1265 return std::max (info1
.get_sew (), info2
.get_sew ());
1269 first_sew (const vector_insn_info
&info1
, const vector_insn_info
&)
1271 return info1
.get_sew ();
1275 second_sew (const vector_insn_info
&, const vector_insn_info
&info2
)
1277 return info2
.get_sew ();
1281 first_vlmul (const vector_insn_info
&info1
, const vector_insn_info
&)
1283 return info1
.get_vlmul ();
1287 second_vlmul (const vector_insn_info
&, const vector_insn_info
&info2
)
1289 return info2
.get_vlmul ();
1293 first_ratio (const vector_insn_info
&info1
, const vector_insn_info
&)
1295 return info1
.get_ratio ();
1299 second_ratio (const vector_insn_info
&, const vector_insn_info
&info2
)
1301 return info2
.get_ratio ();
1305 vlmul_for_first_sew_second_ratio (const vector_insn_info
&info1
,
1306 const vector_insn_info
&info2
)
1308 return calculate_vlmul (info1
.get_sew (), info2
.get_ratio ());
1312 vlmul_for_greatest_sew_second_ratio (const vector_insn_info
&info1
,
1313 const vector_insn_info
&info2
)
1315 return calculate_vlmul (MAX (info1
.get_sew (), info2
.get_sew ()),
1316 info2
.get_ratio ());
1320 ratio_for_second_sew_first_vlmul (const vector_insn_info
&info1
,
1321 const vector_insn_info
&info2
)
1323 return calculate_ratio (info2
.get_sew (), info1
.get_vlmul ());
1326 static CONSTEXPR
const demands_fuse_rule fuse_rules
[] = {
1327 #define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \
1328 DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \
1329 DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \
1330 NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \
1331 NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \
1333 {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \
1334 DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \
1335 {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \
1336 DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \
1340 NEW_DEMAND_GE_SEW, \
1344 #include "riscv-vsetvl.def"
1348 always_unavailable (const vector_insn_info
&, const vector_insn_info
&)
1354 avl_unavailable_p (const vector_insn_info
&info1
, const vector_insn_info
&info2
)
1356 return !info2
.compatible_avl_p (info1
.get_avl_info ());
1360 sew_unavailable_p (const vector_insn_info
&info1
, const vector_insn_info
&info2
)
1362 if (!info2
.demand_p (DEMAND_LMUL
) && !info2
.demand_p (DEMAND_RATIO
))
1364 if (info2
.demand_p (DEMAND_GE_SEW
))
1365 return info1
.get_sew () < info2
.get_sew ();
1366 return info1
.get_sew () != info2
.get_sew ();
1372 lmul_unavailable_p (const vector_insn_info
&info1
,
1373 const vector_insn_info
&info2
)
1375 if (info1
.get_vlmul () == info2
.get_vlmul () && !info2
.demand_p (DEMAND_SEW
)
1376 && !info2
.demand_p (DEMAND_RATIO
))
1382 ge_sew_unavailable_p (const vector_insn_info
&info1
,
1383 const vector_insn_info
&info2
)
1385 if (!info2
.demand_p (DEMAND_LMUL
) && !info2
.demand_p (DEMAND_RATIO
)
1386 && info2
.demand_p (DEMAND_GE_SEW
))
1387 return info1
.get_sew () < info2
.get_sew ();
1392 ge_sew_lmul_unavailable_p (const vector_insn_info
&info1
,
1393 const vector_insn_info
&info2
)
1395 if (!info2
.demand_p (DEMAND_RATIO
) && info2
.demand_p (DEMAND_GE_SEW
))
1396 return info1
.get_sew () < info2
.get_sew ();
1401 ge_sew_ratio_unavailable_p (const vector_insn_info
&info1
,
1402 const vector_insn_info
&info2
)
1404 if (!info2
.demand_p (DEMAND_LMUL
))
1406 if (info2
.demand_p (DEMAND_GE_SEW
))
1407 return info1
.get_sew () < info2
.get_sew ();
1408 /* Demand GE_SEW should be available for non-demand SEW. */
1409 else if (!info2
.demand_p (DEMAND_SEW
))
1415 static CONSTEXPR
const demands_cond unavailable_conds
[] = {
1416 #define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
1417 TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \
1418 RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1419 MASK_POLICY2, COND) \
1420 {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
1422 {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1425 #include "riscv-vsetvl.def"
1429 same_sew_lmul_demand_p (const bool *dems1
, const bool *dems2
)
1431 return dems1
[DEMAND_SEW
] == dems2
[DEMAND_SEW
]
1432 && dems1
[DEMAND_LMUL
] == dems2
[DEMAND_LMUL
]
1433 && dems1
[DEMAND_RATIO
] == dems2
[DEMAND_RATIO
] && !dems1
[DEMAND_GE_SEW
]
1434 && !dems2
[DEMAND_GE_SEW
];
1438 propagate_avl_across_demands_p (const vector_insn_info
&info1
,
1439 const vector_insn_info
&info2
)
1441 if (info2
.demand_p (DEMAND_AVL
))
1443 if (info2
.demand_p (DEMAND_NONZERO_AVL
))
1444 return info1
.demand_p (DEMAND_AVL
)
1445 && !info1
.demand_p (DEMAND_NONZERO_AVL
) && info1
.has_avl_reg ();
1448 return info1
.demand_p (DEMAND_AVL
) && info1
.has_avl_reg ();
1453 reg_available_p (const insn_info
*insn
, const vector_insn_info
&info
)
1455 if (info
.has_avl_reg () && !info
.get_avl_source ())
1457 insn_info
*def_insn
= info
.get_avl_source ()->insn ();
1458 if (def_insn
->bb () == insn
->bb ())
1459 return before_p (def_insn
, insn
);
1461 return dominated_by_p (CDI_DOMINATORS
, insn
->bb ()->cfg_bb (),
1462 def_insn
->bb ()->cfg_bb ());
1465 /* Return true if the instruction support relaxed compatible check. */
1467 support_relaxed_compatible_p (const vector_insn_info
&info1
,
1468 const vector_insn_info
&info2
)
1470 if (fault_first_load_p (info1
.get_insn ()->rtl ())
1471 && info2
.demand_p (DEMAND_AVL
) && info2
.has_avl_reg ()
1472 && info2
.get_avl_source () && info2
.get_avl_source ()->insn ()->is_phi ())
1474 hash_set
<set_info
*> sets
1475 = get_all_sets (info2
.get_avl_source (), true, false, false);
1476 for (set_info
*set
: sets
)
1478 if (read_vl_insn_p (set
->insn ()->rtl ()))
1480 const insn_info
*insn
1481 = get_backward_fault_first_load_insn (set
->insn ());
1482 if (insn
== info1
.get_insn ())
1483 return info2
.compatible_vtype_p (info1
);
1490 /* Count the number of REGNO in RINSN. */
1492 count_regno_occurrences (rtx_insn
*rinsn
, unsigned int regno
)
1495 extract_insn (rinsn
);
1496 for (int i
= 0; i
< recog_data
.n_operands
; i
++)
1497 if (refers_to_regno_p (regno
, recog_data
.operand
[i
]))
1502 /* Return TRUE if the demands can be fused. */
1504 demands_can_be_fused_p (const vector_insn_info
&be_fused
,
1505 const vector_insn_info
&to_fuse
)
1507 return be_fused
.compatible_p (to_fuse
) && !be_fused
.available_p (to_fuse
);
1510 /* Return true if we can fuse VSETVL demand info into predecessor of earliest
1513 earliest_pred_can_be_fused_p (const bb_info
*earliest_pred
,
1514 const vector_insn_info
&earliest_info
,
1515 const vector_insn_info
&expr
, rtx
*vlmax_vl
)
1517 /* Backward VLMAX VL:
1519 vsetivli zero, 1 ... -> vsetvli t1, zero
1522 vsetvli t1, zero ... -> to be elided.
1525 We should forward "t1". */
1526 if (!earliest_info
.has_avl_reg () && expr
.has_avl_reg ())
1528 rtx avl_or_vl_reg
= expr
.get_avl_or_vl_reg ();
1529 gcc_assert (avl_or_vl_reg
);
1530 const insn_info
*last_insn
= earliest_info
.get_insn ();
1531 /* To fuse demand on earlest edge, we make sure AVL/VL
1532 didn't change from the consume insn to the predecessor
1534 for (insn_info
*i
= earliest_pred
->end_insn ()->prev_nondebug_insn ();
1535 real_insn_and_same_bb_p (i
, earliest_pred
)
1536 && after_or_same_p (i
, last_insn
);
1537 i
= i
->prev_nondebug_insn ())
1539 if (find_access (i
->defs (), REGNO (avl_or_vl_reg
)))
1541 if (find_access (i
->uses (), REGNO (avl_or_vl_reg
)))
1544 if (vlmax_vl
&& vlmax_avl_p (expr
.get_avl ()))
1545 *vlmax_vl
= avl_or_vl_reg
;
1551 /* Return true if the current VSETVL 1 is dominated by preceding VSETVL 2.
1553 VSETVL 2 dominates VSETVL 1 should satisfy this following check:
1555 - VSETVL 2 should have the RATIO (SEW/LMUL) with VSETVL 1.
1556 - VSETVL 2 is user vsetvl (vsetvl VL, AVL)
1557 - VSETVL 2 "VL" result is the "AVL" of VSETL1. */
1559 vsetvl_dominated_by_p (const basic_block cfg_bb
,
1560 const vector_insn_info
&vsetvl1
,
1561 const vector_insn_info
&vsetvl2
, bool fuse_p
)
1563 if (!vsetvl1
.valid_or_dirty_p () || !vsetvl2
.valid_or_dirty_p ())
1565 if (!has_vl_op (vsetvl1
.get_insn ()->rtl ())
1566 || !vsetvl_insn_p (vsetvl2
.get_insn ()->rtl ()))
1569 hash_set
<set_info
*> sets
1570 = get_all_sets (vsetvl1
.get_avl_source (), true, false, false);
1571 set_info
*set
= get_same_bb_set (sets
, cfg_bb
);
1573 if (!vsetvl1
.has_avl_reg () || vlmax_avl_p (vsetvl1
.get_avl ())
1574 || !vsetvl2
.same_vlmax_p (vsetvl1
) || !set
1575 || set
->insn () != vsetvl2
.get_insn ())
1578 if (fuse_p
&& vsetvl2
.same_vtype_p (vsetvl1
))
1580 else if (!fuse_p
&& !vsetvl2
.same_vtype_p (vsetvl1
))
1585 avl_info::avl_info (const avl_info
&other
)
1587 m_value
= other
.get_value ();
1588 m_source
= other
.get_source ();
1591 avl_info::avl_info (rtx value_in
, set_info
*source_in
)
1592 : m_value (value_in
), m_source (source_in
)
1596 avl_info::single_source_equal_p (const avl_info
&other
) const
1598 set_info
*set1
= m_source
;
1599 set_info
*set2
= other
.get_source ();
1600 insn_info
*insn1
= extract_single_source (set1
);
1601 insn_info
*insn2
= extract_single_source (set2
);
1602 if (!insn1
|| !insn2
)
1604 return source_equal_p (insn1
, insn2
);
1608 avl_info::multiple_source_equal_p (const avl_info
&other
) const
1610 /* When the def info is same in RTL_SSA namespace, it's safe
1611 to consider they are avl compatible. */
1612 if (m_source
== other
.get_source ())
1615 /* We only consider handle PHI node. */
1616 if (!m_source
->insn ()->is_phi () || !other
.get_source ()->insn ()->is_phi ())
1619 phi_info
*phi1
= as_a
<phi_info
*> (m_source
);
1620 phi_info
*phi2
= as_a
<phi_info
*> (other
.get_source ());
1622 if (phi1
->is_degenerate () && phi2
->is_degenerate ())
1624 /* Degenerate PHI means the PHI node only have one input. */
1626 /* If both PHI nodes have the same single input in use list.
1627 We consider they are AVL compatible. */
1628 if (phi1
->input_value (0) == phi2
->input_value (0))
1631 /* TODO: We can support more optimization cases in the future. */
1636 avl_info::operator= (const avl_info
&other
)
1638 m_value
= other
.get_value ();
1639 m_source
= other
.get_source ();
1644 avl_info::operator== (const avl_info
&other
) const
1647 return !other
.get_value ();
1648 if (!other
.get_value ())
1651 if (GET_CODE (m_value
) != GET_CODE (other
.get_value ()))
1654 /* Handle CONST_INT AVL. */
1655 if (CONST_INT_P (m_value
))
1656 return INTVAL (m_value
) == INTVAL (other
.get_value ());
1658 /* Handle VLMAX AVL. */
1659 if (vlmax_avl_p (m_value
))
1660 return vlmax_avl_p (other
.get_value ());
1661 if (vlmax_avl_p (other
.get_value ()))
1664 /* If any source is undef value, we think they are not equal. */
1665 if (!m_source
|| !other
.get_source ())
1668 /* If both sources are single source (defined by a single real RTL)
1669 and their definitions are same. */
1670 if (single_source_equal_p (other
))
1673 return multiple_source_equal_p (other
);
1677 avl_info::operator!= (const avl_info
&other
) const
1679 return !(*this == other
);
1683 avl_info::has_non_zero_avl () const
1686 return INTVAL (get_value ()) > 0;
1688 return vlmax_avl_p (get_value ());
1692 /* Initialize VL/VTYPE information. */
1693 vl_vtype_info::vl_vtype_info (avl_info avl_in
, uint8_t sew_in
,
1694 enum vlmul_type vlmul_in
, uint8_t ratio_in
,
1695 bool ta_in
, bool ma_in
)
1696 : m_avl (avl_in
), m_sew (sew_in
), m_vlmul (vlmul_in
), m_ratio (ratio_in
),
1697 m_ta (ta_in
), m_ma (ma_in
)
1699 gcc_assert (valid_sew_p (m_sew
) && "Unexpected SEW");
1703 vl_vtype_info::operator== (const vl_vtype_info
&other
) const
1705 return same_avl_p (other
) && m_sew
== other
.get_sew ()
1706 && m_vlmul
== other
.get_vlmul () && m_ta
== other
.get_ta ()
1707 && m_ma
== other
.get_ma () && m_ratio
== other
.get_ratio ();
1711 vl_vtype_info::operator!= (const vl_vtype_info
&other
) const
1713 return !(*this == other
);
1717 vl_vtype_info::same_avl_p (const vl_vtype_info
&other
) const
1719 /* We need to compare both RTL and SET. If both AVL are CONST_INT.
1720 For example, const_int 3 and const_int 4, we need to compare
1721 RTL. If both AVL are REG and their REGNO are same, we need to
1723 return get_avl () == other
.get_avl ()
1724 && get_avl_source () == other
.get_avl_source ();
1728 vl_vtype_info::same_vtype_p (const vl_vtype_info
&other
) const
1730 return get_sew () == other
.get_sew () && get_vlmul () == other
.get_vlmul ()
1731 && get_ta () == other
.get_ta () && get_ma () == other
.get_ma ();
1735 vl_vtype_info::same_vlmax_p (const vl_vtype_info
&other
) const
1737 return get_ratio () == other
.get_ratio ();
1740 /* Compare the compatibility between Dem1 and Dem2.
1741 If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2
1742 meaning Dem1 is easier be compatible with others than Dem2
1743 or Dem2 is stricter than Dem1.
1744 For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */
1746 vector_insn_info::operator>= (const vector_insn_info
&other
) const
1748 if (support_relaxed_compatible_p (*this, other
))
1750 unsigned array_size
= sizeof (unavailable_conds
) / sizeof (demands_cond
);
1751 /* Bypass AVL unavailable cases. */
1752 for (unsigned i
= 2; i
< array_size
; i
++)
1753 if (unavailable_conds
[i
].pair
.match_cond_p (this->get_demands (),
1754 other
.get_demands ())
1755 && unavailable_conds
[i
].incompatible_p (*this, other
))
1760 if (!other
.compatible_p (static_cast<const vl_vtype_info
&> (*this)))
1762 if (!this->compatible_p (static_cast<const vl_vtype_info
&> (other
)))
1768 for (const auto &cond
: unavailable_conds
)
1769 if (cond
.pair
.match_cond_p (this->get_demands (), other
.get_demands ())
1770 && cond
.incompatible_p (*this, other
))
1777 vector_insn_info::operator== (const vector_insn_info
&other
) const
1779 gcc_assert (!uninit_p () && !other
.uninit_p ()
1780 && "Uninitialization should not happen");
1782 /* Empty is only equal to another Empty. */
1784 return other
.empty_p ();
1785 if (other
.empty_p ())
1788 /* Unknown is only equal to another Unknown. */
1790 return other
.unknown_p ();
1791 if (other
.unknown_p ())
1792 return unknown_p ();
1794 for (size_t i
= 0; i
< NUM_DEMAND
; i
++)
1795 if (m_demands
[i
] != other
.demand_p ((enum demand_type
) i
))
1798 /* We should consider different INSN demands as different
1799 expression. Otherwise, we will be doing incorrect vsetvl
1801 if (m_insn
!= other
.get_insn ())
1804 if (!same_avl_p (other
))
1807 /* If the full VTYPE is valid, check that it is the same. */
1808 return same_vtype_p (other
);
1812 vector_insn_info::parse_insn (rtx_insn
*rinsn
)
1814 *this = vector_insn_info ();
1815 if (!NONDEBUG_INSN_P (rinsn
))
1817 if (optimize
== 0 && !has_vtype_op (rinsn
))
1819 gcc_assert (!vsetvl_discard_result_insn_p (rinsn
));
1821 extract_insn_cached (rinsn
);
1822 rtx avl
= ::get_avl (rinsn
);
1823 m_avl
= avl_info (avl
, nullptr);
1824 m_sew
= ::get_sew (rinsn
);
1825 m_vlmul
= ::get_vlmul (rinsn
);
1826 m_ta
= tail_agnostic_p (rinsn
);
1827 m_ma
= mask_agnostic_p (rinsn
);
1831 vector_insn_info::parse_insn (insn_info
*insn
)
1833 *this = vector_insn_info ();
1835 /* Return if it is debug insn for the consistency with optimize == 0. */
1836 if (insn
->is_debug_insn ())
1839 /* We set it as unknown since we don't what will happen in CALL or ASM. */
1840 if (insn
->is_call () || insn
->is_asm ())
1846 /* If this is something that updates VL/VTYPE that we don't know about, set
1847 the state to unknown. */
1848 if (!vector_config_insn_p (insn
->rtl ()) && !has_vtype_op (insn
->rtl ())
1849 && (find_access (insn
->defs (), VL_REGNUM
)
1850 || find_access (insn
->defs (), VTYPE_REGNUM
)))
1856 if (!vector_config_insn_p (insn
->rtl ()) && !has_vtype_op (insn
->rtl ()))
1859 /* Warning: This function has to work on both the lowered (i.e. post
1860 emit_local_forward_vsetvls) and pre-lowering forms. The main implication
1861 of this is that it can't use the value of a SEW, VL, or Policy operand as
1862 they might be stale after lowering. */
1863 vl_vtype_info::operator= (get_vl_vtype_info (insn
));
1866 if (vector_config_insn_p (insn
->rtl ()))
1868 m_demands
[DEMAND_AVL
] = true;
1869 m_demands
[DEMAND_RATIO
] = true;
1873 if (has_vl_op (insn
->rtl ()))
1874 m_demands
[DEMAND_AVL
] = true;
1876 if (get_attr_ratio (insn
->rtl ()) != INVALID_ATTRIBUTE
)
1877 m_demands
[DEMAND_RATIO
] = true;
1880 /* TODO: By default, if it doesn't demand RATIO, we set it
1881 demand SEW && LMUL both. Some instructions may demand SEW
1882 only and ignore LMUL, will fix it later. */
1883 m_demands
[DEMAND_SEW
] = true;
1884 if (!ignore_vlmul_insn_p (insn
->rtl ()))
1885 m_demands
[DEMAND_LMUL
] = true;
1888 if (get_attr_ta (insn
->rtl ()) != INVALID_ATTRIBUTE
)
1889 m_demands
[DEMAND_TAIL_POLICY
] = true;
1890 if (get_attr_ma (insn
->rtl ()) != INVALID_ATTRIBUTE
)
1891 m_demands
[DEMAND_MASK_POLICY
] = true;
1893 if (vector_config_insn_p (insn
->rtl ()))
1896 if (scalar_move_insn_p (insn
->rtl ()))
1898 if (m_avl
.has_non_zero_avl ())
1899 m_demands
[DEMAND_NONZERO_AVL
] = true;
1901 m_demands
[DEMAND_GE_SEW
] = true;
1904 if (!m_avl
.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl
.get_source ())
1906 if (!m_avl
.get_source ()->insn ()->is_real ()
1907 && !m_avl
.get_source ()->insn ()->is_phi ())
1910 insn_info
*def_insn
= extract_single_source (m_avl
.get_source ());
1911 if (!def_insn
|| !vsetvl_insn_p (def_insn
->rtl ()))
1914 vector_insn_info new_info
;
1915 new_info
.parse_insn (def_insn
);
1916 if (!same_vlmax_p (new_info
) && !scalar_move_insn_p (insn
->rtl ()))
1919 if (new_info
.has_avl ())
1921 if (new_info
.has_avl_imm ())
1922 set_avl_info (avl_info (new_info
.get_avl (), nullptr));
1925 if (vlmax_avl_p (new_info
.get_avl ()))
1926 set_avl_info (avl_info (new_info
.get_avl (), get_avl_source ()));
1929 /* Conservatively propagate non-VLMAX AVL of user vsetvl:
1930 1. The user vsetvl should be same block with the rvv insn.
1931 2. The user vsetvl is the only def insn of rvv insn.
1932 3. The AVL is not modified between def-use chain.
1933 4. The VL is only used by insn within EBB.
1935 bool modified_p
= false;
1936 for (insn_info
*i
= def_insn
->next_nondebug_insn ();
1937 real_insn_and_same_bb_p (i
, get_insn ()->bb ());
1938 i
= i
->next_nondebug_insn ())
1940 /* Consider this following sequence:
1942 insn 1: vsetvli a5,a3,e8,mf4,ta,mu
1943 insn 2: vsetvli zero,a5,e32,m1,ta,ma
1946 vsetvli a2,zero,e32,m1,ta,ma
1948 vsetvli zero,a5,e32,m1,ta,ma
1951 insn 3: sub a3,a3,a5
1954 We can local AVL propagate "a3" from insn 1 to insn 2
1955 if no insns between insn 1 and insn 2 modify "a3 even
1956 though insn 3 modifies "a3".
1957 Otherwise, we can't perform local AVL propagation.
1959 Early break if we reach the insn 2. */
1960 if (!before_p (i
, insn
))
1962 if (find_access (i
->defs (), REGNO (new_info
.get_avl ())))
1969 bool has_live_out_use
= false;
1970 for (use_info
*use
: m_avl
.get_source ()->all_uses ())
1972 if (use
->is_live_out_use ())
1974 has_live_out_use
= true;
1978 if (!modified_p
&& !has_live_out_use
1979 && def_insn
== m_avl
.get_source ()->insn ()
1980 && m_insn
->bb () == def_insn
->bb ())
1981 set_avl_info (new_info
.get_avl_info ());
1986 if (scalar_move_insn_p (insn
->rtl ()) && m_avl
.has_non_zero_avl ())
1987 m_demands
[DEMAND_NONZERO_AVL
] = true;
1991 vector_insn_info::compatible_p (const vector_insn_info
&other
) const
1993 gcc_assert (valid_or_dirty_p () && other
.valid_or_dirty_p ()
1994 && "Can't compare invalid demanded infos");
1996 for (const auto &cond
: incompatible_conds
)
1997 if (cond
.dual_incompatible_p (*this, other
))
2003 vector_insn_info::skip_avl_compatible_p (const vector_insn_info
&other
) const
2005 gcc_assert (valid_or_dirty_p () && other
.valid_or_dirty_p ()
2006 && "Can't compare invalid demanded infos");
2007 unsigned array_size
= sizeof (incompatible_conds
) / sizeof (demands_cond
);
2008 /* Bypass AVL incompatible cases. */
2009 for (unsigned i
= 1; i
< array_size
; i
++)
2010 if (incompatible_conds
[i
].dual_incompatible_p (*this, other
))
2016 vector_insn_info::compatible_avl_p (const vl_vtype_info
&other
) const
2018 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2019 gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
2020 if (!demand_p (DEMAND_AVL
))
2022 if (demand_p (DEMAND_NONZERO_AVL
) && other
.has_non_zero_avl ())
2024 return get_avl_info () == other
.get_avl_info ();
2028 vector_insn_info::compatible_avl_p (const avl_info
&other
) const
2030 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2031 gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
2032 gcc_assert (demand_p (DEMAND_AVL
) && "Can't compare AVL undemand state");
2033 if (!demand_p (DEMAND_AVL
))
2035 if (demand_p (DEMAND_NONZERO_AVL
) && other
.has_non_zero_avl ())
2037 return get_avl_info () == other
;
2041 vector_insn_info::compatible_vtype_p (const vl_vtype_info
&other
) const
2043 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2044 gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
2045 if (demand_p (DEMAND_SEW
))
2047 if (!demand_p (DEMAND_GE_SEW
) && m_sew
!= other
.get_sew ())
2049 if (demand_p (DEMAND_GE_SEW
) && m_sew
> other
.get_sew ())
2052 if (demand_p (DEMAND_LMUL
) && m_vlmul
!= other
.get_vlmul ())
2054 if (demand_p (DEMAND_RATIO
) && m_ratio
!= other
.get_ratio ())
2056 if (demand_p (DEMAND_TAIL_POLICY
) && m_ta
!= other
.get_ta ())
2058 if (demand_p (DEMAND_MASK_POLICY
) && m_ma
!= other
.get_ma ())
2063 /* Determine whether the vector instructions requirements represented by
2064 Require are compatible with the previous vsetvli instruction represented
2065 by this. INSN is the instruction whose requirements we're considering. */
2067 vector_insn_info::compatible_p (const vl_vtype_info
&curr_info
) const
2069 gcc_assert (!uninit_p () && "Can't handle uninitialized info");
2073 /* Nothing is compatible with Unknown. */
2077 /* If the instruction doesn't need an AVLReg and the SEW matches, consider
2079 if (!demand_p (DEMAND_AVL
))
2080 if (m_sew
== curr_info
.get_sew ())
2083 return compatible_avl_p (curr_info
) && compatible_vtype_p (curr_info
);
2087 vector_insn_info::available_p (const vector_insn_info
&other
) const
2089 return *this >= other
;
2093 vector_insn_info::fuse_avl (const vector_insn_info
&info1
,
2094 const vector_insn_info
&info2
)
2096 set_insn (info1
.get_insn ());
2097 if (info1
.demand_p (DEMAND_AVL
))
2099 if (info1
.demand_p (DEMAND_NONZERO_AVL
))
2101 if (info2
.demand_p (DEMAND_AVL
)
2102 && !info2
.demand_p (DEMAND_NONZERO_AVL
))
2104 set_avl_info (info2
.get_avl_info ());
2105 set_demand (DEMAND_AVL
, true);
2106 set_demand (DEMAND_NONZERO_AVL
, false);
2110 set_avl_info (info1
.get_avl_info ());
2111 set_demand (DEMAND_NONZERO_AVL
, info1
.demand_p (DEMAND_NONZERO_AVL
));
2115 set_avl_info (info2
.get_avl_info ());
2116 set_demand (DEMAND_NONZERO_AVL
, info2
.demand_p (DEMAND_NONZERO_AVL
));
2118 set_demand (DEMAND_AVL
,
2119 info1
.demand_p (DEMAND_AVL
) || info2
.demand_p (DEMAND_AVL
));
2123 vector_insn_info::fuse_sew_lmul (const vector_insn_info
&info1
,
2124 const vector_insn_info
&info2
)
2126 /* We need to fuse sew && lmul according to demand info:
2132 if (same_sew_lmul_demand_p (info1
.get_demands (), info2
.get_demands ()))
2134 set_demand (DEMAND_SEW
, info2
.demand_p (DEMAND_SEW
));
2135 set_demand (DEMAND_LMUL
, info2
.demand_p (DEMAND_LMUL
));
2136 set_demand (DEMAND_RATIO
, info2
.demand_p (DEMAND_RATIO
));
2137 set_demand (DEMAND_GE_SEW
, info2
.demand_p (DEMAND_GE_SEW
));
2138 set_sew (info2
.get_sew ());
2139 set_vlmul (info2
.get_vlmul ());
2140 set_ratio (info2
.get_ratio ());
2143 for (const auto &rule
: fuse_rules
)
2145 if (rule
.pair
.match_cond_p (info1
.get_demands (), info2
.get_demands ()))
2147 set_demand (DEMAND_SEW
, rule
.demand_sew_p
);
2148 set_demand (DEMAND_LMUL
, rule
.demand_lmul_p
);
2149 set_demand (DEMAND_RATIO
, rule
.demand_ratio_p
);
2150 set_demand (DEMAND_GE_SEW
, rule
.demand_ge_sew_p
);
2151 set_sew (rule
.new_sew (info1
, info2
));
2152 set_vlmul (rule
.new_vlmul (info1
, info2
));
2153 set_ratio (rule
.new_ratio (info1
, info2
));
2156 if (rule
.pair
.match_cond_p (info2
.get_demands (), info1
.get_demands ()))
2158 set_demand (DEMAND_SEW
, rule
.demand_sew_p
);
2159 set_demand (DEMAND_LMUL
, rule
.demand_lmul_p
);
2160 set_demand (DEMAND_RATIO
, rule
.demand_ratio_p
);
2161 set_demand (DEMAND_GE_SEW
, rule
.demand_ge_sew_p
);
2162 set_sew (rule
.new_sew (info2
, info1
));
2163 set_vlmul (rule
.new_vlmul (info2
, info1
));
2164 set_ratio (rule
.new_ratio (info2
, info1
));
2172 vector_insn_info::fuse_tail_policy (const vector_insn_info
&info1
,
2173 const vector_insn_info
&info2
)
2175 if (info1
.demand_p (DEMAND_TAIL_POLICY
))
2177 set_ta (info1
.get_ta ());
2178 demand (DEMAND_TAIL_POLICY
);
2180 else if (info2
.demand_p (DEMAND_TAIL_POLICY
))
2182 set_ta (info2
.get_ta ());
2183 demand (DEMAND_TAIL_POLICY
);
2186 set_ta (get_default_ta ());
2190 vector_insn_info::fuse_mask_policy (const vector_insn_info
&info1
,
2191 const vector_insn_info
&info2
)
2193 if (info1
.demand_p (DEMAND_MASK_POLICY
))
2195 set_ma (info1
.get_ma ());
2196 demand (DEMAND_MASK_POLICY
);
2198 else if (info2
.demand_p (DEMAND_MASK_POLICY
))
2200 set_ma (info2
.get_ma ());
2201 demand (DEMAND_MASK_POLICY
);
2204 set_ma (get_default_ma ());
2208 vector_insn_info::local_merge (const vector_insn_info
&merge_info
) const
2210 if (!vsetvl_insn_p (get_insn ()->rtl ()) && *this != merge_info
)
2211 gcc_assert (this->compatible_p (merge_info
)
2212 && "Can't merge incompatible demanded infos");
2214 vector_insn_info new_info
;
2215 new_info
.set_valid ();
2216 /* For local backward data flow, we always update INSN && AVL as the
2217 latest INSN and AVL so that we can keep track status of each INSN. */
2218 new_info
.fuse_avl (merge_info
, *this);
2219 new_info
.fuse_sew_lmul (*this, merge_info
);
2220 new_info
.fuse_tail_policy (*this, merge_info
);
2221 new_info
.fuse_mask_policy (*this, merge_info
);
2226 vector_insn_info::global_merge (const vector_insn_info
&merge_info
,
2227 unsigned int bb_index
) const
2229 if (!vsetvl_insn_p (get_insn ()->rtl ()) && *this != merge_info
)
2230 gcc_assert (this->compatible_p (merge_info
)
2231 && "Can't merge incompatible demanded infos");
2233 vector_insn_info new_info
;
2234 new_info
.set_valid ();
2236 /* For global data flow, we should keep original INSN and AVL if they
2237 valid since we should keep the life information of each block.
2241 We should keep INSN && AVL of bb 1 since we will eventually emit
2242 vsetvl instruction according to INSN and AVL of bb 1. */
2243 new_info
.fuse_avl (*this, merge_info
);
2244 /* Recompute the AVL source whose block index is equal to BB_INDEX. */
2245 if (new_info
.get_avl_source ()
2246 && new_info
.get_avl_source ()->insn ()->is_phi ()
2247 && new_info
.get_avl_source ()->bb ()->index () != bb_index
)
2249 hash_set
<set_info
*> sets
2250 = get_all_sets (new_info
.get_avl_source (), true, true, true);
2251 new_info
.set_avl_source (nullptr);
2252 bool can_find_set_p
= false;
2253 set_info
*first_set
= nullptr;
2254 for (set_info
*set
: sets
)
2258 if (set
->bb ()->index () == bb_index
)
2260 gcc_assert (!can_find_set_p
);
2261 new_info
.set_avl_source (set
);
2262 can_find_set_p
= true;
2265 if (!can_find_set_p
&& sets
.elements () == 1
2266 && first_set
->insn ()->is_real ())
2267 new_info
.set_avl_source (first_set
);
2270 /* Make sure VLMAX AVL always has a set_info the get VL. */
2271 if (vlmax_avl_p (new_info
.get_avl ()))
2273 if (this->get_avl_source ())
2274 new_info
.set_avl_source (this->get_avl_source ());
2277 gcc_assert (merge_info
.get_avl_source ());
2278 new_info
.set_avl_source (merge_info
.get_avl_source ());
2282 new_info
.fuse_sew_lmul (*this, merge_info
);
2283 new_info
.fuse_tail_policy (*this, merge_info
);
2284 new_info
.fuse_mask_policy (*this, merge_info
);
2288 /* Wrapper helps to return the AVL or VL operand for the
2289 vector_insn_info. Return AVL if the AVL is not VLMAX.
2290 Otherwise, return the VL operand. */
2292 vector_insn_info::get_avl_or_vl_reg (void) const
2294 gcc_assert (has_avl_reg ());
2295 if (!vlmax_avl_p (get_avl ()))
2298 rtx_insn
*rinsn
= get_insn ()->rtl ();
2299 if (has_vl_op (rinsn
) || vsetvl_insn_p (rinsn
))
2301 rtx vl
= ::get_vl (rinsn
);
2302 /* For VLMAX, we should make sure we get the
2303 REG to emit 'vsetvl VL,zero' since the 'VL'
2304 should be the REG according to RVV ISA. */
2309 /* We always has avl_source if it is VLMAX AVL. */
2310 gcc_assert (get_avl_source ());
2311 return get_avl_reg_rtx ();
2315 vector_insn_info::update_fault_first_load_avl (insn_info
*insn
)
2317 // Update AVL to vl-output of the fault first load.
2318 const insn_info
*read_vl
= get_forward_read_vl_insn (insn
);
2321 rtx vl
= SET_DEST (PATTERN (read_vl
->rtl ()));
2322 def_info
*def
= find_access (read_vl
->defs (), REGNO (vl
));
2323 set_info
*set
= safe_dyn_cast
<set_info
*> (def
);
2324 set_avl_info (avl_info (vl
, set
));
2332 vlmul_to_str (vlmul_type vlmul
)
2345 return "INVALID LMUL";
2359 policy_to_str (bool agnostic_p
)
2361 return agnostic_p
? "agnostic" : "undisturbed";
2365 vector_insn_info::dump (FILE *file
) const
2367 fprintf (file
, "[");
2369 fprintf (file
, "UNINITIALIZED,");
2370 else if (valid_p ())
2371 fprintf (file
, "VALID,");
2372 else if (unknown_p ())
2373 fprintf (file
, "UNKNOWN,");
2374 else if (empty_p ())
2375 fprintf (file
, "EMPTY,");
2377 fprintf (file
, "DIRTY,");
2379 fprintf (file
, "Demand field={%d(VL),", demand_p (DEMAND_AVL
));
2380 fprintf (file
, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL
));
2381 fprintf (file
, "%d(SEW),", demand_p (DEMAND_SEW
));
2382 fprintf (file
, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW
));
2383 fprintf (file
, "%d(LMUL),", demand_p (DEMAND_LMUL
));
2384 fprintf (file
, "%d(RATIO),", demand_p (DEMAND_RATIO
));
2385 fprintf (file
, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY
));
2386 fprintf (file
, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY
));
2388 fprintf (file
, "AVL=");
2389 print_rtl_single (file
, get_avl ());
2390 fprintf (file
, "SEW=%d,", get_sew ());
2391 fprintf (file
, "VLMUL=%s,", vlmul_to_str (get_vlmul ()));
2392 fprintf (file
, "RATIO=%d,", get_ratio ());
2393 fprintf (file
, "TAIL_POLICY=%s,", policy_to_str (get_ta ()));
2394 fprintf (file
, "MASK_POLICY=%s", policy_to_str (get_ma ()));
2395 fprintf (file
, "]\n");
2401 fprintf (file
, "The real INSN=");
2402 print_rtl_single (file
, get_insn ()->rtl ());
2407 vector_infos_manager::vector_infos_manager ()
2409 vector_edge_list
= nullptr;
2410 vector_kill
= nullptr;
2411 vector_del
= nullptr;
2412 vector_insert
= nullptr;
2413 vector_antic
= nullptr;
2414 vector_transp
= nullptr;
2415 vector_comp
= nullptr;
2416 vector_avin
= nullptr;
2417 vector_avout
= nullptr;
2418 vector_antin
= nullptr;
2419 vector_antout
= nullptr;
2420 vector_earliest
= nullptr;
2421 vector_insn_infos
.safe_grow (get_max_uid ());
2422 vector_block_infos
.safe_grow (last_basic_block_for_fn (cfun
));
2427 FOR_ALL_BB_FN (cfg_bb
, cfun
)
2429 vector_block_infos
[cfg_bb
->index
].local_dem
= vector_insn_info ();
2430 vector_block_infos
[cfg_bb
->index
].reaching_out
= vector_insn_info ();
2431 FOR_BB_INSNS (cfg_bb
, rinsn
)
2432 vector_insn_infos
[INSN_UID (rinsn
)].parse_insn (rinsn
);
2437 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2439 vector_block_infos
[bb
->index ()].local_dem
= vector_insn_info ();
2440 vector_block_infos
[bb
->index ()].reaching_out
= vector_insn_info ();
2441 for (insn_info
*insn
: bb
->real_insns ())
2442 vector_insn_infos
[insn
->uid ()].parse_insn (insn
);
2443 vector_block_infos
[bb
->index ()].probability
= profile_probability ();
2449 vector_infos_manager::create_expr (vector_insn_info
&info
)
2451 for (size_t i
= 0; i
< vector_exprs
.length (); i
++)
2452 if (*vector_exprs
[i
] == info
)
2454 vector_exprs
.safe_push (&info
);
2458 vector_infos_manager::get_expr_id (const vector_insn_info
&info
) const
2460 for (size_t i
= 0; i
< vector_exprs
.length (); i
++)
2461 if (*vector_exprs
[i
] == info
)
2467 vector_infos_manager::get_all_available_exprs (
2468 const vector_insn_info
&info
) const
2470 auto_vec
<size_t> available_list
;
2471 for (size_t i
= 0; i
< vector_exprs
.length (); i
++)
2472 if (info
.available_p (*vector_exprs
[i
]))
2473 available_list
.safe_push (i
);
2474 return available_list
;
2478 vector_infos_manager::all_same_ratio_p (sbitmap bitdata
) const
2480 if (bitmap_empty_p (bitdata
))
2484 unsigned int bb_index
;
2485 sbitmap_iterator sbi
;
2487 EXECUTE_IF_SET_IN_BITMAP (bitdata
, 0, bb_index
, sbi
)
2490 ratio
= vector_exprs
[bb_index
]->get_ratio ();
2491 else if (vector_exprs
[bb_index
]->get_ratio () != ratio
)
2497 /* Return TRUE if the incoming vector configuration state
2498 to CFG_BB is compatible with the vector configuration
2499 state in CFG_BB, FALSE otherwise. */
2501 vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb
) const
2503 const auto &info
= vector_block_infos
[cfg_bb
->index
].local_dem
;
2504 sbitmap avin
= vector_avin
[cfg_bb
->index
];
2505 unsigned int bb_index
;
2506 sbitmap_iterator sbi
;
2507 EXECUTE_IF_SET_IN_BITMAP (avin
, 0, bb_index
, sbi
)
2509 const auto &avin_info
2510 = static_cast<const vl_vtype_info
&> (*vector_exprs
[bb_index
]);
2511 if (!info
.compatible_p (avin_info
))
2518 vector_infos_manager::all_same_avl_p (const basic_block cfg_bb
,
2519 sbitmap bitdata
) const
2521 if (bitmap_empty_p (bitdata
))
2524 const auto &block_info
= vector_block_infos
[cfg_bb
->index
];
2525 if (!block_info
.local_dem
.demand_p (DEMAND_AVL
))
2528 avl_info avl
= block_info
.local_dem
.get_avl_info ();
2529 unsigned int bb_index
;
2530 sbitmap_iterator sbi
;
2532 EXECUTE_IF_SET_IN_BITMAP (bitdata
, 0, bb_index
, sbi
)
2534 if (vector_exprs
[bb_index
]->get_avl_info () != avl
)
2541 vector_infos_manager::earliest_fusion_worthwhile_p (
2542 const basic_block cfg_bb
) const
2546 profile_probability prob
= profile_probability::uninitialized ();
2547 FOR_EACH_EDGE (e
, ei
, cfg_bb
->succs
)
2549 if (prob
== profile_probability::uninitialized ())
2550 prob
= vector_block_infos
[e
->dest
->index
].probability
;
2551 else if (prob
== vector_block_infos
[e
->dest
->index
].probability
)
2554 /* We pick the highest probability among those incompatible VSETVL
2555 infos. When all incompatible VSTEVL infos have same probability, we
2556 don't pick any of them. */
2563 vector_infos_manager::vsetvl_dominated_by_all_preds_p (
2564 const basic_block cfg_bb
, const vector_insn_info
&info
) const
2568 FOR_EACH_EDGE (e
, ei
, cfg_bb
->preds
)
2570 const auto &reaching_out
= vector_block_infos
[e
->src
->index
].reaching_out
;
2571 if (e
->src
->index
== cfg_bb
->index
&& reaching_out
.compatible_p (info
))
2573 if (!vsetvl_dominated_by_p (e
->src
, info
, reaching_out
, false))
2580 vector_infos_manager::expr_set_num (sbitmap bitdata
) const
2583 for (size_t i
= 0; i
< vector_exprs
.length (); i
++)
2584 if (bitmap_bit_p (bitdata
, i
))
2590 vector_infos_manager::release (void)
2592 if (!vector_insn_infos
.is_empty ())
2593 vector_insn_infos
.release ();
2594 if (!vector_block_infos
.is_empty ())
2595 vector_block_infos
.release ();
2596 if (!vector_exprs
.is_empty ())
2597 vector_exprs
.release ();
2599 gcc_assert (to_refine_vsetvls
.is_empty ());
2600 gcc_assert (to_delete_vsetvls
.is_empty ());
2602 free_bitmap_vectors ();
2606 vector_infos_manager::create_bitmap_vectors (void)
2608 /* Create the bitmap vectors. */
2609 vector_antic
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2610 vector_exprs
.length ());
2611 vector_transp
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2612 vector_exprs
.length ());
2613 vector_comp
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2614 vector_exprs
.length ());
2615 vector_avin
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2616 vector_exprs
.length ());
2617 vector_avout
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2618 vector_exprs
.length ());
2619 vector_kill
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2620 vector_exprs
.length ());
2621 vector_antin
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2622 vector_exprs
.length ());
2623 vector_antout
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2624 vector_exprs
.length ());
2626 bitmap_vector_ones (vector_transp
, last_basic_block_for_fn (cfun
));
2627 bitmap_vector_clear (vector_antic
, last_basic_block_for_fn (cfun
));
2628 bitmap_vector_clear (vector_comp
, last_basic_block_for_fn (cfun
));
2629 vector_edge_list
= create_edge_list ();
2630 vector_earliest
= sbitmap_vector_alloc (NUM_EDGES (vector_edge_list
),
2631 vector_exprs
.length ());
2635 vector_infos_manager::free_bitmap_vectors (void)
2637 /* Finished. Free up all the things we've allocated. */
2638 free_edge_list (vector_edge_list
);
2640 sbitmap_vector_free (vector_del
);
2642 sbitmap_vector_free (vector_insert
);
2644 sbitmap_vector_free (vector_kill
);
2646 sbitmap_vector_free (vector_antic
);
2648 sbitmap_vector_free (vector_transp
);
2650 sbitmap_vector_free (vector_comp
);
2652 sbitmap_vector_free (vector_avin
);
2654 sbitmap_vector_free (vector_avout
);
2656 sbitmap_vector_free (vector_antin
);
2658 sbitmap_vector_free (vector_antout
);
2659 if (vector_earliest
)
2660 sbitmap_vector_free (vector_earliest
);
2662 vector_edge_list
= nullptr;
2663 vector_kill
= nullptr;
2664 vector_del
= nullptr;
2665 vector_insert
= nullptr;
2666 vector_antic
= nullptr;
2667 vector_transp
= nullptr;
2668 vector_comp
= nullptr;
2669 vector_avin
= nullptr;
2670 vector_avout
= nullptr;
2671 vector_antin
= nullptr;
2672 vector_antout
= nullptr;
2673 vector_earliest
= nullptr;
2677 vector_infos_manager::dump (FILE *file
) const
2682 fprintf (file
, "\n");
2683 FOR_ALL_BB_FN (cfg_bb
, cfun
)
2685 fprintf (file
, "Local vector info of <bb %d>:\n", cfg_bb
->index
);
2686 fprintf (file
, "<HEADER>=");
2687 vector_block_infos
[cfg_bb
->index
].local_dem
.dump (file
);
2688 FOR_BB_INSNS (cfg_bb
, rinsn
)
2690 if (!NONDEBUG_INSN_P (rinsn
) || !has_vtype_op (rinsn
))
2692 fprintf (file
, "<insn %d>=", INSN_UID (rinsn
));
2693 const auto &info
= vector_insn_infos
[INSN_UID (rinsn
)];
2696 fprintf (file
, "<FOOTER>=");
2697 vector_block_infos
[cfg_bb
->index
].reaching_out
.dump (file
);
2698 fprintf (file
, "<Probability>=");
2699 vector_block_infos
[cfg_bb
->index
].probability
.dump (file
);
2700 fprintf (file
, "\n\n");
2703 fprintf (file
, "\n");
2704 FOR_ALL_BB_FN (cfg_bb
, cfun
)
2706 fprintf (file
, "Local properties of <bb %d>:\n", cfg_bb
->index
);
2708 fprintf (file
, "<ANTLOC>=");
2709 if (vector_antic
== nullptr)
2710 fprintf (file
, "(nil)\n");
2712 dump_bitmap_file (file
, vector_antic
[cfg_bb
->index
]);
2714 fprintf (file
, "<AVLOC>=");
2715 if (vector_comp
== nullptr)
2716 fprintf (file
, "(nil)\n");
2718 dump_bitmap_file (file
, vector_comp
[cfg_bb
->index
]);
2720 fprintf (file
, "<TRANSP>=");
2721 if (vector_transp
== nullptr)
2722 fprintf (file
, "(nil)\n");
2724 dump_bitmap_file (file
, vector_transp
[cfg_bb
->index
]);
2726 fprintf (file
, "<KILL>=");
2727 if (vector_kill
== nullptr)
2728 fprintf (file
, "(nil)\n");
2730 dump_bitmap_file (file
, vector_kill
[cfg_bb
->index
]);
2732 fprintf (file
, "<ANTIN>=");
2733 if (vector_antin
== nullptr)
2734 fprintf (file
, "(nil)\n");
2736 dump_bitmap_file (file
, vector_antin
[cfg_bb
->index
]);
2738 fprintf (file
, "<ANTOUT>=");
2739 if (vector_antout
== nullptr)
2740 fprintf (file
, "(nil)\n");
2742 dump_bitmap_file (file
, vector_antout
[cfg_bb
->index
]);
2745 fprintf (file
, "\n");
2746 FOR_ALL_BB_FN (cfg_bb
, cfun
)
2748 fprintf (file
, "Global LCM (Lazy code motion) result of <bb %d>:\n",
2751 fprintf (file
, "<AVIN>=");
2752 if (vector_avin
== nullptr)
2753 fprintf (file
, "(nil)\n");
2755 dump_bitmap_file (file
, vector_avin
[cfg_bb
->index
]);
2757 fprintf (file
, "<AVOUT>=");
2758 if (vector_avout
== nullptr)
2759 fprintf (file
, "(nil)\n");
2761 dump_bitmap_file (file
, vector_avout
[cfg_bb
->index
]);
2763 fprintf (file
, "<DELETE>=");
2764 if (vector_del
== nullptr)
2765 fprintf (file
, "(nil)\n");
2767 dump_bitmap_file (file
, vector_del
[cfg_bb
->index
]);
2770 for (size_t i
= 0; i
< vector_exprs
.length (); i
++)
2772 for (int ed
= 0; ed
< NUM_EDGES (vector_edge_list
); ed
++)
2774 edge eg
= INDEX_EDGE (vector_edge_list
, ed
);
2777 if (bitmap_bit_p (vector_insert
[ed
], i
))
2780 "\nGlobal LCM (Lazy code motion) INSERT info:\n");
2782 "INSERT edge %d from <bb %d> to <bb %d> for VSETVL "
2784 ed
, eg
->src
->index
, eg
->dest
->index
, i
);
2789 if (bitmap_bit_p (vector_earliest
[ed
], i
))
2792 "\nGlobal LCM (Lazy code motion) EARLIEST info:\n");
2795 "EARLIEST edge %d from <bb %d> to <bb %d> for VSETVL "
2797 ed
, eg
->src
->index
, eg
->dest
->index
, i
);
2804 const pass_data pass_data_vsetvl
= {
2805 RTL_PASS
, /* type */
2806 "vsetvl", /* name */
2807 OPTGROUP_NONE
, /* optinfo_flags */
2808 TV_NONE
, /* tv_id */
2809 0, /* properties_required */
2810 0, /* properties_provided */
2811 0, /* properties_destroyed */
2812 0, /* todo_flags_start */
2813 0, /* todo_flags_finish */
2816 class pass_vsetvl
: public rtl_opt_pass
2819 vector_infos_manager
*m_vector_manager
;
2821 const vector_insn_info
&get_vector_info (const rtx_insn
*) const;
2822 const vector_insn_info
&get_vector_info (const insn_info
*) const;
2823 const vector_block_info
&get_block_info (const basic_block
) const;
2824 const vector_block_info
&get_block_info (const bb_info
*) const;
2825 vector_block_info
&get_block_info (const basic_block
);
2826 vector_block_info
&get_block_info (const bb_info
*);
2827 void update_vector_info (const insn_info
*, const vector_insn_info
&);
2828 void update_block_info (int, profile_probability
, const vector_insn_info
&);
2830 void simple_vsetvl (void) const;
2831 void lazy_vsetvl (void);
2834 void compute_local_backward_infos (const bb_info
*);
2837 bool need_vsetvl (const vector_insn_info
&, const vector_insn_info
&) const;
2838 void transfer_before (vector_insn_info
&, insn_info
*) const;
2839 void transfer_after (vector_insn_info
&, insn_info
*) const;
2840 void emit_local_forward_vsetvls (const bb_info
*);
2843 bool earliest_fusion (void);
2844 void vsetvl_fusion (void);
2847 void prune_expressions (void);
2848 void compute_local_properties (void);
2849 bool can_refine_vsetvl_p (const basic_block
, const vector_insn_info
&) const;
2850 void refine_vsetvls (void) const;
2851 void cleanup_vsetvls (void);
2852 bool commit_vsetvls (void);
2853 void pre_vsetvl (void);
2856 rtx_insn
*get_vsetvl_at_end (const bb_info
*, vector_insn_info
*) const;
2857 void local_eliminate_vsetvl_insn (const bb_info
*) const;
2858 bool global_eliminate_vsetvl_insn (const bb_info
*) const;
2859 void ssa_post_optimization (void) const;
2862 void df_post_optimization (void) const;
2866 void compute_probabilities (void);
2869 pass_vsetvl (gcc::context
*ctxt
) : rtl_opt_pass (pass_data_vsetvl
, ctxt
) {}
2871 /* opt_pass methods: */
2872 virtual bool gate (function
*) final override
{ return TARGET_VECTOR
; }
2873 virtual unsigned int execute (function
*) final override
;
2874 }; // class pass_vsetvl
2876 const vector_insn_info
&
2877 pass_vsetvl::get_vector_info (const rtx_insn
*i
) const
2879 return m_vector_manager
->vector_insn_infos
[INSN_UID (i
)];
2882 const vector_insn_info
&
2883 pass_vsetvl::get_vector_info (const insn_info
*i
) const
2885 return m_vector_manager
->vector_insn_infos
[i
->uid ()];
2888 const vector_block_info
&
2889 pass_vsetvl::get_block_info (const basic_block bb
) const
2891 return m_vector_manager
->vector_block_infos
[bb
->index
];
2894 const vector_block_info
&
2895 pass_vsetvl::get_block_info (const bb_info
*bb
) const
2897 return m_vector_manager
->vector_block_infos
[bb
->index ()];
2901 pass_vsetvl::get_block_info (const basic_block bb
)
2903 return m_vector_manager
->vector_block_infos
[bb
->index
];
2907 pass_vsetvl::get_block_info (const bb_info
*bb
)
2909 return m_vector_manager
->vector_block_infos
[bb
->index ()];
2913 pass_vsetvl::update_vector_info (const insn_info
*i
,
2914 const vector_insn_info
&new_info
)
2916 m_vector_manager
->vector_insn_infos
[i
->uid ()] = new_info
;
2920 pass_vsetvl::update_block_info (int index
, profile_probability prob
,
2921 const vector_insn_info
&new_info
)
2923 m_vector_manager
->vector_block_infos
[index
].probability
= prob
;
2924 if (m_vector_manager
->vector_block_infos
[index
].local_dem
2925 == m_vector_manager
->vector_block_infos
[index
].reaching_out
)
2926 m_vector_manager
->vector_block_infos
[index
].local_dem
= new_info
;
2927 m_vector_manager
->vector_block_infos
[index
].reaching_out
= new_info
;
2930 /* Simple m_vsetvl_insert vsetvl for optimize == 0. */
2932 pass_vsetvl::simple_vsetvl (void) const
2936 "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
2938 n_basic_blocks_for_fn (cfun
), function_name (cfun
));
2942 FOR_ALL_BB_FN (cfg_bb
, cfun
)
2944 FOR_BB_INSNS (cfg_bb
, rinsn
)
2946 if (!NONDEBUG_INSN_P (rinsn
))
2948 if (has_vtype_op (rinsn
))
2950 const auto info
= get_vector_info (rinsn
);
2951 emit_vsetvl_insn (VSETVL_DISCARD_RESULT
, EMIT_BEFORE
, info
,
2958 /* Compute demanded information by backward data-flow analysis. */
2960 pass_vsetvl::compute_local_backward_infos (const bb_info
*bb
)
2962 vector_insn_info change
;
2963 change
.set_empty ();
2965 auto &block_info
= m_vector_manager
->vector_block_infos
[bb
->index ()];
2966 block_info
.reaching_out
= change
;
2968 for (insn_info
*insn
: bb
->reverse_real_nondebug_insns ())
2970 auto &info
= get_vector_info (insn
);
2972 if (info
.uninit_p ())
2973 /* If it is uninitialized, propagate it directly. */
2974 update_vector_info (insn
, change
);
2975 else if (info
.unknown_p ())
2979 gcc_assert (info
.valid_p () && "Unexpected Invalid demanded info");
2980 if (change
.valid_p ())
2982 if (!(propagate_avl_across_demands_p (change
, info
)
2983 && !reg_available_p (insn
, change
))
2984 && change
.compatible_p (info
))
2986 update_vector_info (insn
, change
.local_merge (info
));
2987 /* Fix PR109399, we should update user vsetvl instruction
2988 if there is a change in demand fusion. */
2989 if (vsetvl_insn_p (insn
->rtl ()))
2990 change_vsetvl_insn (insn
, info
);
2997 block_info
.local_dem
= change
;
2998 if (block_info
.local_dem
.empty_p ())
2999 block_info
.reaching_out
= block_info
.local_dem
;
3002 /* Return true if a dem_info is required to transition from curr_info to
3003 require before INSN. */
3005 pass_vsetvl::need_vsetvl (const vector_insn_info
&require
,
3006 const vector_insn_info
&curr_info
) const
3008 if (!curr_info
.valid_p () || curr_info
.unknown_p () || curr_info
.uninit_p ())
3011 if (require
.compatible_p (static_cast<const vl_vtype_info
&> (curr_info
)))
3017 /* Given an incoming state reaching INSN, modifies that state so that it is
3018 minimally compatible with INSN. The resulting state is guaranteed to be
3019 semantically legal for INSN, but may not be the state requested by INSN. */
3021 pass_vsetvl::transfer_before (vector_insn_info
&info
, insn_info
*insn
) const
3023 if (!has_vtype_op (insn
->rtl ()))
3026 const vector_insn_info require
= get_vector_info (insn
);
3027 if (info
.valid_p () && !need_vsetvl (require
, info
))
3032 /* Given a state with which we evaluated insn (see transfer_before above for why
3033 this might be different that the state insn requested), modify the state to
3034 reflect the changes insn might make. */
3036 pass_vsetvl::transfer_after (vector_insn_info
&info
, insn_info
*insn
) const
3038 if (vector_config_insn_p (insn
->rtl ()))
3040 info
= get_vector_info (insn
);
3044 if (fault_first_load_p (insn
->rtl ())
3045 && info
.update_fault_first_load_avl (insn
))
3048 /* If this is something that updates VL/VTYPE that we don't know about, set
3049 the state to unknown. */
3050 if (insn
->is_call () || insn
->is_asm ()
3051 || find_access (insn
->defs (), VL_REGNUM
)
3052 || find_access (insn
->defs (), VTYPE_REGNUM
))
3053 info
= vector_insn_info::get_unknown ();
3056 /* Emit vsetvl within each block by forward data-flow analysis. */
3058 pass_vsetvl::emit_local_forward_vsetvls (const bb_info
*bb
)
3060 auto &block_info
= m_vector_manager
->vector_block_infos
[bb
->index ()];
3061 if (block_info
.local_dem
.empty_p ())
3064 vector_insn_info curr_info
;
3065 for (insn_info
*insn
: bb
->real_nondebug_insns ())
3067 const vector_insn_info prev_info
= curr_info
;
3068 enum vsetvl_type type
= NUM_VSETVL_TYPE
;
3069 transfer_before (curr_info
, insn
);
3071 if (has_vtype_op (insn
->rtl ()))
3073 if (static_cast<const vl_vtype_info
&> (prev_info
)
3074 != static_cast<const vl_vtype_info
&> (curr_info
))
3076 const auto require
= get_vector_info (insn
);
3077 if (!require
.compatible_p (
3078 static_cast<const vl_vtype_info
&> (prev_info
)))
3079 type
= insert_vsetvl (EMIT_BEFORE
, insn
->rtl (), require
,
3084 /* Fix the issue of following sequence:
3088 vmv.x.s (demand AVL = 8).
3090 incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8.
3091 correct: vsetivli zero, 8
3092 vadd (demand AVL = 8). */
3093 if (type
== VSETVL_VTYPE_CHANGE_ONLY
)
3095 /* Update the curr_info to be real correct AVL. */
3096 curr_info
.set_avl_info (prev_info
.get_avl_info ());
3098 transfer_after (curr_info
, insn
);
3101 block_info
.reaching_out
= curr_info
;
3104 /* Assemble the candidates expressions for LCM. */
3106 pass_vsetvl::prune_expressions (void)
3108 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3110 if (m_vector_manager
->vector_block_infos
[bb
->index ()]
3111 .local_dem
.valid_or_dirty_p ())
3112 m_vector_manager
->create_expr (
3113 m_vector_manager
->vector_block_infos
[bb
->index ()].local_dem
);
3114 if (m_vector_manager
->vector_block_infos
[bb
->index ()]
3115 .reaching_out
.valid_or_dirty_p ())
3116 m_vector_manager
->create_expr (
3117 m_vector_manager
->vector_block_infos
[bb
->index ()].reaching_out
);
3122 fprintf (dump_file
, "\nThe total VSETVL expression num = %d\n",
3123 m_vector_manager
->vector_exprs
.length ());
3124 fprintf (dump_file
, "Expression List:\n");
3125 for (size_t i
= 0; i
< m_vector_manager
->vector_exprs
.length (); i
++)
3127 fprintf (dump_file
, "Expr[%ld]:\n", i
);
3128 m_vector_manager
->vector_exprs
[i
]->dump (dump_file
);
3129 fprintf (dump_file
, "\n");
3134 /* Compute the local properties of each recorded expression.
3136 Local properties are those that are defined by the block, irrespective of
3139 An expression is transparent in a block if its operands are not modified
3142 An expression is computed (locally available) in a block if it is computed
3143 at least once and expression would contain the same value if the
3144 computation was moved to the end of the block.
3146 An expression is locally anticipatable in a block if it is computed at
3147 least once and expression would contain the same value if the computation
3148 was moved to the beginning of the block. */
3150 pass_vsetvl::compute_local_properties (void)
3152 /* - If T is locally available at the end of a block, then T' must be
3153 available at the end of the same block. Since some optimization has
3154 occurred earlier, T' might not be locally available, however, it must
3155 have been previously computed on all paths. As a formula, T at AVLOC(B)
3156 implies that T' at AVOUT(B).
3157 An "available occurrence" is one that is the last occurrence in the
3158 basic block and the operands are not modified by following statements in
3159 the basic block [including this insn].
3161 - If T is locally anticipated at the beginning of a block, then either
3162 T', is locally anticipated or it is already available from previous
3163 blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
3164 ANTLOC(B) at AVIN(B).
3165 An "anticipatable occurrence" is one that is the first occurrence in the
3166 basic block, the operands are not modified in the basic block prior
3167 to the occurrence and the output is not used between the start of
3168 the block and the occurrence. */
3171 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3173 unsigned int curr_bb_idx
= bb
->index ();
3174 if (curr_bb_idx
== ENTRY_BLOCK
|| curr_bb_idx
== EXIT_BLOCK
)
3176 const auto local_dem
3177 = m_vector_manager
->vector_block_infos
[curr_bb_idx
].local_dem
;
3178 const auto reaching_out
3179 = m_vector_manager
->vector_block_infos
[curr_bb_idx
].reaching_out
;
3181 /* Compute transparent. */
3182 for (size_t i
= 0; i
< m_vector_manager
->vector_exprs
.length (); i
++)
3184 const auto *expr
= m_vector_manager
->vector_exprs
[i
];
3185 if (local_dem
.valid_or_dirty_p () || local_dem
.unknown_p ())
3186 bitmap_clear_bit (m_vector_manager
->vector_transp
[curr_bb_idx
], i
);
3187 else if (expr
->has_avl_reg ())
3189 rtx reg
= expr
->get_avl_or_vl_reg ();
3190 for (const insn_info
*insn
: bb
->real_nondebug_insns ())
3192 if (find_access (insn
->defs (), REGNO (reg
)))
3195 m_vector_manager
->vector_transp
[curr_bb_idx
], i
);
3198 else if (vlmax_avl_p (expr
->get_avl ())
3199 && find_access (insn
->uses (), REGNO (reg
)))
3202 m_vector_manager
->vector_transp
[curr_bb_idx
], i
);
3209 /* Compute anticipatable occurrences. */
3210 if (local_dem
.valid_or_dirty_p ())
3211 if (anticipatable_occurrence_p (bb
, local_dem
))
3212 bitmap_set_bit (m_vector_manager
->vector_antic
[curr_bb_idx
],
3213 m_vector_manager
->get_expr_id (local_dem
));
3215 /* Compute available occurrences. */
3216 if (reaching_out
.valid_or_dirty_p ())
3218 auto_vec
<size_t> available_list
3219 = m_vector_manager
->get_all_available_exprs (reaching_out
);
3220 for (size_t i
= 0; i
< available_list
.length (); i
++)
3222 const vector_insn_info
*expr
3223 = m_vector_manager
->vector_exprs
[available_list
[i
]];
3224 if (available_occurrence_p (bb
, *expr
))
3225 bitmap_set_bit (m_vector_manager
->vector_comp
[curr_bb_idx
],
3230 if (loop_basic_block_p (bb
->cfg_bb ()) && local_dem
.valid_or_dirty_p ()
3231 && reaching_out
.valid_or_dirty_p ()
3232 && !local_dem
.compatible_p (reaching_out
))
3233 bitmap_clear_bit (m_vector_manager
->vector_antic
[curr_bb_idx
],
3234 m_vector_manager
->get_expr_id (local_dem
));
3237 /* Compute kill for each basic block using:
3242 FOR_EACH_BB_FN (cfg_bb
, cfun
)
3244 bitmap_ior (m_vector_manager
->vector_kill
[cfg_bb
->index
],
3245 m_vector_manager
->vector_transp
[cfg_bb
->index
],
3246 m_vector_manager
->vector_comp
[cfg_bb
->index
]);
3247 bitmap_not (m_vector_manager
->vector_kill
[cfg_bb
->index
],
3248 m_vector_manager
->vector_kill
[cfg_bb
->index
]);
3251 FOR_EACH_BB_FN (cfg_bb
, cfun
)
3256 /* If the current block is the destination of an abnormal edge, we
3257 kill all trapping (for PRE) and memory (for hoist) expressions
3258 because we won't be able to properly place the instruction on
3259 the edge. So make them neither anticipatable nor transparent.
3260 This is fairly conservative.
3262 ??? For hoisting it may be necessary to check for set-and-jump
3263 instructions here, not just for abnormal edges. The general problem
3264 is that when an expression cannot not be placed right at the end of
3265 a basic block we should account for any side-effects of a subsequent
3266 jump instructions that could clobber the expression. It would
3267 be best to implement this check along the lines of
3268 should_hoist_expr_to_dom where the target block is already known
3269 and, hence, there's no need to conservatively prune expressions on
3270 "intermediate" set-and-jump instructions. */
3271 FOR_EACH_EDGE (e
, ei
, cfg_bb
->preds
)
3272 if (e
->flags
& EDGE_COMPLEX
)
3274 bitmap_clear (m_vector_manager
->vector_antic
[cfg_bb
->index
]);
3275 bitmap_clear (m_vector_manager
->vector_transp
[cfg_bb
->index
]);
3280 /* Fuse demand info for earliest edge. */
3282 pass_vsetvl::earliest_fusion (void)
3284 bool changed_p
= false;
3285 for (int ed
= 0; ed
< NUM_EDGES (m_vector_manager
->vector_edge_list
); ed
++)
3287 for (size_t i
= 0; i
< m_vector_manager
->vector_exprs
.length (); i
++)
3289 auto &expr
= *m_vector_manager
->vector_exprs
[i
];
3290 if (expr
.empty_p ())
3292 edge eg
= INDEX_EDGE (m_vector_manager
->vector_edge_list
, ed
);
3293 /* If it is the edge that we never reach, skip its possible PRE
3294 fusion conservatively. */
3295 if (eg
->probability
== profile_probability::never ())
3297 if (eg
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
)
3298 || eg
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
3300 if (bitmap_bit_p (m_vector_manager
->vector_earliest
[ed
], i
))
3302 auto &src_block_info
= get_block_info (eg
->src
);
3303 auto &dest_block_info
= get_block_info (eg
->dest
);
3304 if (src_block_info
.reaching_out
.unknown_p ())
3307 gcc_assert (!(eg
->flags
& EDGE_ABNORMAL
));
3308 vector_insn_info new_info
= vector_insn_info ();
3309 profile_probability prob
= src_block_info
.probability
;
3310 /* We don't fuse user vsetvl into EMPTY or
3311 DIRTY (EMPTY but polluted) block for these
3314 - The user vsetvl instruction is configured as
3315 no side effects that the previous passes
3316 (GSCE, Loop-invariant, ..., etc)
3317 should be able to do a good job on optimization
3318 of user explicit vsetvls so we don't need to
3319 PRE optimization (The user vsetvls should be
3320 on the optimal local already before this pass)
3321 again for user vsetvls in VSETVL PASS here
3322 (Phase 3 && Phase 4).
3324 - Allowing user vsetvls be optimized in PRE
3325 optimization here (Phase 3 && Phase 4) will
3326 complicate the codes so much so we prefer user
3327 vsetvls be optimized in post-optimization
3328 (Phase 5 && Phase 6). */
3329 if (vsetvl_insn_p (expr
.get_insn ()->rtl ()))
3331 if (src_block_info
.reaching_out
.empty_p ())
3333 else if (src_block_info
.reaching_out
.dirty_p ()
3334 && !src_block_info
.reaching_out
.compatible_p (expr
))
3336 new_info
.set_empty ();
3337 /* Update probability as uninitialized status so that
3338 we won't try to fuse any demand info into such EMPTY
3340 prob
= profile_probability::uninitialized ();
3341 update_block_info (eg
->src
->index
, prob
, new_info
);
3346 if (src_block_info
.reaching_out
.empty_p ())
3348 if (src_block_info
.probability
3349 == profile_probability::uninitialized ())
3351 new_info
= expr
.global_merge (expr
, eg
->src
->index
);
3352 new_info
.set_dirty ();
3353 prob
= dest_block_info
.probability
;
3354 update_block_info (eg
->src
->index
, prob
, new_info
);
3357 else if (src_block_info
.reaching_out
.dirty_p ())
3359 /* DIRTY -> DIRTY or VALID -> DIRTY. */
3360 if (demands_can_be_fused_p (src_block_info
.reaching_out
,
3363 new_info
= src_block_info
.reaching_out
.global_merge (
3364 expr
, eg
->src
->index
);
3365 new_info
.set_dirty ();
3366 prob
+= dest_block_info
.probability
;
3368 else if (!src_block_info
.reaching_out
.compatible_p (expr
)
3369 && !m_vector_manager
->earliest_fusion_worthwhile_p (
3372 new_info
.set_empty ();
3373 prob
= profile_probability::uninitialized ();
3375 else if (!src_block_info
.reaching_out
.compatible_p (expr
)
3376 && dest_block_info
.probability
3377 > src_block_info
.probability
)
3380 new_info
.set_dirty ();
3381 prob
= dest_block_info
.probability
;
3385 update_block_info (eg
->src
->index
, prob
, new_info
);
3392 src_block_info
.reaching_out
.get_insn ()->rtl ())
3393 && vsetvl_dominated_by_p (eg
->src
, expr
,
3394 src_block_info
.reaching_out
,
3397 else if (!demands_can_be_fused_p (src_block_info
.reaching_out
,
3400 else if (!earliest_pred_can_be_fused_p (
3401 crtl
->ssa
->bb (eg
->src
),
3402 src_block_info
.reaching_out
, expr
, &vl
))
3405 vector_insn_info new_info
3406 = src_block_info
.reaching_out
.global_merge (expr
,
3409 prob
= std::max (dest_block_info
.probability
,
3410 src_block_info
.probability
);
3411 change_vsetvl_insn (new_info
.get_insn (), new_info
, vl
);
3412 update_block_info (eg
->src
->index
, prob
, new_info
);
3421 /* Fuse VSETVL demand info according LCM computed location. */
3423 pass_vsetvl::vsetvl_fusion (void)
3425 /* Fuse VSETVL demand info until VSETVL CFG fixed. */
3426 bool changed_p
= true;
3432 prune_expressions ();
3433 m_vector_manager
->create_bitmap_vectors ();
3434 compute_local_properties ();
3435 /* Compute global availability. */
3436 compute_available (m_vector_manager
->vector_comp
,
3437 m_vector_manager
->vector_kill
,
3438 m_vector_manager
->vector_avout
,
3439 m_vector_manager
->vector_avin
);
3440 /* Compute global anticipatability. */
3441 compute_antinout_edge (m_vector_manager
->vector_antic
,
3442 m_vector_manager
->vector_transp
,
3443 m_vector_manager
->vector_antin
,
3444 m_vector_manager
->vector_antout
);
3445 /* Compute earliestness. */
3446 compute_earliest (m_vector_manager
->vector_edge_list
,
3447 m_vector_manager
->vector_exprs
.length (),
3448 m_vector_manager
->vector_antin
,
3449 m_vector_manager
->vector_antout
,
3450 m_vector_manager
->vector_avout
,
3451 m_vector_manager
->vector_kill
,
3452 m_vector_manager
->vector_earliest
);
3453 changed_p
|= earliest_fusion ();
3454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3456 fprintf (dump_file
, "\nEARLIEST fusion %d\n", fusion_no
);
3457 m_vector_manager
->dump (dump_file
);
3459 m_vector_manager
->free_bitmap_vectors ();
3460 if (!m_vector_manager
->vector_exprs
.is_empty ())
3461 m_vector_manager
->vector_exprs
.release ();
3465 /* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
3467 pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb
,
3468 const vector_insn_info
&info
) const
3470 if (!m_vector_manager
->all_same_ratio_p (
3471 m_vector_manager
->vector_avin
[cfg_bb
->index
]))
3474 if (!m_vector_manager
->all_same_avl_p (
3475 cfg_bb
, m_vector_manager
->vector_avin
[cfg_bb
->index
]))
3479 = bitmap_first_set_bit (m_vector_manager
->vector_avin
[cfg_bb
->index
]);
3480 if (!m_vector_manager
->vector_exprs
[expr_id
]->same_vlmax_p (info
))
3482 if (!m_vector_manager
->vector_exprs
[expr_id
]->compatible_avl_p (info
))
3487 bool all_valid_p
= true;
3488 FOR_EACH_EDGE (e
, ei
, cfg_bb
->preds
)
3490 if (bitmap_empty_p (m_vector_manager
->vector_avout
[e
->src
->index
]))
3492 all_valid_p
= false;
3502 /* Optimize athe case like this:
3505 vsetvl 0 a5,zero,e8,mf8
3506 insn 0 (demand SEW + LMUL)
3508 vsetvl 1 a5,zero,e16,mf4
3509 insn 1 (demand SEW + LMUL)
3511 In this case, we should be able to refine
3512 vsetvl 1 into vsetvl zero, zero according AVIN. */
3514 pass_vsetvl::refine_vsetvls (void) const
3517 FOR_EACH_BB_FN (cfg_bb
, cfun
)
3519 auto info
= get_block_info (cfg_bb
).local_dem
;
3520 insn_info
*insn
= info
.get_insn ();
3521 if (!info
.valid_p ())
3524 rtx_insn
*rinsn
= insn
->rtl ();
3525 if (!can_refine_vsetvl_p (cfg_bb
, info
))
3528 /* We can't refine user vsetvl into vsetvl zero,zero since the dest
3529 will be used by the following instructions. */
3530 if (vector_config_insn_p (rinsn
))
3532 m_vector_manager
->to_refine_vsetvls
.add (rinsn
);
3536 /* If all incoming edges to a block have a vector state that is compatbile
3537 with the block. In such a case we need not emit a vsetvl in the current
3540 gcc_assert (has_vtype_op (insn
->rtl ()));
3541 rinsn
= PREV_INSN (insn
->rtl ());
3542 gcc_assert (vector_config_insn_p (PREV_INSN (insn
->rtl ())));
3543 if (m_vector_manager
->all_avail_in_compatible_p (cfg_bb
))
3545 size_t id
= m_vector_manager
->get_expr_id (info
);
3546 if (bitmap_bit_p (m_vector_manager
->vector_del
[cfg_bb
->index
], id
))
3548 eliminate_insn (rinsn
);
3553 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY
, info
, NULL_RTX
);
3554 change_insn (rinsn
, new_pat
);
/* Delete the vsetvls that LCM proved redundant: for every expression
   marked in vector_del for a block, remove the vsetvl paired with the
   block's local demand (or queue a user vsetvl for later deletion,
   since its destination register is still used).  A dirty reaching_out
   state is downgraded to "unknown".
   NOTE(review): lossy extraction — some original lines are missing
   between the fragments below.  */
3560 pass_vsetvl::cleanup_vsetvls ()
3563 FOR_EACH_BB_FN (cfg_bb
, cfun
)
3565 auto &info
= get_block_info (cfg_bb
).reaching_out
;
/* Sanity check: LCM should mark at most a bounded number of
   expressions for deletion in this block.  */
3566 gcc_assert (m_vector_manager
->expr_set_num (
3567 m_vector_manager
->vector_del
[cfg_bb
->index
])
3569 for (size_t i
= 0; i
< m_vector_manager
->vector_exprs
.length (); i
++)
3571 if (bitmap_bit_p (m_vector_manager
->vector_del
[cfg_bb
->index
], i
))
3573 if (info
.dirty_p ())
3574 info
.set_unknown ();
3577 const auto dem
= get_block_info (cfg_bb
).local_dem
;
3578 gcc_assert (dem
== *m_vector_manager
->vector_exprs
[i
]);
3579 insn_info
*insn
= dem
.get_insn ();
3580 gcc_assert (insn
&& insn
->rtl ());
3582 /* We can't eliminate user vsetvl since the dest will be used
3583 * by the following instructions. */
3584 if (vector_config_insn_p (insn
->rtl ()))
3586 m_vector_manager
->to_delete_vsetvls
.add (insn
->rtl ());
3590 gcc_assert (has_vtype_op (insn
->rtl ()));
/* Delete the vsetvl emitted just before this RVV instruction.  */
3591 rinsn
= PREV_INSN (insn
->rtl ());
3592 gcc_assert (vector_config_insn_p (PREV_INSN (insn
->rtl ())));
3593 eliminate_insn (rinsn
);
/* Materialize the LCM result: insert vsetvl instructions on the edges
   selected by vector_insert, then emit a vsetvl for every block whose
   reaching_out state is still "dirty".  Returns whether any edge
   insertion needs commit_edge_insertions () (see pre_vsetvl).
   NOTE(review): lossy extraction — several original lines are missing
   between the fragments below.  */
3601 pass_vsetvl::commit_vsetvls (void)
3603 bool need_commit
= false;
/* Part 1: insert the vsetvls that LCM placed on edges.  */
3605 for (int ed
= 0; ed
< NUM_EDGES (m_vector_manager
->vector_edge_list
); ed
++)
3607 for (size_t i
= 0; i
< m_vector_manager
->vector_exprs
.length (); i
++)
3609 edge eg
= INDEX_EDGE (m_vector_manager
->vector_edge_list
, ed
)
;
3610 if (bitmap_bit_p (m_vector_manager
->vector_insert
[ed
], i
))
3612 const vector_insn_info
*require
3613 = m_vector_manager
->vector_exprs
[i
];
3614 gcc_assert (require
->valid_or_dirty_p ());
3615 rtl_profile_for_edge (eg
);
3618 insn_info
*insn
= require
->get_insn ();
3619 vector_insn_info prev_info
= vector_insn_info ();
/* VL/VTYPE expressions available at the end of the source block;
   if they all share the same ratio and AVL we can treat any one of
   them as the previous state and emit a cheaper vsetvl.  */
3620 sbitmap bitdata
= m_vector_manager
->vector_avout
[eg
->src
->index
];
3621 if (m_vector_manager
->all_same_ratio_p (bitdata
)
3622 && m_vector_manager
->all_same_avl_p (eg
->dest
, bitdata
))
3624 size_t first
= bitmap_first_set_bit (bitdata
);
3625 prev_info
= *m_vector_manager
->vector_exprs
[first
];
3628 insert_vsetvl (EMIT_DIRECT
, insn
->rtl (), *require
, prev_info
);
3629 rtx_insn
*rinsn
= get_insns ();
3631 default_rtl_profile ();
3633 /* We should not get an abnormal edge here. */
3634 gcc_assert (!(eg
->flags
& EDGE_ABNORMAL
));
3636 insert_insn_on_edge (rinsn
, eg
);
3641 "\nInsert vsetvl insn %d at edge %d from <bb %d> to "
3643 INSN_UID (rinsn
), ed
, eg
->src
->index
,
3645 print_rtl_single (dump_file
, rinsn
);
/* Part 2: emit a vsetvl on the successor edges of every block whose
   reaching_out state is dirty (i.e. a demand that was fused but never
   satisfied by an actual instruction).  */
3651 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3653 basic_block cfg_bb
= bb
->cfg_bb ();
3654 const auto reaching_out
= get_block_info (cfg_bb
).reaching_out
;
3655 if (!reaching_out
.dirty_p ())
/* Pick the vsetvl form: discard-result with a zero AVL when AVL is
   not demanded, vtype-change-only when refinable, keep the VL
   register for VLMAX AVL, otherwise plain discard-result.  */
3659 if (!reaching_out
.demand_p (DEMAND_AVL
))
3661 vl_vtype_info new_info
= reaching_out
;
3662 new_info
.set_avl_info (avl_info (const0_rtx
, nullptr));
3663 new_pat
= gen_vsetvl_pat (VSETVL_DISCARD_RESULT
, new_info
, NULL_RTX
);
3665 else if (can_refine_vsetvl_p (cfg_bb
, reaching_out
))
3667 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY
, reaching_out
, NULL_RTX
);
3668 else if (vlmax_avl_p (reaching_out
.get_avl ()))
3670 rtx vl
= reaching_out
.get_avl_or_vl_reg ();
3671 new_pat
= gen_vsetvl_pat (VSETVL_NORMAL
, reaching_out
, vl
);
3675 = gen_vsetvl_pat (VSETVL_DISCARD_RESULT
, reaching_out
, NULL_RTX
);
3678 edge_iterator eg_iterator
;
3679 FOR_EACH_EDGE (eg
, eg_iterator
, cfg_bb
->succs
)
3681 /* We should not get an abnormal edge here. */
3682 gcc_assert (!(eg
->flags
& EDGE_ABNORMAL
));
3683 /* We failed to optimize this case in Phase 3 (earliest fusion):
3685 bb 2: vsetvl a5, a3 ...
3687 bb 3: vsetvl a5, a2 ...
3689 bb 4: vsetvli zero, a5 ---> Redundant, should be elided.
3691 Since "a5" value can come from either bb 2 or bb 3, we can't make
3692 it optimized in Phase 3 which will make phase 3 so complicated.
3693 Now, we do post optimization here to elide the redundant VSETVL
3695 if (m_vector_manager
->vsetvl_dominated_by_all_preds_p (cfg_bb
,
3700 emit_insn (copy_rtx (new_pat
));
3701 rtx_insn
*rinsn
= get_insns ();
3704 insert_insn_on_edge (rinsn
, eg
);
3709 "\nInsert vsetvl insn %d from <bb %d> to <bb %d>:\n",
3710 INSN_UID (rinsn
), cfg_bb
->index
, eg
->dest
->index
);
3711 print_rtl_single (dump_file
, rinsn
);
/* Phase 4 driver: run partial redundancy elimination (lazy code motion)
   over the vsetvl expressions.  Prunes the expression set, computes the
   local LCM properties (transparency, computability, anticipatability,
   kill), runs pre_edge_lcm_avs to obtain the insert/delete edge sets,
   then commits the resulting vsetvl insertions.  */
3720 pass_vsetvl::pre_vsetvl (void)
3722 /* Compute entity list. */
3723 prune_expressions ();
3725 m_vector_manager
->create_bitmap_vectors ();
3726 compute_local_properties ();
/* Run LCM; fills vector_avin/vector_avout and yields the per-edge
   insert set and per-block delete set.  */
3727 m_vector_manager
->vector_edge_list
= pre_edge_lcm_avs (
3728 m_vector_manager
->vector_exprs
.length (), m_vector_manager
->vector_transp
,
3729 m_vector_manager
->vector_comp
, m_vector_manager
->vector_antic
,
3730 m_vector_manager
->vector_kill
, m_vector_manager
->vector_avin
,
3731 m_vector_manager
->vector_avout
, &m_vector_manager
->vector_insert
,
3732 &m_vector_manager
->vector_del
);
3734 /* We should dump the information before CFG is changed. Otherwise it will
3735 produce ICE (internal compiler error). */
3736 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3737 m_vector_manager
->dump (dump_file
);
3741 bool need_commit
= commit_vsetvls ();
3743 commit_edge_insertions ();
3746 /* Some instruction can not be accessed in RTL_SSA when we don't re-init
3747 the new RTL_SSA framework but it is definitely at the END of the block.
3749 Here we optimize the VSETVL is hoisted by LCM:
3753 vsetvli a5,a2,e32,m1,ta,mu
3755 vsetvli zero,a5,e32,m1,ta,mu
3760 vsetvli a5,a2,e32,m1,ta,mu
3761 LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
/* Return the vsetvl_discard_result instruction sitting at the end of
   BB (LCM-inserted insns are not in RTL-SSA, so we look at the raw
   RTL), skipping over a trailing jump.  On success also report the
   demanded info via *DEM, taken from the single successor's local
   demand — multiple-successor blocks are deliberately not handled.  */
3766 pass_vsetvl::get_vsetvl_at_end (const bb_info
*bb
, vector_insn_info
*dem
) const
3768 rtx_insn
*end_vsetvl
= BB_END (bb
->cfg_bb ());
3769 if (end_vsetvl
&& NONDEBUG_INSN_P (end_vsetvl
))
/* The vsetvl may sit just before the block-terminating jump.  */
3771 if (JUMP_P (end_vsetvl
))
3772 end_vsetvl
= PREV_INSN (end_vsetvl
);
3774 if (NONDEBUG_INSN_P (end_vsetvl
)
3775 && vsetvl_discard_result_insn_p (end_vsetvl
))
3777 /* Only handle single succ. here, multiple succ. is much
3778 more complicated. */
3779 if (single_succ_p (bb
->cfg_bb ()))
3781 edge e
= single_succ_edge (bb
->cfg_bb ());
3782 *dem
= get_block_info (e
->dest
).local_dem
;
3790 /* This predicate should only be used within the same basic block. */
/* Return true iff AVL1 and AVL2 are both registers with the same
   REGNO.  Intra-block only: no data-flow needed, a plain register
   number comparison suffices.  */
3792 local_avl_compatible_p (rtx avl1
, rtx avl2
)
3794 if (!REG_P (avl1
) || !REG_P (avl2
))
3797 return REGNO (avl1
) == REGNO (avl2
);
3800 /* Local user vsetvl optimization:
3805 vsetvl zero,a5,e8,mf8 --> Eliminate directly.
3808 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3810 vsetvl zero,a5,e32,mf2 --> Eliminate directly. */
/* Walk BB backwards merging adjacent compatible vsetvls: when the
   later vsetvl (prev_vsetvl) is skip_avl-compatible with the current
   one and their AVLs match locally, fold its vtype/avl demand into the
   current vsetvl and delete the later one.  Calls, asms, and insns
   that clobber VL/VTYPE or the tracked AVL register invalidate the
   running state.
   NOTE(review): lossy extraction — some original lines are missing
   between the fragments below.  */
3812 pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info
*bb
) const
3814 rtx_insn
*prev_vsetvl
= nullptr;
3815 rtx_insn
*curr_vsetvl
= nullptr;
/* Placeholder AVL used when an insn has no AVL operand.  */
3816 rtx vl_placeholder
= RVV_VLMAX
;
3817 rtx prev_avl
= vl_placeholder
;
3818 rtx curr_avl
= vl_placeholder
;
3819 vector_insn_info prev_dem
;
3821 /* Instructions inserted by LCM have not appeared in RTL-SSA yet; try to
3822 find those instructions. */
3823 if (rtx_insn
*end_vsetvl
= get_vsetvl_at_end (bb
, &prev_dem
))
3825 prev_avl
= get_avl (end_vsetvl
);
3826 prev_vsetvl
= end_vsetvl
;
3829 bool skip_one
= false;
3830 /* Backward propagate vsetvl info, drop the later one (prev_vsetvl) if it's
3831 compatible with current vsetvl (curr_avl), and merge the vtype and avl
3832 info. into current vsetvl. */
3833 for (insn_info
*insn
: bb
->reverse_real_nondebug_insns ())
3835 rtx_insn
*rinsn
= insn
->rtl ();
3836 const auto &curr_dem
= get_vector_info (insn
);
3837 bool need_invalidate
= false;
3839 /* Skip if this insn already handled in last iteration. */
3846 if (vsetvl_insn_p (rinsn
))
3848 curr_vsetvl
= rinsn
;
3849 /* vsetvl are using vl rather than avl since it will try to merge
3850 with other vsetvl_discard_result.
3853 vsetvl a5,a4,e8,mf8 # vsetvl
3855 vsetvl zero,a5,e8,mf8 # vsetvl_discard_result
3858 curr_avl
= get_vl (rinsn
);
3859 /* vsetvl is a cut point of local backward vsetvl elimination. */
3860 need_invalidate
= true;
3862 else if (has_vtype_op (rinsn
) && NONDEBUG_INSN_P (PREV_INSN (rinsn
))
3863 && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn
))
3864 || vsetvl_insn_p (PREV_INSN (rinsn
))))
3866 curr_vsetvl
= PREV_INSN (rinsn
);
3868 if (vsetvl_insn_p (PREV_INSN (rinsn
)))
3870 /* Need invalidate and skip if it's vsetvl. */
3871 need_invalidate
= true;
3872 /* vsetvl_discard_result_insn_p won't appear in RTL-SSA,
3873 * so only need to skip for vsetvl. */
3877 curr_avl
= curr_dem
.get_avl ();
3879 /* Some instructions like pred_extract_first<mode> don't require an avl, so
3880 the avl is null, use vl_placeholder for unify the handling
3883 curr_avl
= vl_placeholder
;
3885 else if (insn
->is_call () || insn
->is_asm ()
3886 || find_access (insn
->defs (), VL_REGNUM
)
3887 || find_access (insn
->defs (), VTYPE_REGNUM
)
3888 || (REG_P (prev_avl
)
3889 && find_access (insn
->defs (), REGNO (prev_avl
))))
3891 /* Invalidate if this insn can't propagate vl, vtype or avl. */
3892 need_invalidate
= true;
3893 prev_dem
= vector_insn_info ();
3896 /* Not interested instruction. */
3899 /* Local AVL compatibility checking is simpler than global, we only
3900 need to check the REGNO is same. */
3901 if (prev_dem
.valid_or_dirty_p ()
3902 && prev_dem
.skip_avl_compatible_p (curr_dem
)
3903 && local_avl_compatible_p (prev_avl
, curr_avl
))
3905 /* curr_dem and prev_dem are compatible! */
3906 /* Update avl info since we need to make sure they are fully
3907 compatible before merge. */
3908 prev_dem
.set_avl_info (curr_dem
.get_avl_info ());
3909 /* Merge both and update into curr_vsetvl. */
3910 prev_dem
= curr_dem
.local_merge (prev_dem
);
3911 change_vsetvl_insn (curr_dem
.get_insn (), prev_dem
);
3912 /* Then we can drop prev_vsetvl. */
3913 eliminate_insn (prev_vsetvl
);
3916 if (need_invalidate
)
/* Reset all backward-propagation state at a cut point.  */
3918 prev_vsetvl
= nullptr;
3919 curr_vsetvl
= nullptr;
3920 prev_avl
= vl_placeholder
;
3921 curr_avl
= vl_placeholder
;
3922 prev_dem
= vector_insn_info ();
/* Otherwise carry the current state backwards to the next insn.  */
3926 prev_vsetvl
= curr_vsetvl
;
3927 prev_avl
= curr_avl
;
3928 prev_dem
= curr_dem
;
3933 /* Return the first vsetvl instruction in CFG_BB or NULL if
3934 none exists or if a user RVV instruction is encountered
3935 prior to any vsetvl. */
/* Scan CFG_BB forward for the first vsetvl of kind INSN_TYPE (either a
   discard-result or a vtype-change-only form) that occurs before any
   user RVV instruction; per the comment above the function, returns
   NULL when an RVV instruction is hit first.  */
3937 get_first_vsetvl_before_rvv_insns (basic_block cfg_bb
,
3938 enum vsetvl_type insn_type
)
3940 gcc_assert (insn_type
== VSETVL_DISCARD_RESULT
3941 || insn_type
== VSETVL_VTYPE_CHANGE_ONLY
);
3943 FOR_BB_INSNS (cfg_bb
, rinsn
)
3945 if (!NONDEBUG_INSN_P (rinsn
))
3947 /* If we don't find any inserted vsetvli before user RVV instructions,
3948 we don't need to optimize the vsetvls in this block. */
3949 if (has_vtype_op (rinsn
) || vsetvl_insn_p (rinsn
))
3952 if (insn_type
== VSETVL_DISCARD_RESULT
3953 && vsetvl_discard_result_insn_p (rinsn
))
3955 if (insn_type
== VSETVL_VTYPE_CHANGE_ONLY
3956 && vsetvl_vtype_change_only_p (rinsn
))
3962 /* Global user vsetvl optimization:
3970 vsetvl zero,a5,e8,mf8 --> Eliminate directly.
3974 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3978 vsetvl zero,a5,e32,mf2 --> Eliminate directly.
3982 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3986 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3990 vsetvl zero,a5,e32,mf2 --> Eliminate directly.
/* Cross-block vsetvl elimination (see the comment above the function):
   if every predecessor already establishes a VL/VTYPE state that is
   skip_avl-compatible with the vsetvl at the head/end of BB, reshape
   the predecessors' vsetvls and delete this one.  Returns whether the
   function changed anything (per its use in ssa_post_optimization).
   NOTE(review): lossy extraction — some original lines are missing
   between the fragments below.  */
3993 pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info
*bb
) const
3995 rtx_insn
*vsetvl_rinsn
= NULL
;
3996 vector_insn_info dem
= vector_insn_info ();
3997 const auto &block_info
= get_block_info (bb
);
3998 basic_block cfg_bb
= bb
->cfg_bb ();
4000 if (block_info
.local_dem
.valid_or_dirty_p ())
4002 /* Optimize the local vsetvl. */
4003 dem
= block_info
.local_dem
;
4005 = get_first_vsetvl_before_rvv_insns (cfg_bb
, VSETVL_DISCARD_RESULT
);
4008 /* Optimize the global vsetvl inserted by LCM. */
4009 vsetvl_rinsn
= get_vsetvl_at_end (bb
, &dem
);
4011 /* No need to optimize if block doesn't have vsetvl instructions. */
4012 if (!dem
.valid_or_dirty_p () || !vsetvl_rinsn
|| !dem
.get_avl_source ()
4013 || !dem
.has_avl_reg ())
4016 /* Condition 1: Check it has preds. */
4017 if (EDGE_COUNT (cfg_bb
->preds
) == 0)
4020 /* If all preds have VL/VTYPE status set by user vsetvls, and these
4021 user vsetvls are all skip_avl_compatible_p with the vsetvl in this
4022 block, we can eliminate this vsetvl instruction. */
4023 sbitmap avin
= m_vector_manager
->vector_avin
[cfg_bb
->index
];
4025 unsigned int bb_index
;
4026 sbitmap_iterator sbi
;
4027 rtx avl
= dem
.get_avl ();
/* All reaching definitions of the AVL register of this vsetvl.  */
4028 hash_set
<set_info
*> sets
4029 = get_all_sets (dem
.get_avl_source (), true, false, false);
4030 /* Condition 2: All VL/VTYPE available in are all compatible. */
4031 EXECUTE_IF_SET_IN_BITMAP (avin
, 0, bb_index
, sbi
)
4033 const auto &expr
= m_vector_manager
->vector_exprs
[bb_index
];
4034 const auto &insn
= expr
->get_insn ();
4035 def_info
*def
= find_access (insn
->defs (), REGNO (avl
));
4036 set_info
*set
= safe_dyn_cast
<set_info
*> (def
);
/* The available expression must be a user vsetvl in another block
   that is one of the AVL's reaching definitions.  */
4037 if (!vsetvl_insn_p (insn
->rtl ()) || insn
->bb () == bb
4038 || !sets
.contains (set
))
4042 /* Condition 3: We don't do the global optimization for the block
4043 has a pred is entry block or exit block. */
4044 /* Condition 4: All preds have available VL/VTYPE out. */
4047 FOR_EACH_EDGE (e
, ei
, cfg_bb
->preds
)
4049 sbitmap avout
= m_vector_manager
->vector_avout
[e
->src
->index
];
4050 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
)
4051 || e
->src
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
4052 || (unsigned int) e
->src
->index
4053 >= m_vector_manager
->vector_block_infos
.length ()
4054 || bitmap_empty_p (avout
))
4057 EXECUTE_IF_SET_IN_BITMAP (avout
, 0, bb_index
, sbi
)
4059 const auto &expr
= m_vector_manager
->vector_exprs
[bb_index
];
4060 const auto &insn
= expr
->get_insn ();
4061 def_info
*def
= find_access (insn
->defs (), REGNO (avl
));
4062 set_info
*set
= safe_dyn_cast
<set_info
*> (def
);
4063 if (!vsetvl_insn_p (insn
->rtl ()) || insn
->bb () == bb
4064 || !sets
.contains (set
) || !expr
->skip_avl_compatible_p (dem
))
4069 /* Step1: Reshape the VL/VTYPE status to make sure everything compatible. */
4070 auto_vec
<basic_block
> pred_cfg_bbs
4071 = get_dominated_by (CDI_POST_DOMINATORS
, cfg_bb
);
4072 FOR_EACH_EDGE (e
, ei
, cfg_bb
->preds
)
4074 sbitmap avout
= m_vector_manager
->vector_avout
[e
->src
->index
];
4075 EXECUTE_IF_SET_IN_BITMAP (avout
, 0, bb_index
, sbi
)
4077 vector_insn_info prev_dem
= *m_vector_manager
->vector_exprs
[bb_index
];
4078 vector_insn_info curr_dem
= dem
;
4079 insn_info
*insn
= prev_dem
.get_insn ();
/* Only reshape vsetvls in blocks post-dominated by CFG_BB.  */
4080 if (!pred_cfg_bbs
.contains (insn
->bb ()->cfg_bb ()))
4082 /* Update avl info since we need to make sure they are fully
4083 compatible before merge. */
4084 curr_dem
.set_avl_info (prev_dem
.get_avl_info ());
4085 /* Merge both and update into curr_vsetvl. */
4086 prev_dem
= curr_dem
.local_merge (prev_dem
);
4087 change_vsetvl_insn (insn
, prev_dem
);
4091 /* Step2: eliminate the vsetvl instruction. */
4092 eliminate_insn (vsetvl_rinsn
);
4096 /* This function does the following post optimization base on RTL_SSA:
4098 1. Local user vsetvl optimizations.
4099 2. Global user vsetvl optimizations.
4100 3. AVL dependencies removal:
4101 Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
4102 implicitly. Since we will emit VSETVL instruction and make RVV
4103 instructions depending on VL/VTYPE global status registers, we remove the
4104 such AVL operand in the RVV instructions pattern here in order to remove
4105 AVL dependencies when AVL operand is a register operand.
4107 Before the VSETVL PASS:
4111 After the VSETVL PASS:
4113 vsetvli zero, a5, ...
4115 vadd.vv (..., const_int 0). */
/* Phase 5 (see the comment above the function): RTL-SSA based post
   optimization — local then global user-vsetvl elimination, removal of
   vlmax AVL placeholder insns, and stripping the now-redundant AVL
   register operand from RVV instruction patterns.
   NOTE(review): lossy extraction — some original lines are missing
   between the fragments below.  */
4117 pass_vsetvl::ssa_post_optimization (void) const
4119 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
4121 local_eliminate_vsetvl_insn (bb
);
4122 bool changed_p
= true;
/* Iterate global elimination until it reaches a fixpoint.  */
4126 changed_p
|= global_eliminate_vsetvl_insn (bb
);
4128 for (insn_info
*insn
: bb
->real_nondebug_insns ())
4130 rtx_insn
*rinsn
= insn
->rtl ();
4131 if (vlmax_avl_insn_p (rinsn
))
4133 eliminate_insn (rinsn
);
4137 /* Erase the AVL operand from the instruction. */
4138 if (!has_vl_op (rinsn
) || !REG_P (get_vl (rinsn
)))
4140 rtx avl
= get_vl (rinsn
);
/* Only when the AVL register appears exactly once in the pattern;
   otherwise replacing it could alter other operands.  */
4141 if (count_regno_occurrences (rinsn
, REGNO (avl
)) == 1)
4143 /* Get the list of uses for the new instruction. */
4144 auto attempt
= crtl
->ssa
->new_change_attempt ();
4145 insn_change
change (insn
);
4146 /* Remove the use of the substituted value. */
4147 access_array_builder
uses_builder (attempt
);
4148 uses_builder
.reserve (insn
->num_uses () - 1);
4149 for (use_info
*use
: insn
->uses ())
4150 if (use
!= find_access (insn
->uses (), REGNO (avl
)))
4151 uses_builder
.quick_push (use
);
4152 use_array new_uses
= use_array (uses_builder
.finish ());
4153 change
.new_uses
= new_uses
;
4154 change
.move_range
= insn
->ebb ()->insn_range ();
/* Fault-only-first loads keep their full pattern shape; other
   insns are rebuilt from their single SET with AVL -> 0.  */
4156 if (fault_first_load_p (rinsn
))
4157 pat
= simplify_replace_rtx (PATTERN (rinsn
), avl
, const0_rtx
);
4160 rtx set
= single_set (rinsn
);
4162 = simplify_replace_rtx (SET_SRC (set
), avl
, const0_rtx
);
4163 pat
= gen_rtx_SET (SET_DEST (set
), src
);
4165 bool ok
= change_insn (crtl
->ssa
, change
, insn
, pat
);
4172 /* Return true if the SET result is not used by any instructions. */
/* Return whether register REGNO set by RINSN has no later uses:
   it must not be live out of CFG_BB and must not be used by any insn
   between RINSN and the end of the block.  Uses DF rather than
   RTL-SSA, for the reason explained in the comment below.  */
4174 has_no_uses (basic_block cfg_bb
, rtx_insn
*rinsn
, int regno
)
4176 /* Handle the following case that can not be detected in RTL_SSA. */
4183 The use of "a6" is removed from "vadd" but the information is
4184 not updated in RTL_SSA framework. We don't want to re-new
4185 a new RTL_SSA which is expensive, instead, we use data-flow
4186 analysis to check whether "a6" has no uses. */
4187 if (bitmap_bit_p (df_get_live_out (cfg_bb
), regno
))
/* Scan the remainder of the block for any use of REGNO.  */
4191 for (iter
= NEXT_INSN (rinsn
); iter
&& iter
!= NEXT_INSN (BB_END (cfg_bb
));
4192 iter
= NEXT_INSN (iter
))
4193 if (df_find_use (iter
, regno_reg_rtx
[regno
]))
4199 /* This function does the following post optimization base on dataflow
4202 1. Change vsetvl rd, rs1 --> vsetvl zero, rs1, if rd is not used by any
4203 nondebug instructions. Even though this PASS runs after RA and it doesn't
4204 help for reduce register pressure, it can help instructions scheduling since
4205 we remove the dependencies.
4207 2. Remove redundant user vsetvls base on outcome of Phase 4 (LCM) && Phase 5
4208 (AVL dependencies removal). */
/* Phase 6 (see the comment above the function): data-flow based post
   optimization of user vsetvls — delete a vsetvl whose VL result has
   no uses, rewrite the ones queued for refinement into the
   vtype-change-only form, and demote the rest with a non-VLMAX AVL to
   the discard-result form.
   NOTE(review): lossy extraction — some original lines (including
   parts of the conditions) are missing between the fragments below.  */
4210 pass_vsetvl::df_post_optimization (void) const
4213 hash_set
<rtx_insn
*> to_delete
;
4216 FOR_ALL_BB_FN (cfg_bb
, cfun
)
4218 FOR_BB_INSNS (cfg_bb
, rinsn
)
4220 if (NONDEBUG_INSN_P (rinsn
) && vsetvl_insn_p (rinsn
))
4222 rtx vl
= get_vl (rinsn
);
4223 vector_insn_info info
;
4224 info
.parse_insn (rinsn
);
4225 bool to_delete_p
= m_vector_manager
->to_delete_p (rinsn
);
4226 bool to_refine_p
= m_vector_manager
->to_refine_p (rinsn
);
4227 if (has_no_uses (cfg_bb
, rinsn
, REGNO (vl
)))
/* Defer the deletion: removing insns while FOR_BB_INSNS is
   iterating would invalidate the walk.  */
4230 to_delete
.add (rinsn
);
4231 else if (to_refine_p
)
4233 rtx new_pat
= gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY
,
4235 validate_change_or_fail (rinsn
, &PATTERN (rinsn
), new_pat
,
4238 else if (!vlmax_avl_p (info
.get_avl ()))
4240 rtx new_pat
= gen_vsetvl_pat (VSETVL_DISCARD_RESULT
, info
,
4242 validate_change_or_fail (rinsn
, &PATTERN (rinsn
), new_pat
,
4249 for (rtx_insn
*rinsn
: to_delete
)
4250 eliminate_insn (rinsn
);
/* Pass prologue: build dominance info and the RTL-SSA function_info,
   allocate the vector_infos_manager and precompute per-block
   probabilities used by the demand-fusion heuristics.  */
4254 pass_vsetvl::init (void)
4258 /* Initialization of RTL_SSA. */
4259 calculate_dominance_info (CDI_DOMINATORS
);
4260 calculate_dominance_info (CDI_POST_DOMINATORS
);
4262 crtl
->ssa
= new function_info (cfun
);
4265 m_vector_manager
= new vector_infos_manager ();
4266 compute_probabilities ();
4268 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4270 fprintf (dump_file
, "\nPrologue: Initialize vector infos\n");
4271 m_vector_manager
->dump (dump_file
);
/* Pass epilogue: free dominance info, flush any pending RTL-SSA
   updates, and tear down the RTL-SSA function_info and the
   vector_infos_manager allocated in init ().  */
4276 pass_vsetvl::done (void)
4280 /* Finalization of RTL_SSA. */
4281 free_dominance_info (CDI_DOMINATORS
);
4282 free_dominance_info (CDI_POST_DOMINATORS
);
4283 if (crtl
->ssa
->perform_pending_updates ())
4286 crtl
->ssa
= nullptr;
4288 m_vector_manager
->release ();
4289 delete m_vector_manager
;
4290 m_vector_manager
= nullptr;
4293 /* Compute probability for each block. */
/* Propagate execution probabilities from the entry block to every
   reachable block along the CFG edges; the result is stored in each
   block's vector_block_info and consulted by the demand-fusion
   heuristics (Phases 3 and 4).  */
4295 pass_vsetvl::compute_probabilities (void)
4297 /* Don't compute it in -O0 since we don't need it. */
4303 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
4305 basic_block cfg_bb
= bb
->cfg_bb ();
4306 auto &curr_prob
= get_block_info (cfg_bb
).probability
;
4308 /* GCC assumes the entry block (bb 0) is always
4309 executed, so set its probability as "always". */
4310 if (ENTRY_BLOCK_PTR_FOR_FN (cfun
) == cfg_bb
)
4311 curr_prob
= profile_probability::always ();
4312 /* Exit block (bb 1) is the block we don't need to process. */
4313 if (EXIT_BLOCK_PTR_FOR_FN (cfun
) == cfg_bb
)
4316 gcc_assert (curr_prob
.initialized_p ());
4317 FOR_EACH_EDGE (e
, ei
, cfg_bb
->succs
)
4319 auto &new_prob
= get_block_info (e
->dest
).probability
;
4320 /* Normally, the edge probability should be initialized.
4321 However, some special testing code which is written in
4322 GIMPLE IR style force the edge probability uninitialized,
4323 we conservatively set it as never so that it will not
4324 affect PRE (Phase 3 && Phase 4). */
4325 if (!e
->probability
.initialized_p ())
4326 new_prob
= profile_probability::never ();
4327 else if (!new_prob
.initialized_p ())
4328 new_prob
= curr_prob
* e
->probability
;
4329 else if (new_prob
== profile_probability::always ())
/* Accumulate contributions from multiple incoming edges.  */
4332 new_prob
+= curr_prob
* e
->probability
;
4337 /* Lazy vsetvl insertion for optimize > 0. */
/* Top-level driver for the optimizing (optimize > 0) strategy: runs
   the six phases described in the file header — local backward demand
   computation, local vsetvl emission, cross-block demand fusion, LCM,
   RTL-SSA post optimization and DF-based post optimization.  */
4339 pass_vsetvl::lazy_vsetvl (void)
4343 "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
4345 n_basic_blocks_for_fn (cfun
), function_name (cfun
));
4347 /* Phase 1 - Compute the local dems within each block.
4348 The data-flow analysis within each block is backward analysis. */
4350 fprintf (dump_file
, "\nPhase 1: Compute local backward vector infos\n");
4351 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
4352 compute_local_backward_infos (bb
);
4353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4354 m_vector_manager
->dump (dump_file
);
4356 /* Phase 2 - Emit vsetvl instructions within each basic block according to
4357 demand, compute and save ANTLOC && AVLOC of each block. */
4360 "\nPhase 2: Emit vsetvl instruction within each block\n");
4361 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
4362 emit_local_forward_vsetvls (bb
);
4363 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4364 m_vector_manager
->dump (dump_file
);
4366 /* Phase 3 - Propagate demanded info across blocks. */
4368 fprintf (dump_file
, "\nPhase 3: Demands propagation across blocks\n");
4371 /* Phase 4 - Lazy code motion. */
4373 fprintf (dump_file
, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
4376 /* Phase 5 - Post optimization base on RTL_SSA. */
4378 fprintf (dump_file
, "\nPhase 5: Post optimization base on RTL_SSA\n");
4379 ssa_post_optimization ();
4381 /* Phase 6 - Post optimization base on data-flow analysis. */
4384 "\nPhase 6: Post optimization base on data-flow analysis\n");
4385 df_post_optimization ();
4388 /* Main entry point for this pass. */
/* Main entry point of the pass: bail out early for an empty function
   or one containing no vector instructions.
   NOTE(review): the remainder of this function (the call into the
   simple/lazy strategy) falls outside this chunk of the file.  */
4390 pass_vsetvl::execute (function
*)
4392 if (n_basic_blocks_for_fn (cfun
) <= 0)
4395 /* The RVV instruction may change after split which is not a stable
4396 instruction. We need to split it here to avoid potential issue
4397 since the VSETVL PASS is inserted before the split PASS. */
4400 /* Early return for there is no vector instructions. */
4401 if (!has_vector_insn (cfun
))
/* Factory used by the pass manager to instantiate the VSETVL pass.  */
4416 make_pass_vsetvl (gcc::context
*ctxt
)
4418 return new pass_vsetvl (ctxt
);