RISC-V: Fix AVL/VL bug of VSETVL PASS[PR111548]
[official-gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
blob7af33e7ea6f96bb171e7f54eab008b2b5c4c28e0
1 /* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
2 Copyright (C) 2022-2023 Free Software Foundation, Inc.
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass is to Set VL/VTYPE global status for RVV instructions
22 that depend on VL and VTYPE registers by Lazy code motion (LCM).
24 Strategy:
26 - Backward demanded info fusion within block.
28 - Lazy code motion (LCM) based demanded info backward propagation.
30 - RTL_SSA framework for def-use, PHI analysis.
32 - Lazy code motion (LCM) for global VL/VTYPE optimization.
34 Assumption:
36 - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
38 This pass consists of 6 phases:
40 - Phase 1 - compute VL/VTYPE demanded information within each block
41 by backward data-flow analysis.
43 - Phase 2 - Emit vsetvl instructions within each basic block according to
44 demand, compute and save ANTLOC && AVLOC of each block.
46 - Phase 3 - LCM Earliest-edge based VSETVL demand fusion.
48 - Phase 4 - Lazy code motion including: compute local properties,
49 pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
51 - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
52 used any more and VL operand of VSETVL instruction if it is not used by
53 any non-debug instructions.
55 - Phase 6 - DF based post VSETVL optimizations.
57 Implementation:
59 - The subroutine of optimize == 0 is simple_vsetvl.
60 This function simply performs vsetvl insertion for each RVV
61 instruction. No optimization.
63 - The subroutine of optimize > 0 is lazy_vsetvl.
64 This function optimize vsetvl insertion process by
65 lazy code motion (LCM) layering on RTL_SSA.
67 - get_avl (), get_insn (), get_avl_source ():
69 1. get_insn () is the current instruction, find_access (get_insn
70 ())->def is the same as get_avl_source () if get_insn () demand VL.
71 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source
72 ()->regno ().
73 3. get_avl_source ()->regno () is the REGNO that we backward propagate.
76 #define IN_TARGET_CODE 1
77 #define INCLUDE_ALGORITHM
78 #define INCLUDE_FUNCTIONAL
80 #include "config.h"
81 #include "system.h"
82 #include "coretypes.h"
83 #include "tm.h"
84 #include "backend.h"
85 #include "rtl.h"
86 #include "target.h"
87 #include "tree-pass.h"
88 #include "df.h"
89 #include "rtl-ssa.h"
90 #include "cfgcleanup.h"
91 #include "insn-config.h"
92 #include "insn-attr.h"
93 #include "insn-opinit.h"
94 #include "tm-constrs.h"
95 #include "cfgrtl.h"
96 #include "cfganal.h"
97 #include "lcm.h"
98 #include "predict.h"
99 #include "profile-count.h"
100 #include "gcse.h"
101 #include "riscv-vsetvl.h"
103 using namespace rtl_ssa;
104 using namespace riscv_vector;
/* Every SEW (selected element width) value that the VTYPE format can
   encode, in ascending order.  */
static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64};
/* Every LMUL (vector register group multiplier) value, integral
   multipliers first, then the fractional ones.  */
static CONSTEXPR const vlmul_type ALL_LMUL[]
  = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
/* Dump *INFO to stderr; convenient to call from a debugger.  */
DEBUG_FUNCTION void
debug (const vector_insn_info *info)
{
  info->dump (stderr);
}
/* Dump the whole vector infos manager *INFO to stderr; convenient to
   call from a debugger.  */
DEBUG_FUNCTION void
debug (const vector_infos_manager *info)
{
  info->dump (stderr);
}
122 static bool
123 vlmax_avl_p (rtx x)
125 return x && rtx_equal_p (x, RVV_VLMAX);
128 static bool
129 vlmax_avl_insn_p (rtx_insn *rinsn)
131 return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
132 || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
135 /* Return true if the block is a loop itself:
136 local_dem
137 __________
138 ____|____ |
139 | | |
140 |________| |
141 |_________|
142 reaching_out
144 static bool
145 loop_basic_block_p (const basic_block cfg_bb)
147 if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
149 edge e;
150 edge_iterator ei;
151 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
152 if (e->dest->index == cfg_bb->index)
153 return true;
155 return false;
158 /* Return true if it is an RVV instruction depends on VTYPE global
159 status register. */
160 static bool
161 has_vtype_op (rtx_insn *rinsn)
163 return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
166 /* Return true if it is an RVV instruction depends on VL global
167 status register. */
168 static bool
169 has_vl_op (rtx_insn *rinsn)
171 return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
/* Is this a SEW value that can be encoded into the VTYPE format?
   Valid SEWs are exactly the powers of two from 8 to 64.

   Note: the previous check used `exact_log2 (sew)' as a boolean, but
   exact_log2 returns -1 (which is truthy) for values that are not a
   power of two, so e.g. 24 or 48 would incorrectly be accepted.
   Enumerate the valid values instead.  */
static bool
valid_sew_p (size_t sew)
{
  return sew == 8 || sew == 16 || sew == 32 || sew == 64;
}
181 /* Return true if the instruction ignores VLMUL field of VTYPE. */
182 static bool
183 ignore_vlmul_insn_p (rtx_insn *rinsn)
185 return get_attr_type (rinsn) == TYPE_VIMOVVX
186 || get_attr_type (rinsn) == TYPE_VFMOVVF
187 || get_attr_type (rinsn) == TYPE_VIMOVXV
188 || get_attr_type (rinsn) == TYPE_VFMOVFV;
191 /* Return true if the instruction is scalar move instruction. */
192 static bool
193 scalar_move_insn_p (rtx_insn *rinsn)
195 return get_attr_type (rinsn) == TYPE_VIMOVXV
196 || get_attr_type (rinsn) == TYPE_VFMOVFV;
199 /* Return true if the instruction is fault first load instruction. */
200 static bool
201 fault_first_load_p (rtx_insn *rinsn)
203 return recog_memoized (rinsn) >= 0
204 && (get_attr_type (rinsn) == TYPE_VLDFF
205 || get_attr_type (rinsn) == TYPE_VLSEGDFF);
208 /* Return true if the instruction is read vl instruction. */
209 static bool
210 read_vl_insn_p (rtx_insn *rinsn)
212 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
215 /* Return true if it is a vsetvl instruction. */
216 static bool
217 vector_config_insn_p (rtx_insn *rinsn)
219 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
222 /* Return true if it is vsetvldi or vsetvlsi. */
223 static bool
224 vsetvl_insn_p (rtx_insn *rinsn)
226 if (!vector_config_insn_p (rinsn))
227 return false;
228 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
229 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
232 /* Return true if it is vsetvl zero, rs1. */
233 static bool
234 vsetvl_discard_result_insn_p (rtx_insn *rinsn)
236 if (!vector_config_insn_p (rinsn))
237 return false;
238 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
239 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
242 /* Return true if it is vsetvl zero, zero. */
243 static bool
244 vsetvl_vtype_change_only_p (rtx_insn *rinsn)
246 if (!vector_config_insn_p (rinsn))
247 return false;
248 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only);
251 static bool
252 after_or_same_p (const insn_info *insn1, const insn_info *insn2)
254 return insn1->compare_with (insn2) >= 0;
257 static bool
258 real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
260 return insn != nullptr && insn->is_real () && insn->bb () == bb;
263 static bool
264 before_p (const insn_info *insn1, const insn_info *insn2)
266 return insn1->compare_with (insn2) < 0;
269 /* Helper function to get VL operand. */
270 static rtx
271 get_vl (rtx_insn *rinsn)
273 if (has_vl_op (rinsn))
275 extract_insn_cached (rinsn);
276 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
278 return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
/* An "anticipatable occurrence" is one that is the first occurrence in the
   basic block, the operands are not modified in the basic block prior
   to the occurrence and the output is not used between the start of
   the block and the occurrence.

   For VSETVL instruction, we have these following formats:
     1. vsetvl zero, rs1.
     2. vsetvl zero, imm.
     3. vsetvl rd, rs1.

   So based on these circumstances, a DEM is considered as a local
   anticipatable occurrence should satisfy these following conditions:

     1). rs1 (avl) are not modified in the basic block prior to the VSETVL.
     2). rd (vl) are not modified in the basic block prior to the VSETVL.
     3). rd (vl) is not used between the start of the block and the
	 occurrence.

   Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if
   VL/VTYPE is modified prior to the occurrence.  This case is already
   considered as a non-local anticipatable occurrence.  */
static bool
anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
{
  insn_info *insn = dem.get_insn ();
  /* The only possible operand we care of VSETVL is AVL.  */
  if (dem.has_avl_reg ())
    {
      /* rs1 (avl) are not modified in the basic block prior to the
	 VSETVL.  */
      rtx avl
	= has_vl_op (insn->rtl ()) ? get_vl (insn->rtl ()) : dem.get_avl ();
      if (dem.dirty_p ())
	{
	  gcc_assert (!vsetvl_insn_p (insn->rtl ()));

	  /* Earliest VSETVL will be inserted at the end of the block, so
	     the whole block must leave AVL (and for VLMAX, VL) alone.  */
	  for (const insn_info *i : bb->real_nondebug_insns ())
	    {
	      /* rs1 (avl) are not modified in the basic block prior to the
		 VSETVL.  */
	      if (find_access (i->defs (), REGNO (avl)))
		return false;
	      if (vlmax_avl_p (dem.get_avl ()))
		{
		  /* rd (avl) is not used between the start of the block and
		     the occurrence.  Note: Only for Dirty and VLMAX-avl.  */
		  if (find_access (i->uses (), REGNO (avl)))
		    return false;
		}
	    }

	  return true;
	}
      else if (!vlmax_avl_p (avl))
	{
	  set_info *set = dem.get_avl_source ();
	  /* If it's undefined, it's not anticipatable conservatively.  */
	  if (!set)
	    return false;
	  /* An AVL defined earlier in the same block means the occurrence
	     is not first — not anticipatable.  */
	  if (real_insn_and_same_bb_p (set->insn (), bb)
	      && before_p (set->insn (), insn))
	    return false;
	  for (insn_info *i = insn->prev_nondebug_insn ();
	       real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
	    {
	      /* rs1 (avl) are not modified in the basic block prior to the
		 VSETVL.  */
	      if (find_access (i->defs (), REGNO (avl)))
		return false;
	    }
	}
    }

  /* rd (vl) is not used between the start of the block and the
     occurrence.  */
  if (vsetvl_insn_p (insn->rtl ()))
    {
      rtx dest = get_vl (insn->rtl ());
      for (insn_info *i = insn->prev_nondebug_insn ();
	   real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
	{
	  /* rd (vl) is not used between the start of the block and the
	     occurrence.  */
	  if (find_access (i->uses (), REGNO (dest)))
	    return false;
	  /* rd (vl) are not modified in the basic block prior to the
	     VSETVL.  */
	  if (find_access (i->defs (), REGNO (dest)))
	    return false;
	}
    }

  return true;
}
/* An "available occurrence" is one that is the last occurrence in the
   basic block and the operands are not modified by following statements in
   the basic block [including this insn].

   For VSETVL instruction, we have these following formats:
     1. vsetvl zero, rs1.
     2. vsetvl zero, imm.
     3. vsetvl rd, rs1.

   So based on these circumstances, a DEM is considered as a local available
   occurrence should satisfy these following conditions:

     1). rs1 (avl) are not modified by following statements in
	 the basic block.
     2). rd (vl) are not modified by following statements in
	 the basic block.

   Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if
   VL/VTYPE is modified prior to the occurrence.  This case is already
   considered as a non-local available occurrence.  */
static bool
available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
{
  insn_info *insn = dem.get_insn ();
  /* The only possible operand we care of VSETVL is AVL.  */
  if (dem.has_avl_reg ())
    {
      if (!vlmax_avl_p (dem.get_avl ()))
	{
	  rtx dest = NULL_RTX;
	  insn_info *i = insn;
	  if (vsetvl_insn_p (insn->rtl ()))
	    {
	      dest = get_vl (insn->rtl ());
	      /* For user vsetvl a2, a2 instruction, we consider it as
		 available even though it modifies "a2".  */
	      i = i->next_nondebug_insn ();
	    }
	  for (; real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
	    {
	      /* read_vl insns only read VL; they cannot clobber AVL/VL.  */
	      if (read_vl_insn_p (i->rtl ()))
		continue;
	      /* rs1 (avl) are not modified by following statements in
		 the basic block.  */
	      if (find_access (i->defs (), REGNO (dem.get_avl ())))
		return false;
	      /* rd (vl) are not modified by following statements in
		 the basic block.  */
	      if (dest && find_access (i->defs (), REGNO (dest)))
		return false;
	    }
	}
    }
  return true;
}
431 static bool
432 insn_should_be_added_p (const insn_info *insn, unsigned int types)
434 if (insn->is_real () && (types & REAL_SET))
435 return true;
436 if (insn->is_phi () && (types & PHI_SET))
437 return true;
438 if (insn->is_bb_head () && (types & BB_HEAD_SET))
439 return true;
440 if (insn->is_bb_end () && (types & BB_END_SET))
441 return true;
442 return false;
/* Recursively find all define instructions reachable through PHI's inputs.
   The kind of instruction collected is specified by the TYPES mask.
   Returns the empty set (meaning "unknown") if any input is not a
   set_info.  */
static hash_set<set_info *>
get_all_sets (phi_info *phi, unsigned int types)
{
  hash_set<set_info *> insns;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;
  if (!phi)
    return hash_set<set_info *> ();
  work_list.safe_push (phi);

  /* Standard worklist traversal; VISITED_LIST prevents looping on
     cyclic phi webs.  */
  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);
      for (use_info *use : phi->inputs ())
	{
	  def_info *def = use->def ();
	  set_info *set = safe_dyn_cast<set_info *> (def);
	  /* A non-set def (e.g. a clobber) makes the result unknown —
	     bail out with an empty set.  */
	  if (!set)
	    return hash_set<set_info *> ();

	  gcc_assert (!set->insn ()->is_debug_insn ());

	  if (insn_should_be_added_p (set->insn (), types))
	    insns.add (set);
	  if (set->insn ()->is_phi ())
	    {
	      phi_info *new_phi = as_a<phi_info *> (set);
	      if (!visited_list.contains (new_phi))
		work_list.safe_push (new_phi);
	    }
	}
    }
  return insns;
}
483 static hash_set<set_info *>
484 get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
485 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
487 if (real_p && phi_p && param_p)
488 return get_all_sets (safe_dyn_cast<phi_info *> (set),
489 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
491 else if (real_p && param_p)
492 return get_all_sets (safe_dyn_cast<phi_info *> (set),
493 REAL_SET | BB_HEAD_SET | BB_END_SET);
495 else if (real_p)
496 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
497 return hash_set<set_info *> ();
500 /* Helper function to get AVL operand. */
501 static rtx
502 get_avl (rtx_insn *rinsn)
504 if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
505 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
507 if (!has_vl_op (rinsn))
508 return NULL_RTX;
509 if (get_attr_avl_type (rinsn) == VLMAX)
510 return RVV_VLMAX;
511 extract_insn_cached (rinsn);
512 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
515 static set_info *
516 get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb)
518 for (set_info *set : sets)
519 if (set->bb ()->cfg_bb () == cfg_bb)
520 return set;
521 return nullptr;
/* Helper function to get SEW operand.  We always have SEW value for
   all RVV instructions that have VTYPE OP.  */
static uint8_t
get_sew (rtx_insn *rinsn)
{
  return get_attr_sew (rinsn);
}
/* Helper function to get VLMUL operand.  We always have VLMUL value for
   all RVV instructions that have VTYPE OP.  */
static enum vlmul_type
get_vlmul (rtx_insn *rinsn)
{
  return (enum vlmul_type) get_attr_vlmul (rinsn);
}
540 /* Get default tail policy. */
541 static bool
542 get_default_ta ()
544 /* For the instruction that doesn't require TA, we still need a default value
545 to emit vsetvl. We pick up the default value according to prefer policy. */
546 return (bool) (get_prefer_tail_policy () & 0x1
547 || (get_prefer_tail_policy () >> 1 & 0x1));
550 /* Get default mask policy. */
551 static bool
552 get_default_ma ()
554 /* For the instruction that doesn't require MA, we still need a default value
555 to emit vsetvl. We pick up the default value according to prefer policy. */
556 return (bool) (get_prefer_mask_policy () & 0x1
557 || (get_prefer_mask_policy () >> 1 & 0x1));
560 /* Helper function to get TA operand. */
561 static bool
562 tail_agnostic_p (rtx_insn *rinsn)
564 /* If it doesn't have TA, we return agnostic by default. */
565 extract_insn_cached (rinsn);
566 int ta = get_attr_ta (rinsn);
567 return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
570 /* Helper function to get MA operand. */
571 static bool
572 mask_agnostic_p (rtx_insn *rinsn)
574 /* If it doesn't have MA, we return agnostic by default. */
575 extract_insn_cached (rinsn);
576 int ma = get_attr_ma (rinsn);
577 return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
580 /* Return true if FN has a vector instruction that use VL/VTYPE. */
581 static bool
582 has_vector_insn (function *fn)
584 basic_block cfg_bb;
585 rtx_insn *rinsn;
586 FOR_ALL_BB_FN (cfg_bb, fn)
587 FOR_BB_INSNS (cfg_bb, rinsn)
588 if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
589 return true;
590 return false;
593 /* Emit vsetvl instruction. */
594 static rtx
595 gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl)
597 rtx avl = info.get_avl ();
598 /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
599 set the value of avl to (const_int 0) so that VSETVL PASS will
600 insert vsetvl correctly.*/
601 if (info.has_avl_no_reg ())
602 avl = GEN_INT (0);
603 rtx sew = gen_int_mode (info.get_sew (), Pmode);
604 rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
605 rtx ta = gen_int_mode (info.get_ta (), Pmode);
606 rtx ma = gen_int_mode (info.get_ma (), Pmode);
608 if (insn_type == VSETVL_NORMAL)
610 gcc_assert (vl != NULL_RTX);
611 return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
613 else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
614 return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
615 else
616 return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
/* Build a vsetvl pattern replacing RINSN according to INFO.  When VL is
   given, force the VSETVL_NORMAL (rd-producing) form; otherwise keep the
   same form RINSN already has.  */
static rtx
gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info,
		rtx vl = NULL_RTX)
{
  rtx new_pat;
  vl_vtype_info new_info = info;
  /* For a fault-only-first load the real VL is produced by the load
     itself, so take the AVL straight from the load instruction.  */
  if (info.get_insn () && info.get_insn ()->rtl ()
      && fault_first_load_p (info.get_insn ()->rtl ()))
    new_info.set_avl_info (
      avl_info (get_avl (info.get_insn ()->rtl ()), nullptr));
  if (vl)
    new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, vl);
  else
    {
      if (vsetvl_insn_p (rinsn))
	new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, get_vl (rinsn));
      else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
	new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
      else
	new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
    }
  return new_pat;
}
643 static void
644 emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
645 const vl_vtype_info &info, rtx vl, rtx_insn *rinsn)
647 rtx pat = gen_vsetvl_pat (insn_type, info, vl);
648 if (dump_file)
650 fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
651 print_rtl_single (dump_file, pat);
652 fprintf (dump_file, "\nfor insn:\n");
653 print_rtl_single (dump_file, rinsn);
656 if (emit_type == EMIT_DIRECT)
657 emit_insn (pat);
658 else if (emit_type == EMIT_BEFORE)
659 emit_insn_before (pat, rinsn);
660 else
661 emit_insn_after (pat, rinsn);
/* Remove RINSN from the insn stream, dumping it first if requested.  */
static void
eliminate_insn (rtx_insn *rinsn)
{
  if (dump_file)
    {
      fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
      print_rtl_single (dump_file, rinsn);
    }
  /* Inside an emit sequence we may only unlink the insn; otherwise
     delete it for real.  */
  if (in_sequence_p ())
    remove_insn (rinsn);
  else
    delete_insn (rinsn);
}
/* Emit the cheapest vsetvl form that realizes INFO given the incoming
   state PREV_INFO, placed per EMIT_TYPE relative to RINSN.  Returns the
   form that was emitted.  */
static vsetvl_type
insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
	       const vector_insn_info &info, const vector_insn_info &prev_info)
{
  /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
     VLMAX.  */
  if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
      && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info))
    {
      emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
			rinsn);
      return VSETVL_VTYPE_CHANGE_ONLY;
    }

  if (info.has_avl_imm ())
    {
      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
			rinsn);
      return VSETVL_DISCARD_RESULT;
    }

  if (info.has_avl_no_reg ())
    {
      /* We can only use x0, x0 if there's no chance of the vtype change
	 causing the previous vl to become invalid.  */
      if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
	  && info.same_vlmax_p (prev_info))
	{
	  emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info,
			    NULL_RTX, rinsn);
	  return VSETVL_VTYPE_CHANGE_ONLY;
	}
      /* Otherwise use an AVL of 0 to avoid depending on previous vl.  */
      vl_vtype_info new_info = info;
      new_info.set_avl_info (avl_info (const0_rtx, nullptr));
      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
			rinsn);
      return VSETVL_DISCARD_RESULT;
    }

  /* Use X0 as the DestReg unless AVLReg is X0.  We also need to change the
     opcode if the AVLReg is X0 as they have different register classes for
     the AVL operand.  */
  if (vlmax_avl_p (info.get_avl ()))
    {
      gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
      /* For user vsetvli a5, zero, we should use get_vl to get the VL
	 operand "a5".  */
      rtx vl_op = info.get_avl_or_vl_reg ();
      gcc_assert (!vlmax_avl_p (vl_op));
      emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
      return VSETVL_NORMAL;
    }

  emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);

  if (dump_file)
    {
      fprintf (dump_file, "Update VL/VTYPE info, previous info=");
      prev_info.dump (dump_file);
    }
  return VSETVL_DISCARD_RESULT;
}
/* Get VL/VTYPE information for INSN.  */
static vl_vtype_info
get_vl_vtype_info (const insn_info *insn)
{
  set_info *set = nullptr;
  rtx avl = ::get_avl (insn->rtl ());
  if (avl && REG_P (avl))
    {
      /* For a VLMAX AVL with a real VL operand, the SSA definition we
	 track is that of the VL register; otherwise it is the AVL
	 register itself.  */
      if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ()))
	set
	  = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def ();
      else if (!vlmax_avl_p (avl))
	set = find_access (insn->uses (), REGNO (avl))->def ();
      else
	set = nullptr;
    }

  uint8_t sew = get_sew (insn->rtl ());
  enum vlmul_type vlmul = get_vlmul (insn->rtl ());
  uint8_t ratio = get_attr_ratio (insn->rtl ());
  /* when get_attr_ratio is invalid, this kind of instructions
     doesn't care about ratio.  However, we still need this value
     in demand info backward analysis.  */
  if (ratio == INVALID_ATTRIBUTE)
    ratio = calculate_ratio (sew, vlmul);
  bool ta = tail_agnostic_p (insn->rtl ());
  bool ma = mask_agnostic_p (insn->rtl ());

  /* If merge operand is undef value, we prefer agnostic.  */
  int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
  if (merge_op_idx != INVALID_ATTRIBUTE
      && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
    {
      ta = true;
      ma = true;
    }

  vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
  return info;
}
783 /* Change insn and Assert the change always happens. */
784 static void
785 validate_change_or_fail (rtx object, rtx *loc, rtx new_rtx, bool in_group)
787 bool change_p = validate_change (object, loc, new_rtx, in_group);
788 gcc_assert (change_p);
/* Replace the PATTERN of RINSN with NEW_PAT, with dump-file tracing.  */
static void
change_insn (rtx_insn *rinsn, rtx new_pat)
{
  /* We don't apply change on RTL_SSA here since it's possible a
     new INSN we add in the PASS before which doesn't have RTL_SSA
     info yet.  */
  if (dump_file)
    {
      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
	       INSN_UID (rinsn));
      print_rtl_single (dump_file, PATTERN (rinsn));
    }

  validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);

  if (dump_file)
    {
      fprintf (dump_file, "\nto:\n");
      print_rtl_single (dump_file, PATTERN (rinsn));
    }
}
813 static const insn_info *
814 get_forward_read_vl_insn (const insn_info *insn)
816 const bb_info *bb = insn->bb ();
817 for (const insn_info *i = insn->next_nondebug_insn ();
818 real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
820 if (find_access (i->defs (), VL_REGNUM))
821 return nullptr;
822 if (read_vl_insn_p (i->rtl ()))
823 return i;
825 return nullptr;
828 static const insn_info *
829 get_backward_fault_first_load_insn (const insn_info *insn)
831 const bb_info *bb = insn->bb ();
832 for (const insn_info *i = insn->prev_nondebug_insn ();
833 real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
835 if (fault_first_load_p (i->rtl ()))
836 return i;
837 if (find_access (i->defs (), VL_REGNUM))
838 return nullptr;
840 return nullptr;
/* RTL_SSA-aware variant: replace INSN's pattern with NEW_PAT through the
   SSA CHANGE object.  Returns false (leaving the insn untouched) if
   movement restriction, re-recognition, or the profitability check
   fails.  */
static bool
change_insn (function_info *ssa, insn_change change, insn_info *insn,
	     rtx new_pat)
{
  rtx_insn *rinsn = insn->rtl ();
  auto attempt = ssa->new_change_attempt ();
  if (!restrict_movement (change))
    return false;

  if (dump_file)
    {
      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
	       INSN_UID (rinsn));
      print_rtl_single (dump_file, PATTERN (rinsn));
    }

  /* The watermark rolls the tentative change back automatically on any
     early return below.  */
  insn_change_watermark watermark;
  validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, true);

  /* These routines report failures themselves.  */
  if (!recog (attempt, change) || !change_is_worthwhile (change, false))
    return false;

  /* A REG_EQUAL/REG_EQUIV note may still reference a register (e.g. the
     old AVL register) whose use the new pattern dropped; RTL_SSA counts
     note uses, so such a stale note would make the SSA update fail.
     Drop the notes, following the approach of aarch64-cc-fusion.cc.  */
  remove_reg_equal_equiv_notes (rinsn);
  confirm_change_group ();
  ssa->change_insn (change);

  if (dump_file)
    {
      fprintf (dump_file, "\nto:\n");
      print_rtl_single (dump_file, PATTERN (rinsn));
    }
  return true;
}
/* Rewrite the vsetvl associated with INSN so that it realizes INFO.
   INSN is either the vsetvl itself or an RVV instruction whose vsetvl
   immediately precedes it.  */
static void
change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info,
		    rtx vl = NULL_RTX)
{
  rtx_insn *rinsn;
  if (vector_config_insn_p (insn->rtl ()))
    {
      rinsn = insn->rtl ();
      gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
    }
  else
    {
      gcc_assert (has_vtype_op (insn->rtl ()));
      /* The vsetvl for an RVV instruction was emitted right before it.  */
      rinsn = PREV_INSN (insn->rtl ());
      gcc_assert (vector_config_insn_p (rinsn));
    }
  rtx new_pat = gen_vsetvl_pat (rinsn, info, vl);
  change_insn (rinsn, new_pat);
}
936 static bool
937 avl_source_has_vsetvl_p (set_info *avl_source)
939 if (!avl_source)
940 return false;
941 if (!avl_source->insn ())
942 return false;
943 if (avl_source->insn ()->is_real ())
944 return vsetvl_insn_p (avl_source->insn ()->rtl ());
945 hash_set<set_info *> sets = get_all_sets (avl_source, true, false, true);
946 for (const auto set : sets)
948 if (set->insn ()->is_real () && vsetvl_insn_p (set->insn ()->rtl ()))
949 return true;
951 return false;
/* Return true if INSN1 and INSN2 are known to compute the same value
   for the purposes of AVL propagation.  */
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
  if (!insn1 || !insn2)
    return false;
  rtx_insn *rinsn1 = insn1->rtl ();
  rtx_insn *rinsn2 = insn2->rtl ();
  if (!rinsn1 || !rinsn2)
    return false;
  rtx note1 = find_reg_equal_equiv_note (rinsn1);
  rtx note2 = find_reg_equal_equiv_note (rinsn2);
  rtx single_set1 = single_set (rinsn1);
  rtx single_set2 = single_set (rinsn2);
  /* Two read_vl insns are equal iff they read the VL produced by the
     same fault-only-first load.  */
  if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2))
    {
      const insn_info *load1 = get_backward_fault_first_load_insn (insn1);
      const insn_info *load2 = get_backward_fault_first_load_insn (insn2);
      return load1 && load2 && load1 == load2;
    }

  if (note1 && note2 && rtx_equal_p (note1, note2))
    return true;

  /* Since vsetvl instruction is not single SET.
     We handle this case specially here.  */
  if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ()))
    {
      /* For example:
	   vsetvl1 a6,a5,e32m1
	   RVV 1 (use a6 as AVL)
	   vsetvl2 a5,a5,e8mf4
	   RVV 2 (use a5 as AVL)
	 We consider AVL of RVV 1 and RVV 2 are same so that we can
	 gain more optimization opportunities.

	 Note: insn1_info.compatible_avl_p (insn2_info)
	 will make sure there is no instruction between vsetvl1 and vsetvl2
	 modify a5 since their def will be different if there is instruction
	 modify a5 and compatible_avl_p will return false.  */
      vector_insn_info insn1_info, insn2_info;
      insn1_info.parse_insn (insn1);
      insn2_info.parse_insn (insn2);

      /* To avoid dead loop, we don't optimize a vsetvli def has vsetvli
	 instructions which will complicate the situation.  */
      if (avl_source_has_vsetvl_p (insn1_info.get_avl_source ())
	  || avl_source_has_vsetvl_p (insn2_info.get_avl_source ()))
	return false;

      if (insn1_info.same_vlmax_p (insn2_info)
	  && insn1_info.compatible_avl_p (insn2_info))
	return true;
    }

  /* We only handle AVL is set by instructions with no side effects.  */
  if (!single_set1 || !single_set2)
    return false;
  if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2)))
    return false;
  /* RTL_SSA use lists include uses that appear only in REG_EQUAL/REG_EQUIV
     notes, so two insns with identical SET_SRCs can still read different
     registers through their notes.  Require the full use lists to match
     before declaring the sources equal.  */
  if (insn1->uses ().size () != insn2->uses ().size ())
    return false;
  for (size_t i = 0; i < insn1->uses ().size (); i++)
    if (insn1->uses ()[i] != insn2->uses ()[i])
      return false;
  return true;
}
/* Helper function to get single same real RTL source.
   return NULL if it is not a single real RTL source.

   A non-PHI real insn is trivially its own single source.  For a PHI,
   walk every reaching set and require that all of them are real insns
   computing the same value (source_equal_p); otherwise return NULL so
   the caller treats the AVL as unknown.  */
static insn_info *
extract_single_source (set_info *set)
{
  if (!set)
    return nullptr;
  if (set->insn ()->is_real ())
    return set->insn ();
  if (!set->insn ()->is_phi ())
    return nullptr;
  hash_set<set_info *> sets = get_all_sets (set, true, false, true);

  insn_info *first_insn = (*sets.begin ())->insn ();
  if (first_insn->is_artificial ())
    return nullptr;
  for (const set_info *set : sets)
    {
      /* If there is a head or end insn, we conservatively return
	 NULL so that VSETVL PASS will insert vsetvl directly.  */
      if (set->insn ()->is_artificial ())
	return nullptr;
      if (!source_equal_p (set->insn (), first_insn))
	return nullptr;
    }

  return first_insn;
}
1077 static unsigned
1078 calculate_sew (vlmul_type vlmul, unsigned int ratio)
1080 for (const unsigned sew : ALL_SEW)
1081 if (calculate_ratio (sew, vlmul) == ratio)
1082 return sew;
1083 return 0;
1086 static vlmul_type
1087 calculate_vlmul (unsigned int sew, unsigned int ratio)
1089 for (const vlmul_type vlmul : ALL_LMUL)
1090 if (calculate_ratio (sew, vlmul) == ratio)
1091 return vlmul;
1092 return LMUL_RESERVED;
1095 static bool
1096 incompatible_avl_p (const vector_insn_info &info1,
1097 const vector_insn_info &info2)
1099 return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1);
1102 static bool
1103 different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2)
1105 return info1.get_sew () != info2.get_sew ();
1108 static bool
1109 different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2)
1111 return info1.get_vlmul () != info2.get_vlmul ();
1114 static bool
1115 different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2)
1117 return info1.get_ratio () != info2.get_ratio ();
1120 static bool
1121 different_tail_policy_p (const vector_insn_info &info1,
1122 const vector_insn_info &info2)
1124 return info1.get_ta () != info2.get_ta ();
1127 static bool
1128 different_mask_policy_p (const vector_insn_info &info1,
1129 const vector_insn_info &info2)
1131 return info1.get_ma () != info2.get_ma ();
1134 static bool
1135 possible_zero_avl_p (const vector_insn_info &info1,
1136 const vector_insn_info &info2)
1138 return !info1.has_non_zero_avl () || !info2.has_non_zero_avl ();
1141 static bool
1142 second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1,
1143 const vector_insn_info &info2)
1145 return calculate_vlmul (info1.get_sew (), info2.get_ratio ())
1146 == LMUL_RESERVED;
1149 static bool
1150 second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1,
1151 const vector_insn_info &info2)
1153 return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0;
1156 static bool
1157 float_insn_valid_sew_p (const vector_insn_info &info, unsigned int sew)
1159 if (info.get_insn () && info.get_insn ()->is_real ()
1160 && get_attr_type (info.get_insn ()->rtl ()) == TYPE_VFMOVFV)
1162 if (sew == 16)
1163 return TARGET_VECTOR_ELEN_FP_16;
1164 else if (sew == 32)
1165 return TARGET_VECTOR_ELEN_FP_32;
1166 else if (sew == 64)
1167 return TARGET_VECTOR_ELEN_FP_64;
1169 return true;
1172 static bool
1173 second_sew_less_than_first_sew_p (const vector_insn_info &info1,
1174 const vector_insn_info &info2)
1176 return info2.get_sew () < info1.get_sew ()
1177 || !float_insn_valid_sew_p (info1, info2.get_sew ());
1180 static bool
1181 first_sew_less_than_second_sew_p (const vector_insn_info &info1,
1182 const vector_insn_info &info2)
1184 return info1.get_sew () < info2.get_sew ()
1185 || !float_insn_valid_sew_p (info2, info1.get_sew ());
/* return 0 if LMUL1 == LMUL2.
   return -1 if LMUL1 < LMUL2.
   return 1 if LMUL1 > LMUL2.

   NOTE(review): the cases below appear to implement the OPPOSITE
   convention from this comment: e.g. compare_lmul (LMUL_1, LMUL_2)
   returns 1 even though LMUL_1 < LMUL_2, and LMUL_8 (the largest)
   always returns -1.  Confirm which convention callers such as
   second_lmul_less_than_first_lmul_p actually rely on.  */
static int
compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2)
{
  if (vlmul1 == vlmul2)
    return 0;

  switch (vlmul1)
    {
    case LMUL_1:
      if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_2:
      if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_4:
      if (vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_8:
      return -1;
    case LMUL_F2:
      if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4
	  || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_F4:
      if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2
	  || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_F8:
      /* NOTE(review): LMUL_F8 is the smallest LMUL yet this returns 0
	 ("equal") for every other VLMUL, unlike the pattern of the
	 other cases — verify this is intentional.  */
      return 0;
    default:
      gcc_unreachable ();
    }
}
1235 static bool
1236 second_lmul_less_than_first_lmul_p (const vector_insn_info &info1,
1237 const vector_insn_info &info2)
1239 return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1;
1242 static bool
1243 second_ratio_less_than_first_ratio_p (const vector_insn_info &info1,
1244 const vector_insn_info &info2)
1246 return info2.get_ratio () < info1.get_ratio ();
/* Table of incompatibility rules: each row pairs two demand patterns
   with the predicate deciding whether infos matching those patterns
   conflict.  Rows are generated from riscv-vsetvl.def.  */
static CONSTEXPR const demands_cond incompatible_conds[] = {
#define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1,         \
			      GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2,      \
			      SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2,      \
			      TAIL_POLICTY2, MASK_POLICY2, COND)               \
  {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1,          \
     MASK_POLICY1},                                                            \
    {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,          \
     MASK_POLICY2}},                                                           \
   COND},
#include "riscv-vsetvl.def"
};
/* Fuse callback: pick the larger SEW of the two infos.  */
static unsigned
greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2)
{
  return std::max (info1.get_sew (), info2.get_sew ());
}
/* Fuse callback: keep the first info's SEW.  */
static unsigned
first_sew (const vector_insn_info &info1, const vector_insn_info &)
{
  return info1.get_sew ();
}
/* Fuse callback: keep the second info's SEW.  */
static unsigned
second_sew (const vector_insn_info &, const vector_insn_info &info2)
{
  return info2.get_sew ();
}
/* Fuse callback: keep the first info's VLMUL.  */
static vlmul_type
first_vlmul (const vector_insn_info &info1, const vector_insn_info &)
{
  return info1.get_vlmul ();
}
/* Fuse callback: keep the second info's VLMUL.  */
static vlmul_type
second_vlmul (const vector_insn_info &, const vector_insn_info &info2)
{
  return info2.get_vlmul ();
}
/* Fuse callback: keep the first info's SEW/LMUL ratio.  */
static unsigned
first_ratio (const vector_insn_info &info1, const vector_insn_info &)
{
  return info1.get_ratio ();
}
/* Fuse callback: keep the second info's SEW/LMUL ratio.  */
static unsigned
second_ratio (const vector_insn_info &, const vector_insn_info &info2)
{
  return info2.get_ratio ();
}
1304 static vlmul_type
1305 vlmul_for_first_sew_second_ratio (const vector_insn_info &info1,
1306 const vector_insn_info &info2)
1308 return calculate_vlmul (info1.get_sew (), info2.get_ratio ());
1311 static vlmul_type
1312 vlmul_for_greatest_sew_second_ratio (const vector_insn_info &info1,
1313 const vector_insn_info &info2)
1315 return calculate_vlmul (MAX (info1.get_sew (), info2.get_sew ()),
1316 info2.get_ratio ());
1319 static unsigned
1320 ratio_for_second_sew_first_vlmul (const vector_insn_info &info1,
1321 const vector_insn_info &info2)
1323 return calculate_ratio (info2.get_sew (), info1.get_vlmul ());
/* Table of SEW/LMUL fusion rules: each row matches a pair of demand
   patterns and supplies the fused demand flags plus callbacks computing
   the new SEW/VLMUL/ratio.  Rows are generated from riscv-vsetvl.def.  */
static CONSTEXPR const demands_fuse_rule fuse_rules[] = {
#define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1,       \
			       DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2,      \
			       DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW,  \
			       NEW_DEMAND_LMUL, NEW_DEMAND_RATIO,              \
			       NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL,          \
			       NEW_RATIO)                                      \
  {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY,         \
     DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY},                                  \
    {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY,         \
     DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}},                                 \
   NEW_DEMAND_SEW,                                                             \
   NEW_DEMAND_LMUL,                                                            \
   NEW_DEMAND_RATIO,                                                           \
   NEW_DEMAND_GE_SEW,                                                          \
   NEW_SEW,                                                                    \
   NEW_VLMUL,                                                                  \
   NEW_RATIO},
#include "riscv-vsetvl.def"
};
/* Availability callback that unconditionally reports "unavailable".  */
static bool
always_unavailable (const vector_insn_info &, const vector_insn_info &)
{
  return true;
}
1353 static bool
1354 avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1356 return !info2.compatible_avl_p (info1.get_avl_info ());
1359 static bool
1360 sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1362 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO))
1364 if (info2.demand_p (DEMAND_GE_SEW))
1365 return info1.get_sew () < info2.get_sew ();
1366 return info1.get_sew () != info2.get_sew ();
1368 return true;
1371 static bool
1372 lmul_unavailable_p (const vector_insn_info &info1,
1373 const vector_insn_info &info2)
1375 if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW)
1376 && !info2.demand_p (DEMAND_RATIO))
1377 return false;
1378 return true;
1381 static bool
1382 ge_sew_unavailable_p (const vector_insn_info &info1,
1383 const vector_insn_info &info2)
1385 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)
1386 && info2.demand_p (DEMAND_GE_SEW))
1387 return info1.get_sew () < info2.get_sew ();
1388 return true;
1391 static bool
1392 ge_sew_lmul_unavailable_p (const vector_insn_info &info1,
1393 const vector_insn_info &info2)
1395 if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW))
1396 return info1.get_sew () < info2.get_sew ();
1397 return true;
1400 static bool
1401 ge_sew_ratio_unavailable_p (const vector_insn_info &info1,
1402 const vector_insn_info &info2)
1404 if (!info2.demand_p (DEMAND_LMUL))
1406 if (info2.demand_p (DEMAND_GE_SEW))
1407 return info1.get_sew () < info2.get_sew ();
1408 /* Demand GE_SEW should be available for non-demand SEW. */
1409 else if (!info2.demand_p (DEMAND_SEW))
1410 return false;
1412 return true;
/* Table of availability rules: each row pairs two demand patterns with
   the predicate reporting whether the first info fails to make the
   second's demand available.  Rows are generated from riscv-vsetvl.def.  */
static CONSTEXPR const demands_cond unavailable_conds[] = {
#define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
			     TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2,   \
			     RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,     \
			     MASK_POLICY2, COND)                               \
  {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1,          \
     MASK_POLICY1},                                                            \
    {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,          \
     MASK_POLICY2}},                                                           \
   COND},
#include "riscv-vsetvl.def"
};
1428 static bool
1429 same_sew_lmul_demand_p (const bool *dems1, const bool *dems2)
1431 return dems1[DEMAND_SEW] == dems2[DEMAND_SEW]
1432 && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL]
1433 && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW]
1434 && !dems2[DEMAND_GE_SEW];
1437 static bool
1438 propagate_avl_across_demands_p (const vector_insn_info &info1,
1439 const vector_insn_info &info2)
1441 if (info2.demand_p (DEMAND_AVL))
1443 if (info2.demand_p (DEMAND_NONZERO_AVL))
1444 return info1.demand_p (DEMAND_AVL)
1445 && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg ();
1447 else
1448 return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg ();
1449 return false;
1452 static bool
1453 reg_available_p (const insn_info *insn, const vector_insn_info &info)
1455 if (info.has_avl_reg () && !info.get_avl_source ())
1456 return false;
1457 insn_info *def_insn = info.get_avl_source ()->insn ();
1458 if (def_insn->bb () == insn->bb ())
1459 return before_p (def_insn, insn);
1460 else
1461 return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (),
1462 def_insn->bb ()->cfg_bb ());
/* Return true if the instruction supports the relaxed compatible check:
   INFO1 is a fault-only-first load and INFO2's register AVL comes (via a
   PHI) from a read_vl of that very load, in which case only the VTYPEs
   need to agree.  */
static bool
support_relaxed_compatible_p (const vector_insn_info &info1,
			      const vector_insn_info &info2)
{
  if (fault_first_load_p (info1.get_insn ()->rtl ())
      && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg ()
      && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ())
    {
      hash_set<set_info *> sets
	= get_all_sets (info2.get_avl_source (), true, false, false);
      for (set_info *set : sets)
	{
	  if (read_vl_insn_p (set->insn ()->rtl ()))
	    {
	      /* Walk back from the read_vl to the fault-first load that
		 produced the VL it reads.  */
	      const insn_info *insn
		= get_backward_fault_first_load_insn (set->insn ());
	      if (insn == info1.get_insn ())
		return info2.compatible_vtype_p (info1);
	    }
	}
    }
  return false;
}
1490 /* Count the number of REGNO in RINSN. */
1491 static int
1492 count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
1494 int count = 0;
1495 extract_insn (rinsn);
1496 for (int i = 0; i < recog_data.n_operands; i++)
1497 if (refers_to_regno_p (regno, recog_data.operand[i]))
1498 count++;
1499 return count;
1502 /* Return TRUE if the demands can be fused. */
1503 static bool
1504 demands_can_be_fused_p (const vector_insn_info &be_fused,
1505 const vector_insn_info &to_fuse)
1507 return be_fused.compatible_p (to_fuse) && !be_fused.available_p (to_fuse);
/* Return true if we can fuse VSETVL demand info into predecessor of earliest
   edge.  */
static bool
earliest_pred_can_be_fused_p (const bb_info *earliest_pred,
			      const vector_insn_info &earliest_info,
			      const vector_insn_info &expr, rtx *vlmax_vl)
{
  /* Backward VLMAX VL:
       bb 3:
	 vsetivli zero, 1 ... -> vsetvli t1, zero
	 vmv.s.x
       bb 5:
	 vsetvli t1, zero ... -> to be elided.
	 vlse16.v

     We should forward "t1".  */
  if (!earliest_info.has_avl_reg () && expr.has_avl_reg ())
    {
      rtx avl_or_vl_reg = expr.get_avl_or_vl_reg ();
      gcc_assert (avl_or_vl_reg);
      const insn_info *last_insn = earliest_info.get_insn ();
      /* To fuse demand on earliest edge, we make sure AVL/VL
	 didn't change from the consume insn to the predecessor
	 of the edge.  Scan backwards from the end of the
	 predecessor block down to the consume insn; any def or use
	 of the register in between blocks the fusion.  */
      for (insn_info *i = earliest_pred->end_insn ()->prev_nondebug_insn ();
	   real_insn_and_same_bb_p (i, earliest_pred)
	     && after_or_same_p (i, last_insn);
	   i = i->prev_nondebug_insn ())
	{
	  if (find_access (i->defs (), REGNO (avl_or_vl_reg)))
	    return false;
	  if (find_access (i->uses (), REGNO (avl_or_vl_reg)))
	    return false;
	}
      /* Report the VL register to the caller only for a VLMAX AVL.  */
      if (vlmax_vl && vlmax_avl_p (expr.get_avl ()))
	*vlmax_vl = avl_or_vl_reg;
    }

  return true;
}
/* Return true if the current VSETVL 1 is dominated by preceding VSETVL 2.

   VSETVL 2 dominates VSETVL 1 should satisfy this following check:

     - VSETVL 2 should have the same RATIO (SEW/LMUL) with VSETVL 1.
     - VSETVL 2 is a user vsetvl (vsetvl VL, AVL).
     - VSETVL 2's "VL" result is the "AVL" of VSETVL 1.

   FUSE_P selects the polarity of the final VTYPE check: when fusing we
   require the VTYPEs to differ; otherwise they must match.  */
static bool
vsetvl_dominated_by_p (const basic_block cfg_bb,
		       const vector_insn_info &vsetvl1,
		       const vector_insn_info &vsetvl2, bool fuse_p)
{
  if (!vsetvl1.valid_or_dirty_p () || !vsetvl2.valid_or_dirty_p ())
    return false;
  if (!has_vl_op (vsetvl1.get_insn ()->rtl ())
      || !vsetvl_insn_p (vsetvl2.get_insn ()->rtl ()))
    return false;

  /* Find the definition of VSETVL1's AVL that lives in CFG_BB.  */
  hash_set<set_info *> sets
    = get_all_sets (vsetvl1.get_avl_source (), true, false, false);
  set_info *set = get_same_bb_set (sets, cfg_bb);

  if (!vsetvl1.has_avl_reg () || vlmax_avl_p (vsetvl1.get_avl ())
      || !vsetvl2.same_vlmax_p (vsetvl1) || !set
      || set->insn () != vsetvl2.get_insn ())
    return false;

  if (fuse_p && vsetvl2.same_vtype_p (vsetvl1))
    return false;
  else if (!fuse_p && !vsetvl2.same_vtype_p (vsetvl1))
    return false;
  return true;
}
1585 avl_info::avl_info (const avl_info &other)
1587 m_value = other.get_value ();
1588 m_source = other.get_source ();
/* Construct an AVL from its rtx value and (possibly null) defining set.  */
avl_info::avl_info (rtx value_in, set_info *source_in)
  : m_value (value_in), m_source (source_in)
{
}
1595 bool
1596 avl_info::single_source_equal_p (const avl_info &other) const
1598 set_info *set1 = m_source;
1599 set_info *set2 = other.get_source ();
1600 insn_info *insn1 = extract_single_source (set1);
1601 insn_info *insn2 = extract_single_source (set2);
1602 if (!insn1 || !insn2)
1603 return false;
1604 return source_equal_p (insn1, insn2);
1607 bool
1608 avl_info::multiple_source_equal_p (const avl_info &other) const
1610 /* When the def info is same in RTL_SSA namespace, it's safe
1611 to consider they are avl compatible. */
1612 if (m_source == other.get_source ())
1613 return true;
1615 /* We only consider handle PHI node. */
1616 if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ())
1617 return false;
1619 phi_info *phi1 = as_a<phi_info *> (m_source);
1620 phi_info *phi2 = as_a<phi_info *> (other.get_source ());
1622 if (phi1->is_degenerate () && phi2->is_degenerate ())
1624 /* Degenerate PHI means the PHI node only have one input. */
1626 /* If both PHI nodes have the same single input in use list.
1627 We consider they are AVL compatible. */
1628 if (phi1->input_value (0) == phi2->input_value (0))
1629 return true;
1631 /* TODO: We can support more optimization cases in the future. */
1632 return false;
1635 avl_info &
1636 avl_info::operator= (const avl_info &other)
1638 m_value = other.get_value ();
1639 m_source = other.get_source ();
1640 return *this;
/* AVL equality: compare by rtx kind first (null, CONST_INT, VLMAX),
   then fall back to proving the SSA sources compute the same value.
   The checks are ordered; each one assumes the earlier ones failed.  */
bool
avl_info::operator== (const avl_info &other) const
{
  if (!m_value)
    return !other.get_value ();
  if (!other.get_value ())
    return false;

  /* Different rtx codes can never be the same AVL.  */
  if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
    return false;

  /* Handle CONST_INT AVL.  */
  if (CONST_INT_P (m_value))
    return INTVAL (m_value) == INTVAL (other.get_value ());

  /* Handle VLMAX AVL.  */
  if (vlmax_avl_p (m_value))
    return vlmax_avl_p (other.get_value ());
  if (vlmax_avl_p (other.get_value ()))
    return false;

  /* If any source is undef value, we think they are not equal.  */
  if (!m_source || !other.get_source ())
    return false;

  /* If both sources are single source (defined by a single real RTL)
     and their definitions are same.  */
  if (single_source_equal_p (other))
    return true;

  return multiple_source_equal_p (other);
}
1676 bool
1677 avl_info::operator!= (const avl_info &other) const
1679 return !(*this == other);
1682 bool
1683 avl_info::has_non_zero_avl () const
1685 if (has_avl_imm ())
1686 return INTVAL (get_value ()) > 0;
1687 if (has_avl_reg ())
1688 return vlmax_avl_p (get_value ());
1689 return false;
/* Initialize VL/VTYPE information.  Asserts the SEW is one of the
   architecturally valid element widths.  */
vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
			      enum vlmul_type vlmul_in, uint8_t ratio_in,
			      bool ta_in, bool ma_in)
  : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
    m_ta (ta_in), m_ma (ma_in)
{
  gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
}
1702 bool
1703 vl_vtype_info::operator== (const vl_vtype_info &other) const
1705 return same_avl_p (other) && m_sew == other.get_sew ()
1706 && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
1707 && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
1710 bool
1711 vl_vtype_info::operator!= (const vl_vtype_info &other) const
1713 return !(*this == other);
1716 bool
1717 vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
1719 /* We need to compare both RTL and SET. If both AVL are CONST_INT.
1720 For example, const_int 3 and const_int 4, we need to compare
1721 RTL. If both AVL are REG and their REGNO are same, we need to
1722 compare SET. */
1723 return get_avl () == other.get_avl ()
1724 && get_avl_source () == other.get_avl_source ();
1727 bool
1728 vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
1730 return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
1731 && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
1734 bool
1735 vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
1737 return get_ratio () == other.get_ratio ();
/* Compare the compatibility between Dem1 and Dem2.
   If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2
   meaning Dem1 is easier be compatible with others than Dem2
   or Dem2 is stricter than Dem1.
   For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO).  */
bool
vector_insn_info::operator>= (const vector_insn_info &other) const
{
  if (support_relaxed_compatible_p (*this, other))
    {
      unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond);
      /* Bypass AVL unavailable cases.  NOTE(review): starting at index 2
	 assumes the first two rows of riscv-vsetvl.def's unavailable
	 table are the AVL-related ones — confirm against the .def file
	 before reordering its entries.  */
      for (unsigned i = 2; i < array_size; i++)
	if (unavailable_conds[i].pair.match_cond_p (this->get_demands (),
						    other.get_demands ())
	    && unavailable_conds[i].incompatible_p (*this, other))
	  return false;
      return true;
    }

  if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this)))
    return false;
  if (!this->compatible_p (static_cast<const vl_vtype_info &> (other)))
    return true;

  if (*this == other)
    return true;

  for (const auto &cond : unavailable_conds)
    if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
	&& cond.incompatible_p (*this, other))
      return false;

  return true;
}
/* Equality of demanded infos: states must match (EMPTY with EMPTY,
   UNKNOWN with UNKNOWN), then all demand flags, the originating insn,
   the AVL and the VTYPE.  */
bool
vector_insn_info::operator== (const vector_insn_info &other) const
{
  gcc_assert (!uninit_p () && !other.uninit_p ()
	      && "Uninitialization should not happen");

  /* Empty is only equal to another Empty.  */
  if (empty_p ())
    return other.empty_p ();
  if (other.empty_p ())
    return empty_p ();

  /* Unknown is only equal to another Unknown.  */
  if (unknown_p ())
    return other.unknown_p ();
  if (other.unknown_p ())
    return unknown_p ();

  for (size_t i = 0; i < NUM_DEMAND; i++)
    if (m_demands[i] != other.demand_p ((enum demand_type) i))
      return false;

  /* We should consider different INSN demands as different
     expression.  Otherwise, we will be doing incorrect vsetvl
     elimination.  */
  if (m_insn != other.get_insn ())
    return false;

  if (!same_avl_p (other))
    return false;

  /* If the full VTYPE is valid, check that it is the same.  */
  return same_vtype_p (other);
}
/* Parse VL/VTYPE facts directly out of a hard RTL insn (no RTL_SSA
   info available): reset this object, then extract AVL, SEW, VLMUL and
   the tail/mask policies from the insn's attributes.  */
void
vector_insn_info::parse_insn (rtx_insn *rinsn)
{
  *this = vector_insn_info ();
  if (!NONDEBUG_INSN_P (rinsn))
    return;
  /* At -O0 only insns with a vtype operand are interesting.  */
  if (optimize == 0 && !has_vtype_op (rinsn))
    return;
  gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
  m_state = VALID;
  extract_insn_cached (rinsn);
  rtx avl = ::get_avl (rinsn);
  /* No SSA source is known at this level, hence the null set_info.  */
  m_avl = avl_info (avl, nullptr);
  m_sew = ::get_sew (rinsn);
  m_vlmul = ::get_vlmul (rinsn);
  m_ta = tail_agnostic_p (rinsn);
  m_ma = mask_agnostic_p (rinsn);
}
/* Parse the demanded VL/VTYPE information of an RTL_SSA insn: classify
   the insn (debug / call / asm / VL-VTYPE clobber / RVV), record which
   of AVL, SEW, LMUL, RATIO and the policies it demands, and finally try
   to propagate the AVL from a dominating user vsetvl definition.  */
void
vector_insn_info::parse_insn (insn_info *insn)
{
  *this = vector_insn_info ();

  /* Return if it is debug insn for the consistency with optimize == 0.  */
  if (insn->is_debug_insn ())
    return;

  /* We set it as unknown since we don't know what will happen in CALL or
     ASM.  */
  if (insn->is_call () || insn->is_asm ())
    {
      set_unknown ();
      return;
    }

  /* If this is something that updates VL/VTYPE that we don't know about, set
     the state to unknown.  */
  if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
      && (find_access (insn->defs (), VL_REGNUM)
	  || find_access (insn->defs (), VTYPE_REGNUM)))
    {
      set_unknown ();
      return;
    }

  if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
    return;

  /* Warning: This function has to work on both the lowered (i.e. post
     emit_local_forward_vsetvls) and pre-lowering forms.  The main implication
     of this is that it can't use the value of a SEW, VL, or Policy operand as
     they might be stale after lowering.  */
  vl_vtype_info::operator= (get_vl_vtype_info (insn));
  m_insn = insn;
  m_state = VALID;
  /* A vsetvl itself only pins down AVL and the SEW/LMUL ratio.  */
  if (vector_config_insn_p (insn->rtl ()))
    {
      m_demands[DEMAND_AVL] = true;
      m_demands[DEMAND_RATIO] = true;
      return;
    }

  if (has_vl_op (insn->rtl ()))
    m_demands[DEMAND_AVL] = true;

  if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
    m_demands[DEMAND_RATIO] = true;
  else
    {
      /* TODO: By default, if it doesn't demand RATIO, we set it
	 demand SEW && LMUL both.  Some instructions may demand SEW
	 only and ignore LMUL, will fix it later.  */
      m_demands[DEMAND_SEW] = true;
      if (!ignore_vlmul_insn_p (insn->rtl ()))
	m_demands[DEMAND_LMUL] = true;
    }

  if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
    m_demands[DEMAND_TAIL_POLICY] = true;
  if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
    m_demands[DEMAND_MASK_POLICY] = true;

  if (vector_config_insn_p (insn->rtl ()))
    return;

  /* Scalar moves only need a non-zero AVL, and with tail-agnostic only
     a SEW at least as large as demanded.  */
  if (scalar_move_insn_p (insn->rtl ()))
    {
      if (m_avl.has_non_zero_avl ())
	m_demands[DEMAND_NONZERO_AVL] = true;
      if (m_ta)
	m_demands[DEMAND_GE_SEW] = true;
    }

  /* AVL propagation below only applies to a register AVL with a known,
     real-or-PHI SSA source.  */
  if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ())
    return;
  if (!m_avl.get_source ()->insn ()->is_real ()
      && !m_avl.get_source ()->insn ()->is_phi ())
    return;

  insn_info *def_insn = extract_single_source (m_avl.get_source ());
  if (!def_insn || !vsetvl_insn_p (def_insn->rtl ()))
    return;

  vector_insn_info new_info;
  new_info.parse_insn (def_insn);
  if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
    return;

  if (new_info.has_avl ())
    {
      if (new_info.has_avl_imm ())
	set_avl_info (avl_info (new_info.get_avl (), nullptr));
      else
	{
	  if (vlmax_avl_p (new_info.get_avl ()))
	    set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
	  else
	    {
	      /* Conservatively propagate non-VLMAX AVL of user vsetvl:
		 1. The user vsetvl should be same block with the rvv insn.
		 2. The user vsetvl is the only def insn of rvv insn.
		 3. The AVL is not modified between def-use chain.
		 4. The VL is only used by insn within EBB.  */
	      bool modified_p = false;
	      for (insn_info *i = def_insn->next_nondebug_insn ();
		   real_insn_and_same_bb_p (i, get_insn ()->bb ());
		   i = i->next_nondebug_insn ())
		{
		  /* Consider this following sequence:

		       insn 1: vsetvli a5,a3,e8,mf4,ta,mu
		       insn 2: vsetvli zero,a5,e32,m1,ta,ma
			       vle32.v v1,0(a1)
			       vsetvli a2,zero,e32,m1,ta,ma
			       vadd.vv v1,v1,v1
			       vsetvli zero,a5,e32,m1,ta,ma
			       vse32.v v1,0(a0)
		       insn 3: sub a3,a3,a5

		     We can local AVL propagate "a3" from insn 1 to insn 2
		     if no insns between insn 1 and insn 2 modify "a3" even
		     though insn 3 modifies "a3".
		     Otherwise, we can't perform local AVL propagation.

		     Early break if we reach the insn 2.  */
		  if (!before_p (i, insn))
		    break;
		  if (find_access (i->defs (), REGNO (new_info.get_avl ())))
		    {
		      modified_p = true;
		      break;
		    }
		}

	      /* A VL that escapes the block (live-out use) blocks the
		 propagation (condition 4 above).  */
	      bool has_live_out_use = false;
	      for (use_info *use : m_avl.get_source ()->all_uses ())
		{
		  if (use->is_live_out_use ())
		    {
		      has_live_out_use = true;
		      break;
		    }
		}
	      if (!modified_p && !has_live_out_use
		  && def_insn == m_avl.get_source ()->insn ()
		  && m_insn->bb () == def_insn->bb ())
		set_avl_info (new_info.get_avl_info ());
	    }
	}
    }

  if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
    m_demands[DEMAND_NONZERO_AVL] = true;
}
1990 bool
1991 vector_insn_info::compatible_p (const vector_insn_info &other) const
1993 gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
1994 && "Can't compare invalid demanded infos");
1996 for (const auto &cond : incompatible_conds)
1997 if (cond.dual_incompatible_p (*this, other))
1998 return false;
1999 return true;
/* Like compatible_p, but ignore the AVL condition.  */
bool
vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const
{
  gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
	      && "Can't compare invalid demanded infos");
  unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond);
  /* Bypass AVL incompatible cases.  NOTE(review): starting at index 1
     assumes the first row of riscv-vsetvl.def's incompatible table is
     the AVL rule — confirm before reordering its entries.  */
  for (unsigned i = 1; i < array_size; i++)
    if (incompatible_conds[i].dual_incompatible_p (*this, other))
      return false;
  return true;
}
/* Return true if OTHER's AVL satisfies this info's AVL demand: trivially
   true when AVL is not demanded, true when only a non-zero AVL is
   demanded and OTHER's is non-zero, otherwise requires equal AVLs.  */
bool
vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
{
  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
  gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
  if (!demand_p (DEMAND_AVL))
    return true;
  if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
    return true;
  return get_avl_info () == other.get_avl_info ();
}
/* Overload of compatible_avl_p taking a bare avl_info.  Callers must
   demand AVL (asserted below).  */
bool
vector_insn_info::compatible_avl_p (const avl_info &other) const
{
  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
  gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
  gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state");
  /* NOTE(review): this branch contradicts the assert above and is dead
     in checking builds; presumably kept as a release-build safety net —
     confirm before removing.  */
  if (!demand_p (DEMAND_AVL))
    return true;
  if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
    return true;
  return get_avl_info () == other;
}
/* Return true if OTHER's VTYPE satisfies every VTYPE field this info
   demands.  GE_SEW relaxes the SEW check from equality to "at least".  */
bool
vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
{
  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
  gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
  if (demand_p (DEMAND_SEW))
    {
      if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ())
	return false;
      if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ())
	return false;
    }
  if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
    return false;
  if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
    return false;
  if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
    return false;
  if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
    return false;
  return true;
}
/* Determine whether the vector instructions requirements represented by
   Require are compatible with the previous vsetvli instruction represented
   by this.  INSN is the instruction whose requirements we're considering.  */
bool
vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
{
  gcc_assert (!uninit_p () && "Can't handle uninitialized info");
  if (empty_p ())
    return false;

  /* Nothing is compatible with Unknown.  */
  if (unknown_p ())
    return false;

  /* If the instruction doesn't need an AVLReg and the SEW matches, consider
     it compatible.  */
  if (!demand_p (DEMAND_AVL))
    if (m_sew == curr_info.get_sew ())
      return true;

  return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
}
2086 bool
2087 vector_insn_info::available_p (const vector_insn_info &other) const
2089 return *this >= other;
/* Fuse the AVL of INFO1 and INFO2 into this info.  INFO1's AVL wins
   when demanded; a NONZERO-only demand on INFO1 can be satisfied by
   INFO2's concrete AVL, in which case the weaker demand is dropped.  */
void
vector_insn_info::fuse_avl (const vector_insn_info &info1,
			    const vector_insn_info &info2)
{
  set_insn (info1.get_insn ());
  if (info1.demand_p (DEMAND_AVL))
    {
      if (info1.demand_p (DEMAND_NONZERO_AVL))
	{
	  /* INFO1 only needs "some non-zero AVL"; INFO2's concrete AVL
	     demand is stricter, so adopt it.  */
	  if (info2.demand_p (DEMAND_AVL)
	      && !info2.demand_p (DEMAND_NONZERO_AVL))
	    {
	      set_avl_info (info2.get_avl_info ());
	      set_demand (DEMAND_AVL, true);
	      set_demand (DEMAND_NONZERO_AVL, false);
	      return;
	    }
	}
      set_avl_info (info1.get_avl_info ());
      set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL));
    }
  else
    {
      set_avl_info (info2.get_avl_info ());
      set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL));
    }
  /* The fused info demands AVL if either input did.  */
  set_demand (DEMAND_AVL,
	      info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL));
}
/* Fuse the SEW/LMUL-related demands of INFO1 and INFO2 into this info,
   either trivially (identical demand shapes) or by the first matching
   rule of the fuse_rules table (tried in both argument orders).  An
   unmatched pair is a bug: compatible_p should have rejected it.  */
void
vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1,
				 const vector_insn_info &info2)
{
  /* We need to fuse sew && lmul according to demand info:

     1. GE_SEW.
     2. SEW.
     3. LMUL.
     4. RATIO.  */
  if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ()))
    {
      set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW));
      set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL));
      set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO));
      set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW));
      set_sew (info2.get_sew ());
      set_vlmul (info2.get_vlmul ());
      set_ratio (info2.get_ratio ());
      return;
    }
  for (const auto &rule : fuse_rules)
    {
      if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ()))
	{
	  set_demand (DEMAND_SEW, rule.demand_sew_p);
	  set_demand (DEMAND_LMUL, rule.demand_lmul_p);
	  set_demand (DEMAND_RATIO, rule.demand_ratio_p);
	  set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
	  set_sew (rule.new_sew (info1, info2));
	  set_vlmul (rule.new_vlmul (info1, info2));
	  set_ratio (rule.new_ratio (info1, info2));
	  return;
	}
      /* The table stores each unordered pair once; retry swapped.  */
      if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ()))
	{
	  set_demand (DEMAND_SEW, rule.demand_sew_p);
	  set_demand (DEMAND_LMUL, rule.demand_lmul_p);
	  set_demand (DEMAND_RATIO, rule.demand_ratio_p);
	  set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
	  set_sew (rule.new_sew (info2, info1));
	  set_vlmul (rule.new_vlmul (info2, info1));
	  set_ratio (rule.new_ratio (info2, info1));
	  return;
	}
    }
  gcc_unreachable ();
}
2171 void
2172 vector_insn_info::fuse_tail_policy (const vector_insn_info &info1,
2173 const vector_insn_info &info2)
2175 if (info1.demand_p (DEMAND_TAIL_POLICY))
2177 set_ta (info1.get_ta ());
2178 demand (DEMAND_TAIL_POLICY);
2180 else if (info2.demand_p (DEMAND_TAIL_POLICY))
2182 set_ta (info2.get_ta ());
2183 demand (DEMAND_TAIL_POLICY);
2185 else
2186 set_ta (get_default_ta ());
2189 void
2190 vector_insn_info::fuse_mask_policy (const vector_insn_info &info1,
2191 const vector_insn_info &info2)
2193 if (info1.demand_p (DEMAND_MASK_POLICY))
2195 set_ma (info1.get_ma ());
2196 demand (DEMAND_MASK_POLICY);
2198 else if (info2.demand_p (DEMAND_MASK_POLICY))
2200 set_ma (info2.get_ma ());
2201 demand (DEMAND_MASK_POLICY);
2203 else
2204 set_ma (get_default_ma ());
/* Merge MERGE_INFO into *this for the intra-block backward dataflow,
   returning the fused demand info.  The two infos must be compatible
   unless *this is a user vsetvl instruction (which may overwrite).
   Note the AVL is fused with MERGE_INFO first so the latest INSN/AVL
   is tracked — the opposite argument order from global_merge.  */
vector_insn_info
vector_insn_info::local_merge (const vector_insn_info &merge_info) const
{
  if (!vsetvl_insn_p (get_insn ()->rtl ()) && *this != merge_info)
    gcc_assert (this->compatible_p (merge_info)
		&& "Can't merge incompatible demanded infos");

  vector_insn_info new_info;
  new_info.set_valid ();
  /* For local backward data flow, we always update INSN && AVL as the
     latest INSN and AVL so that we can keep track status of each INSN.  */
  new_info.fuse_avl (merge_info, *this);
  new_info.fuse_sew_lmul (*this, merge_info);
  new_info.fuse_tail_policy (*this, merge_info);
  new_info.fuse_mask_policy (*this, merge_info);
  return new_info;
}
/* Merge MERGE_INFO into *this for the global (cross-block) dataflow
   and return the fused info.  BB_INDEX identifies the block the fused
   info will live in; the AVL source is re-anchored to a definition in
   that block so lifetime information stays correct (PR111548).  */
vector_insn_info
vector_insn_info::global_merge (const vector_insn_info &merge_info,
				unsigned int bb_index) const
{
  if (!vsetvl_insn_p (get_insn ()->rtl ()) && *this != merge_info)
    gcc_assert (this->compatible_p (merge_info)
		&& "Can't merge incompatible demanded infos");

  vector_insn_info new_info;
  new_info.set_valid ();

  /* For global data flow, we should keep original INSN and AVL if they
     valid since we should keep the life information of each block.

     For example:
       bb 0 -> bb 1.
     We should keep INSN && AVL of bb 1 since we will eventually emit
     vsetvl instruction according to INSN and AVL of bb 1.  */
  new_info.fuse_avl (*this, merge_info);
  /* Recompute the AVL source whose block index is equal to BB_INDEX.  */
  if (new_info.get_avl_source ()
      && new_info.get_avl_source ()->insn ()->is_phi ()
      && new_info.get_avl_source ()->bb ()->index () != bb_index)
    {
      /* The fused AVL source is a PHI in some other block: walk every
	 set feeding that PHI and prefer the one defined in BB_INDEX.  */
      hash_set<set_info *> sets
	= get_all_sets (new_info.get_avl_source (), true, true, true);
      new_info.set_avl_source (nullptr);
      bool can_find_set_p = false;
      set_info *first_set = nullptr;
      for (set_info *set : sets)
	{
	  if (!first_set)
	    first_set = set;
	  if (set->bb ()->index () == bb_index)
	    {
	      /* At most one set per block is expected here.  */
	      gcc_assert (!can_find_set_p);
	      new_info.set_avl_source (set);
	      can_find_set_p = true;
	    }
	}
      /* No set in BB_INDEX: fall back to a unique real (non-PHI)
	 definition when that is the only candidate.  */
      if (!can_find_set_p && sets.elements () == 1
	  && first_set->insn ()->is_real ())
	new_info.set_avl_source (first_set);
    }

  /* Make sure a VLMAX AVL always has a set_info so we can get the VL.  */
  if (vlmax_avl_p (new_info.get_avl ()))
    {
      if (this->get_avl_source ())
	new_info.set_avl_source (this->get_avl_source ());
      else
	{
	  gcc_assert (merge_info.get_avl_source ());
	  new_info.set_avl_source (merge_info.get_avl_source ());
	}
    }

  new_info.fuse_sew_lmul (*this, merge_info);
  new_info.fuse_tail_policy (*this, merge_info);
  new_info.fuse_mask_policy (*this, merge_info);
  return new_info;
}
/* Wrapper helps to return the AVL or VL operand for the
   vector_insn_info.  Return AVL if the AVL is not VLMAX.
   Otherwise, return the VL operand.  */
rtx
vector_insn_info::get_avl_or_vl_reg (void) const
{
  gcc_assert (has_avl_reg ());
  /* Non-VLMAX: the AVL operand itself is the register we want.  */
  if (!vlmax_avl_p (get_avl ()))
    return get_avl ();

  rtx_insn *rinsn = get_insn ()->rtl ();
  if (has_vl_op (rinsn) || vsetvl_insn_p (rinsn))
    {
      rtx vl = ::get_vl (rinsn);
      /* For VLMAX, we should make sure we get the
	 REG to emit 'vsetvl VL,zero' since the 'VL'
	 should be the REG according to RVV ISA.  */
      if (REG_P (vl))
	return vl;
    }

  /* We always have an avl_source if it is VLMAX AVL.  */
  gcc_assert (get_avl_source ());
  return get_avl_reg_rtx ();
}
/* For a fault-only-first load INSN, retarget this info's AVL to the
   VL output read by the following read_vl instruction.  Return true
   if such an instruction was found and the AVL was updated, false
   otherwise.  */
bool
vector_insn_info::update_fault_first_load_avl (insn_info *insn)
{
  // Update AVL to vl-output of the fault first load.
  const insn_info *read_vl = get_forward_read_vl_insn (insn);
  if (read_vl)
    {
      /* The read_vl destination register becomes the new AVL; record
	 its RTL-SSA set so later phases can trace the definition.  */
      rtx vl = SET_DEST (PATTERN (read_vl->rtl ()));
      def_info *def = find_access (read_vl->defs (), REGNO (vl));
      set_info *set = safe_dyn_cast<set_info *> (def);
      set_avl_info (avl_info (vl, set));
      set_insn (insn);
      return true;
    }
  return false;
}
2331 static const char *
2332 vlmul_to_str (vlmul_type vlmul)
2334 switch (vlmul)
2336 case LMUL_1:
2337 return "m1";
2338 case LMUL_2:
2339 return "m2";
2340 case LMUL_4:
2341 return "m4";
2342 case LMUL_8:
2343 return "m8";
2344 case LMUL_RESERVED:
2345 return "INVALID LMUL";
2346 case LMUL_F8:
2347 return "mf8";
2348 case LMUL_F4:
2349 return "mf4";
2350 case LMUL_F2:
2351 return "mf2";
2353 default:
2354 gcc_unreachable ();
/* Render a tail/mask policy flag for dumps: "agnostic" when
   AGNOSTIC_P is set, "undisturbed" otherwise.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
/* Dump this demand info to FILE for -fdump-rtl-vsetvl: state tag,
   demand bit vector, VL/VTYPE values, and the originating insn.  */
void
vector_insn_info::dump (FILE *file) const
{
  fprintf (file, "[");
  /* State tag: exactly one of these predicates holds.  */
  if (uninit_p ())
    fprintf (file, "UNINITIALIZED,");
  else if (valid_p ())
    fprintf (file, "VALID,");
  else if (unknown_p ())
    fprintf (file, "UNKNOWN,");
  else if (empty_p ())
    fprintf (file, "EMPTY,");
  else
    fprintf (file, "DIRTY,");

  /* One flag per demand bit, in declaration order.  */
  fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
  fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL));
  fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
  fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW));
  fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
  fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
  fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
  fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));

  fprintf (file, "AVL=");
  print_rtl_single (file, get_avl ());
  fprintf (file, "SEW=%d,", get_sew ());
  fprintf (file, "VLMUL=%s,", vlmul_to_str (get_vlmul ()));
  fprintf (file, "RATIO=%d,", get_ratio ());
  fprintf (file, "TAIL_POLICY=%s,", policy_to_str (get_ta ()));
  fprintf (file, "MASK_POLICY=%s", policy_to_str (get_ma ()));
  fprintf (file, "]\n");

  if (valid_p ())
    {
      if (get_insn ())
	{
	  fprintf (file, "The real INSN=");
	  print_rtl_single (file, get_insn ()->rtl ());
	}
    }
}
/* Construct the per-function manager: null out every LCM bitmap
   vector, size the per-insn and per-block info arrays, and parse the
   demand info of every instruction.  At -O0 we iterate the plain CFG
   (RTL-SSA is not built); otherwise we walk the RTL-SSA blocks and
   additionally reset each block's probability.  */
vector_infos_manager::vector_infos_manager ()
{
  vector_edge_list = nullptr;
  vector_kill = nullptr;
  vector_del = nullptr;
  vector_insert = nullptr;
  vector_antic = nullptr;
  vector_transp = nullptr;
  vector_comp = nullptr;
  vector_avin = nullptr;
  vector_avout = nullptr;
  vector_antin = nullptr;
  vector_antout = nullptr;
  vector_earliest = nullptr;
  vector_insn_infos.safe_grow (get_max_uid ());
  vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
  if (!optimize)
    {
      basic_block cfg_bb;
      rtx_insn *rinsn;
      FOR_ALL_BB_FN (cfg_bb, cfun)
	{
	  vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
	  vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
	  FOR_BB_INSNS (cfg_bb, rinsn)
	    vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
	}
    }
  else
    {
      for (const bb_info *bb : crtl->ssa->bbs ())
	{
	  vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
	  vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
	  for (insn_info *insn : bb->real_insns ())
	    vector_insn_infos[insn->uid ()].parse_insn (insn);
	  vector_block_infos[bb->index ()].probability = profile_probability ();
	}
    }
}
2448 void
2449 vector_infos_manager::create_expr (vector_insn_info &info)
2451 for (size_t i = 0; i < vector_exprs.length (); i++)
2452 if (*vector_exprs[i] == info)
2453 return;
2454 vector_exprs.safe_push (&info);
2457 size_t
2458 vector_infos_manager::get_expr_id (const vector_insn_info &info) const
2460 for (size_t i = 0; i < vector_exprs.length (); i++)
2461 if (*vector_exprs[i] == info)
2462 return i;
2463 gcc_unreachable ();
2466 auto_vec<size_t>
2467 vector_infos_manager::get_all_available_exprs (
2468 const vector_insn_info &info) const
2470 auto_vec<size_t> available_list;
2471 for (size_t i = 0; i < vector_exprs.length (); i++)
2472 if (info.available_p (*vector_exprs[i]))
2473 available_list.safe_push (i);
2474 return available_list;
/* Return true if every expression selected in BITDATA has the same
   SEW/LMUL ratio.  An empty bitmap yields false.  */
bool
vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
{
  if (bitmap_empty_p (bitdata))
    return false;

  int ratio = -1;
  /* NOTE: despite the name, BB_INDEX iterates expression ids here,
     not basic-block indices.  */
  unsigned int bb_index;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
  {
    /* Latch the first ratio seen; any later mismatch fails.  */
    if (ratio == -1)
      ratio = vector_exprs[bb_index]->get_ratio ();
    else if (vector_exprs[bb_index]->get_ratio () != ratio)
      return false;
  }
  return true;
}
/* Return TRUE if the incoming vector configuration state
   to CFG_BB is compatible with the vector configuration
   state in CFG_BB, FALSE otherwise.  */
bool
vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
{
  const auto &info = vector_block_infos[cfg_bb->index].local_dem;
  /* Check the block's local demand against every expression that is
     available on entry (AVIN) after LCM dataflow.  */
  sbitmap avin = vector_avin[cfg_bb->index];
  unsigned int bb_index;
  sbitmap_iterator sbi;
  EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
  {
    const auto &avin_info
      = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
    if (!info.compatible_p (avin_info))
      return false;
  }
  return true;
}
/* Return true if every expression selected in BITDATA carries the
   same AVL as CFG_BB's local demand.  Trivially true when the block
   does not demand an AVL at all; false for an empty bitmap.  */
bool
vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
				      sbitmap bitdata) const
{
  if (bitmap_empty_p (bitdata))
    return false;

  const auto &block_info = vector_block_infos[cfg_bb->index];
  if (!block_info.local_dem.demand_p (DEMAND_AVL))
    return true;

  avl_info avl = block_info.local_dem.get_avl_info ();
  /* NOTE: BB_INDEX iterates expression ids here, not block indices.  */
  unsigned int bb_index;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
  {
    if (vector_exprs[bb_index]->get_avl_info () != avl)
      return false;
  }
  return true;
}
/* Return true if doing earliest fusion out of CFG_BB is worthwhile,
   i.e. its successor blocks do not all carry the same probability, so
   a most-likely successor exists to bias the fusion toward.  */
bool
vector_infos_manager::earliest_fusion_worthwhile_p (
  const basic_block cfg_bb) const
{
  edge e;
  edge_iterator ei;
  profile_probability prob = profile_probability::uninitialized ();
  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
    {
      if (prob == profile_probability::uninitialized ())
	prob = vector_block_infos[e->dest->index].probability;
      else if (prob == vector_block_infos[e->dest->index].probability)
	continue;
      else
	/* We pick the highest probability among those incompatible VSETVL
	   infos.  When all incompatible VSETVL infos have same probability,
	   we don't pick any of them.  */
	return true;
    }
  return false;
}
/* Return true if INFO is effectively dominated on every predecessor
   edge of CFG_BB, i.e. each predecessor's reaching-out state already
   covers INFO.  A compatible self-loop edge is skipped.  */
bool
vector_infos_manager::vsetvl_dominated_by_all_preds_p (
  const basic_block cfg_bb, const vector_insn_info &info) const
{
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, cfg_bb->preds)
    {
      const auto &reaching_out = vector_block_infos[e->src->index].reaching_out;
      /* A compatible back-edge from the block to itself never blocks
	 domination.  */
      if (e->src->index == cfg_bb->index && reaching_out.compatible_p (info))
	continue;
      if (!vsetvl_dominated_by_p (e->src, info, reaching_out, false))
	return false;
    }
  return true;
}
2579 size_t
2580 vector_infos_manager::expr_set_num (sbitmap bitdata) const
2582 size_t count = 0;
2583 for (size_t i = 0; i < vector_exprs.length (); i++)
2584 if (bitmap_bit_p (bitdata, i))
2585 count++;
2586 return count;
/* Free all per-function storage owned by the manager.  The LCM bitmap
   vectors only exist when optimizing, so they are freed under the
   same condition they were created.  */
void
vector_infos_manager::release (void)
{
  if (!vector_insn_infos.is_empty ())
    vector_insn_infos.release ();
  if (!vector_block_infos.is_empty ())
    vector_block_infos.release ();
  if (!vector_exprs.is_empty ())
    vector_exprs.release ();

  /* Both work lists must have been drained by the commit phase.  */
  gcc_assert (to_refine_vsetvls.is_empty ());
  gcc_assert (to_delete_vsetvls.is_empty ());
  if (optimize > 0)
    free_bitmap_vectors ();
}
/* Allocate every per-block (and per-edge) sbitmap vector needed by
   LCM, one bit per recorded expression.  TRANSP starts all-ones
   (everything transparent until proven otherwise); ANTIC and COMP
   start cleared.  The EARLIEST vector is per-edge, hence sized by the
   freshly created edge list.  */
void
vector_infos_manager::create_bitmap_vectors (void)
{
  /* Create the bitmap vectors.  */
  vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				       vector_exprs.length ());
  vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					vector_exprs.length ());
  vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				      vector_exprs.length ());
  vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				      vector_exprs.length ());
  vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				       vector_exprs.length ());
  vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				      vector_exprs.length ());
  vector_antin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				       vector_exprs.length ());
  vector_antout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					vector_exprs.length ());

  bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun));
  vector_edge_list = create_edge_list ();
  vector_earliest = sbitmap_vector_alloc (NUM_EDGES (vector_edge_list),
					  vector_exprs.length ());
}
/* Release every LCM bitmap vector and the edge list, then null all
   the pointers so a later release ()/re-entry cannot double-free.
   Individual vectors are guarded because pre_edge_lcm only allocates
   DEL/INSERT when there was something to optimize.  */
void
vector_infos_manager::free_bitmap_vectors (void)
{
  /* Finished.  Free up all the things we've allocated.  */
  free_edge_list (vector_edge_list);
  if (vector_del)
    sbitmap_vector_free (vector_del);
  if (vector_insert)
    sbitmap_vector_free (vector_insert);
  if (vector_kill)
    sbitmap_vector_free (vector_kill);
  if (vector_antic)
    sbitmap_vector_free (vector_antic);
  if (vector_transp)
    sbitmap_vector_free (vector_transp);
  if (vector_comp)
    sbitmap_vector_free (vector_comp);
  if (vector_avin)
    sbitmap_vector_free (vector_avin);
  if (vector_avout)
    sbitmap_vector_free (vector_avout);
  if (vector_antin)
    sbitmap_vector_free (vector_antin);
  if (vector_antout)
    sbitmap_vector_free (vector_antout);
  if (vector_earliest)
    sbitmap_vector_free (vector_earliest);

  vector_edge_list = nullptr;
  vector_kill = nullptr;
  vector_del = nullptr;
  vector_insert = nullptr;
  vector_antic = nullptr;
  vector_transp = nullptr;
  vector_comp = nullptr;
  vector_avin = nullptr;
  vector_avout = nullptr;
  vector_antin = nullptr;
  vector_antout = nullptr;
  vector_earliest = nullptr;
}
/* Dump the whole manager state to FILE: per-block local demand info,
   per-block local LCM properties, global LCM dataflow results, and
   finally the per-edge INSERT (or, before LCM ran, EARLIEST) sets.  */
void
vector_infos_manager::dump (FILE *file) const
{
  basic_block cfg_bb;
  rtx_insn *rinsn;

  /* Section 1: local demand info of each block and each RVV insn.  */
  fprintf (file, "\n");
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
      fprintf (file, "<HEADER>=");
      vector_block_infos[cfg_bb->index].local_dem.dump (file);
      FOR_BB_INSNS (cfg_bb, rinsn)
	{
	  if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
	    continue;
	  fprintf (file, "<insn %d>=", INSN_UID (rinsn));
	  const auto &info = vector_insn_infos[INSN_UID (rinsn)];
	  info.dump (file);
	}
      fprintf (file, "<FOOTER>=");
      vector_block_infos[cfg_bb->index].reaching_out.dump (file);
      fprintf (file, "<Probability>=");
      vector_block_infos[cfg_bb->index].probability.dump (file);
      fprintf (file, "\n\n");
    }

  /* Section 2: local LCM properties; each bitmap may be unallocated
     when dumping before create_bitmap_vectors, hence the nil checks.  */
  fprintf (file, "\n");
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);

      fprintf (file, "<ANTLOC>=");
      if (vector_antic == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_antic[cfg_bb->index]);

      fprintf (file, "<AVLOC>=");
      if (vector_comp == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_comp[cfg_bb->index]);

      fprintf (file, "<TRANSP>=");
      if (vector_transp == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_transp[cfg_bb->index]);

      fprintf (file, "<KILL>=");
      if (vector_kill == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_kill[cfg_bb->index]);

      fprintf (file, "<ANTIN>=");
      if (vector_antin == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_antin[cfg_bb->index]);

      fprintf (file, "<ANTOUT>=");
      if (vector_antout == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_antout[cfg_bb->index]);
    }

  /* Section 3: global dataflow results.  */
  fprintf (file, "\n");
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
	       cfg_bb->index);

      fprintf (file, "<AVIN>=");
      if (vector_avin == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_avin[cfg_bb->index]);

      fprintf (file, "<AVOUT>=");
      if (vector_avout == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_avout[cfg_bb->index]);

      fprintf (file, "<DELETE>=");
      if (vector_del == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_del[cfg_bb->index]);
    }

  /* Section 4: per-edge info.  INSERT exists only after pre_edge_lcm;
     otherwise fall back to the EARLIEST sets.  */
  for (size_t i = 0; i < vector_exprs.length (); i++)
    {
      for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
	{
	  edge eg = INDEX_EDGE (vector_edge_list, ed);
	  if (vector_insert)
	    {
	      if (bitmap_bit_p (vector_insert[ed], i))
		{
		  fprintf (file,
			   "\nGlobal LCM (Lazy code motion) INSERT info:\n");
		  fprintf (file,
			   "INSERT edge %d from <bb %d> to <bb %d> for VSETVL "
			   "expr[%ld]\n",
			   ed, eg->src->index, eg->dest->index, i);
		}
	    }
	  else
	    {
	      if (bitmap_bit_p (vector_earliest[ed], i))
		{
		  fprintf (file,
			   "\nGlobal LCM (Lazy code motion) EARLIEST info:\n");
		  fprintf (
		    file,
		    "EARLIEST edge %d from <bb %d> to <bb %d> for VSETVL "
		    "expr[%ld]\n",
		    ed, eg->src->index, eg->dest->index, i);
		}
	    }
	}
    }
}
/* Registration data for the VSETVL RTL pass; no timevar and no
   special TODO flags are needed.  */
const pass_data pass_data_vsetvl = {
  RTL_PASS,	 /* type */
  "vsetvl",	 /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE,	 /* tv_id */
  0,		 /* properties_required */
  0,		 /* properties_provided */
  0,		 /* properties_destroyed */
  0,		 /* todo_flags_start */
  0,		 /* todo_flags_finish */
};
/* The VSETVL pass itself.  Runs simple_vsetvl at -O0 and the full
   lazy-code-motion pipeline (phases 1-6 below) otherwise; only gated
   on TARGET_VECTOR.  */
class pass_vsetvl : public rtl_opt_pass
{
private:
  /* Owns all per-function demand info and LCM bitmaps.  */
  vector_infos_manager *m_vector_manager;

  /* Accessors for per-insn and per-block demand info.  */
  const vector_insn_info &get_vector_info (const rtx_insn *) const;
  const vector_insn_info &get_vector_info (const insn_info *) const;
  const vector_block_info &get_block_info (const basic_block) const;
  const vector_block_info &get_block_info (const bb_info *) const;
  vector_block_info &get_block_info (const basic_block);
  vector_block_info &get_block_info (const bb_info *);
  void update_vector_info (const insn_info *, const vector_insn_info &);
  void update_block_info (int, profile_probability, const vector_insn_info &);

  void simple_vsetvl (void) const;
  void lazy_vsetvl (void);

  /* Phase 1: backward per-block demand computation.  */
  void compute_local_backward_infos (const bb_info *);

  /* Phase 2: forward per-block vsetvl emission.  */
  bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
  void transfer_before (vector_insn_info &, insn_info *) const;
  void transfer_after (vector_insn_info &, insn_info *) const;
  void emit_local_forward_vsetvls (const bb_info *);

  /* Phase 3: earliest-edge demand fusion.  */
  bool earliest_fusion (void);
  void vsetvl_fusion (void);

  /* Phase 4: LCM.  */
  void prune_expressions (void);
  void compute_local_properties (void);
  bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
  void refine_vsetvls (void) const;
  void cleanup_vsetvls (void);
  bool commit_vsetvls (void);
  void pre_vsetvl (void);

  /* Phase 5: RTL-SSA based cleanup.  */
  rtx_insn *get_vsetvl_at_end (const bb_info *, vector_insn_info *) const;
  void local_eliminate_vsetvl_insn (const bb_info *) const;
  bool global_eliminate_vsetvl_insn (const bb_info *) const;
  void ssa_post_optimization (void) const;

  /* Phase 6: DF based cleanup.  */
  void df_post_optimization (void) const;

  void init (void);
  void done (void);
  void compute_probabilities (void);

public:
  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}

  /* opt_pass methods: */
  virtual bool gate (function *) final override { return TARGET_VECTOR; }
  virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl
/* Return the demand info recorded for RTL insn I.  */
const vector_insn_info &
pass_vsetvl::get_vector_info (const rtx_insn *i) const
{
  return m_vector_manager->vector_insn_infos[INSN_UID (i)];
}
/* Return the demand info recorded for RTL-SSA insn I.  */
const vector_insn_info &
pass_vsetvl::get_vector_info (const insn_info *i) const
{
  return m_vector_manager->vector_insn_infos[i->uid ()];
}
/* Return the block info recorded for CFG block BB (read-only).  */
const vector_block_info &
pass_vsetvl::get_block_info (const basic_block bb) const
{
  return m_vector_manager->vector_block_infos[bb->index];
}
/* Return the block info recorded for RTL-SSA block BB (read-only).  */
const vector_block_info &
pass_vsetvl::get_block_info (const bb_info *bb) const
{
  return m_vector_manager->vector_block_infos[bb->index ()];
}
/* Return the block info recorded for CFG block BB (mutable).  */
vector_block_info &
pass_vsetvl::get_block_info (const basic_block bb)
{
  return m_vector_manager->vector_block_infos[bb->index];
}
/* Return the block info recorded for RTL-SSA block BB (mutable).  */
vector_block_info &
pass_vsetvl::get_block_info (const bb_info *bb)
{
  return m_vector_manager->vector_block_infos[bb->index ()];
}
/* Overwrite the demand info recorded for insn I with NEW_INFO.  */
void
pass_vsetvl::update_vector_info (const insn_info *i,
				 const vector_insn_info &new_info)
{
  m_vector_manager->vector_insn_infos[i->uid ()] = new_info;
}
/* Install NEW_INFO and probability PROB for block INDEX.  When the
   block's local demand equals its reaching-out info, both describe
   the same single state, so keep them in sync.  */
void
pass_vsetvl::update_block_info (int index, profile_probability prob,
				const vector_insn_info &new_info)
{
  m_vector_manager->vector_block_infos[index].probability = prob;
  if (m_vector_manager->vector_block_infos[index].local_dem
      == m_vector_manager->vector_block_infos[index].reaching_out)
    m_vector_manager->vector_block_infos[index].local_dem = new_info;
  m_vector_manager->vector_block_infos[index].reaching_out = new_info;
}
/* Simple vsetvl insertion for optimize == 0: emit a vsetvl directly
   before every instruction that needs VTYPE, with no redundancy
   elimination.  */
void
pass_vsetvl::simple_vsetvl (void) const
{
  if (dump_file)
    fprintf (dump_file,
	     "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
	     "function:%s\n",
	     n_basic_blocks_for_fn (cfun), function_name (cfun));

  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      FOR_BB_INSNS (cfg_bb, rinsn)
	{
	  if (!NONDEBUG_INSN_P (rinsn))
	    continue;
	  if (has_vtype_op (rinsn))
	    {
	      /* Discard the VL result: at -O0 nothing reuses it.  */
	      const auto info = get_vector_info (rinsn);
	      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
				NULL_RTX, rinsn);
	    }
	}
    }
}
/* Compute demanded information by backward data-flow analysis.
   Walk BB's real insns in reverse, fusing each insn's demand into the
   running CHANGE state when compatible; the final state becomes the
   block's local demand (what BB requires on entry).  */
void
pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
{
  vector_insn_info change;
  change.set_empty ();

  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
  block_info.reaching_out = change;

  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
    {
      auto &info = get_vector_info (insn);

      if (info.uninit_p ())
	/* If it is uninitialized, propagate it directly.  */
	update_vector_info (insn, change);
      else if (info.unknown_p ())
	change = info;
      else
	{
	  gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
	  if (change.valid_p ())
	    {
	      /* Fuse only when the AVL propagation would not need a
		 register that is unavailable at INSN.  */
	      if (!(propagate_avl_across_demands_p (change, info)
		    && !reg_available_p (insn, change))
		  && change.compatible_p (info))
		{
		  update_vector_info (insn, change.local_merge (info));
		  /* Fix PR109399, we should update user vsetvl instruction
		     if there is a change in demand fusion.  */
		  if (vsetvl_insn_p (insn->rtl ()))
		    change_vsetvl_insn (insn, info);
		}
	    }
	  change = info;
	}
    }

  block_info.local_dem = change;
  if (block_info.local_dem.empty_p ())
    block_info.reaching_out = block_info.local_dem;
}
3002 /* Return true if a dem_info is required to transition from curr_info to
3003 require before INSN. */
3004 bool
3005 pass_vsetvl::need_vsetvl (const vector_insn_info &require,
3006 const vector_insn_info &curr_info) const
3008 if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
3009 return true;
3011 if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info)))
3012 return false;
3014 return true;
/* Given an incoming state reaching INSN, modifies that state so that it is
   minimally compatible with INSN.  The resulting state is guaranteed to be
   semantically legal for INSN, but may not be the state requested by INSN.  */
void
pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
{
  /* Insns without a VTYPE operand leave the state untouched.  */
  if (!has_vtype_op (insn->rtl ()))
    return;

  const vector_insn_info require = get_vector_info (insn);
  if (info.valid_p () && !need_vsetvl (require, info))
    return;
  info = require;
}
/* Given a state with which we evaluated insn (see transfer_before above for
   why this might be different that the state insn requested), modify the
   state to reflect the changes insn might make.  */
void
pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
{
  /* A vsetvl-family insn installs its own state wholesale.  */
  if (vector_config_insn_p (insn->rtl ()))
    {
      info = get_vector_info (insn);
      return;
    }

  /* Fault-only-first loads implicitly write VL; track the new AVL.  */
  if (fault_first_load_p (insn->rtl ())
      && info.update_fault_first_load_avl (insn))
    return;

  /* If this is something that updates VL/VTYPE that we don't know about, set
     the state to unknown.  */
  if (insn->is_call () || insn->is_asm ()
      || find_access (insn->defs (), VL_REGNUM)
      || find_access (insn->defs (), VTYPE_REGNUM))
    info = vector_insn_info::get_unknown ();
}
/* Emit vsetvl within each block by forward data-flow analysis.
   Simulates the VL/VTYPE state through BB's insns, inserting a vsetvl
   wherever the simulated state cannot satisfy the next insn, and
   records the block's final state as its reaching-out info.  */
void
pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
{
  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
  if (block_info.local_dem.empty_p ())
    return;

  vector_insn_info curr_info;
  for (insn_info *insn : bb->real_nondebug_insns ())
    {
      const vector_insn_info prev_info = curr_info;
      enum vsetvl_type type = NUM_VSETVL_TYPE;
      transfer_before (curr_info, insn);

      if (has_vtype_op (insn->rtl ()))
	{
	  if (static_cast<const vl_vtype_info &> (prev_info)
	      != static_cast<const vl_vtype_info &> (curr_info))
	    {
	      const auto require = get_vector_info (insn);
	      if (!require.compatible_p (
		    static_cast<const vl_vtype_info &> (prev_info)))
		type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require,
				      prev_info);
	    }
	}

      /* Fix the issue of following sequence:
	 vsetivli zero, 5
	 ....
	 vsetvli zero, zero
	 vmv.x.s (demand AVL = 8).
	 ....
	 incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8.
	 correct: vsetivli zero, 8
	 vadd (demand AVL = 8).  */
      if (type == VSETVL_VTYPE_CHANGE_ONLY)
	{
	  /* Update the curr_info to be real correct AVL.  */
	  curr_info.set_avl_info (prev_info.get_avl_info ());
	}
      transfer_after (curr_info, insn);
    }

  block_info.reaching_out = curr_info;
}
/* Assemble the candidates expressions for LCM: register each block's
   valid-or-dirty local demand and reaching-out info (create_expr
   deduplicates), then dump the resulting expression list.  */
void
pass_vsetvl::prune_expressions (void)
{
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      if (m_vector_manager->vector_block_infos[bb->index ()]
	    .local_dem.valid_or_dirty_p ())
	m_vector_manager->create_expr (
	  m_vector_manager->vector_block_infos[bb->index ()].local_dem);
      if (m_vector_manager->vector_block_infos[bb->index ()]
	    .reaching_out.valid_or_dirty_p ())
	m_vector_manager->create_expr (
	  m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
	       m_vector_manager->vector_exprs.length ());
      fprintf (dump_file, "Expression List:\n");
      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
	{
	  fprintf (dump_file, "Expr[%ld]:\n", i);
	  m_vector_manager->vector_exprs[i]->dump (dump_file);
	  fprintf (dump_file, "\n");
	}
    }
}
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and expression would contain the same value if the computation
   was moved to the beginning of the block.  */
void
pass_vsetvl::compute_local_properties (void)
{
  /* - If T is locally available at the end of a block, then T' must be
       available at the end of the same block.  Since some optimization has
       occurred earlier, T' might not be locally available, however, it must
       have been previously computed on all paths.  As a formula, T at AVLOC(B)
       implies that T' at AVOUT(B).
       An "available occurrence" is one that is the last occurrence in the
       basic block and the operands are not modified by following statements in
       the basic block [including this insn].

     - If T is locally anticipated at the beginning of a block, then either
       T', is locally anticipated or it is already available from previous
       blocks.  As a formula, this means that T at ANTLOC(B) implies that T' at
       ANTLOC(B) at AVIN(B).
       An "anticipatable occurrence" is one that is the first occurrence in the
       basic block, the operands are not modified in the basic block prior
       to the occurrence and the output is not used between the start of
       the block and the occurrence.  */

  basic_block cfg_bb;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      unsigned int curr_bb_idx = bb->index ();
      if (curr_bb_idx == ENTRY_BLOCK || curr_bb_idx == EXIT_BLOCK)
	continue;
      const auto local_dem
	= m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
      const auto reaching_out
	= m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;

      /* Compute transparent.  A block with any vector demand (or an
	 unknown state) is opaque; otherwise an expression is opaque
	 only if the block writes its AVL/VL register, or — for VLMAX —
	 even reads it, since the VL output would be clobbered.  */
      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
	{
	  const auto *expr = m_vector_manager->vector_exprs[i];
	  if (local_dem.valid_or_dirty_p () || local_dem.unknown_p ())
	    bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
	  else if (expr->has_avl_reg ())
	    {
	      rtx reg = expr->get_avl_or_vl_reg ();
	      for (const insn_info *insn : bb->real_nondebug_insns ())
		{
		  if (find_access (insn->defs (), REGNO (reg)))
		    {
		      bitmap_clear_bit (
			m_vector_manager->vector_transp[curr_bb_idx], i);
		      break;
		    }
		  else if (vlmax_avl_p (expr->get_avl ())
			   && find_access (insn->uses (), REGNO (reg)))
		    {
		      bitmap_clear_bit (
			m_vector_manager->vector_transp[curr_bb_idx], i);
		      break;
		    }
		}
	    }
	}

      /* Compute anticipatable occurrences.  */
      if (local_dem.valid_or_dirty_p ())
	if (anticipatable_occurrence_p (bb, local_dem))
	  bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
			  m_vector_manager->get_expr_id (local_dem));

      /* Compute available occurrences.  */
      if (reaching_out.valid_or_dirty_p ())
	{
	  auto_vec<size_t> available_list
	    = m_vector_manager->get_all_available_exprs (reaching_out);
	  for (size_t i = 0; i < available_list.length (); i++)
	    {
	      const vector_insn_info *expr
		= m_vector_manager->vector_exprs[available_list[i]];
	      if (available_occurrence_p (bb, *expr))
		bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
				available_list[i]);
	    }
	}

      /* In a loop block whose entry and exit states conflict, the
	 entry demand must not be hoisted across the back edge.  */
      if (loop_basic_block_p (bb->cfg_bb ()) && local_dem.valid_or_dirty_p ()
	  && reaching_out.valid_or_dirty_p ()
	  && !local_dem.compatible_p (reaching_out))
	bitmap_clear_bit (m_vector_manager->vector_antic[curr_bb_idx],
			  m_vector_manager->get_expr_id (local_dem));
    }

  /* Compute kill for each basic block using:

     ~(TRANSP | COMP)
  */
  FOR_EACH_BB_FN (cfg_bb, cfun)
    {
      bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
		  m_vector_manager->vector_transp[cfg_bb->index],
		  m_vector_manager->vector_comp[cfg_bb->index]);
      bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
		  m_vector_manager->vector_kill[cfg_bb->index]);
    }

  FOR_EACH_BB_FN (cfg_bb, cfun)
    {
      edge e;
      edge_iterator ei;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping (for PRE) and memory (for hoist) expressions
	 because we won't be able to properly place the instruction on
	 the edge.  So make them neither anticipatable nor transparent.
	 This is fairly conservative.

	 ??? For hoisting it may be necessary to check for set-and-jump
	 instructions here, not just for abnormal edges.  The general problem
	 is that when an expression cannot not be placed right at the end of
	 a basic block we should account for any side-effects of a subsequent
	 jump instructions that could clobber the expression.  It would
	 be best to implement this check along the lines of
	 should_hoist_expr_to_dom where the target block is already known
	 and, hence, there's no need to conservatively prune expressions on
	 "intermediate" set-and-jump instructions.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
	if (e->flags & EDGE_COMPLEX)
	  {
	    bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
	    bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
	  }
    }
}
3280 /* Fuse demand info for earliest edge. */
/* Phase 3 core: for every (edge, expression) pair flagged by LCM's
   EARLIEST computation, try to fuse the demanded VL/VTYPE info of the
   expression into the edge's source block's reaching-out info, so a
   later LCM round can hoist a single vsetvl.  Returns true if any
   block info changed (the caller iterates to a fixpoint).  */
3281 bool
3282 pass_vsetvl::earliest_fusion (void)
3284 bool changed_p = false;
3285 for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
3287 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3289 auto &expr = *m_vector_manager->vector_exprs[i];
3290 if (expr.empty_p ())
3291 continue;
3292 edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
3293 /* If it is the edge that we never reach, skip its possible PRE
3294 fusion conservatively. */
3295 if (eg->probability == profile_probability::never ())
3296 break;
3297 if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
3298 || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
3299 break;
/* Bit I set in vector_earliest[ED] means expression I is placed
   earliest on edge ED by LCM, i.e. it is a fusion candidate here.  */
3300 if (bitmap_bit_p (m_vector_manager->vector_earliest[ed], i))
3302 auto &src_block_info = get_block_info (eg->src);
3303 auto &dest_block_info = get_block_info (eg->dest);
3304 if (src_block_info.reaching_out.unknown_p ())
3305 break;
3307 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3308 vector_insn_info new_info = vector_insn_info ();
3309 profile_probability prob = src_block_info.probability;
3310 /* We don't fuse user vsetvl into EMPTY or
3311 DIRTY (EMPTY but polluted) block for these
3312 following reasons:
3314 - The user vsetvl instruction is configured as
3315 no side effects that the previous passes
3316 (GSCE, Loop-invariant, ..., etc)
3317 should be able to do a good job on optimization
3318 of user explicit vsetvls so we don't need to
3319 PRE optimization (The user vsetvls should be
3320 on the optimal local already before this pass)
3321 again for user vsetvls in VSETVL PASS here
3322 (Phase 3 && Phase 4).
3324 - Allowing user vsetvls be optimized in PRE
3325 optimization here (Phase 3 && Phase 4) will
3326 complicate the codes so much so we prefer user
3327 vsetvls be optimized in post-optimization
3328 (Phase 5 && Phase 6). */
3329 if (vsetvl_insn_p (expr.get_insn ()->rtl ()))
3331 if (src_block_info.reaching_out.empty_p ())
3332 continue;
3333 else if (src_block_info.reaching_out.dirty_p ()
3334 && !src_block_info.reaching_out.compatible_p (expr))
3336 new_info.set_empty ();
3337 /* Update probability as uninitialized status so that
3338 we won't try to fuse any demand info into such EMPTY
3339 block any more. */
3340 prob = profile_probability::uninitialized ();
3341 update_block_info (eg->src->index, prob, new_info);
3342 continue;
/* From here on, EXPR is a normal RVV demand (not a user vsetvl).
   Dispatch on the current state of the source block's reaching-out
   info: EMPTY, DIRTY, or VALID.  */
3346 if (src_block_info.reaching_out.empty_p ())
3348 if (src_block_info.probability
3349 == profile_probability::uninitialized ())
3350 continue;
3351 new_info = expr.global_merge (expr, eg->src->index);
3352 new_info.set_dirty ();
3353 prob = dest_block_info.probability;
3354 update_block_info (eg->src->index, prob, new_info);
3355 changed_p = true;
3357 else if (src_block_info.reaching_out.dirty_p ())
3359 /* DIRTY -> DIRTY or VALID -> DIRTY. */
3360 if (demands_can_be_fused_p (src_block_info.reaching_out,
3361 expr))
3363 new_info = src_block_info.reaching_out.global_merge (
3364 expr, eg->src->index);
3365 new_info.set_dirty ();
3366 prob += dest_block_info.probability;
3368 else if (!src_block_info.reaching_out.compatible_p (expr)
3369 && !m_vector_manager->earliest_fusion_worthwhile_p (
3370 eg->src))
3372 new_info.set_empty ();
3373 prob = profile_probability::uninitialized ();
3375 else if (!src_block_info.reaching_out.compatible_p (expr)
3376 && dest_block_info.probability
3377 > src_block_info.probability)
/* Incompatible demands: keep the one on the hotter path.  */
3379 new_info = expr;
3380 new_info.set_dirty ();
3381 prob = dest_block_info.probability;
3383 else
3384 continue;
3385 update_block_info (eg->src->index, prob, new_info);
3386 changed_p = true;
3388 else
/* VALID reaching-out: fuse EXPR into the existing vsetvl of the
   source block when legal, rewriting that vsetvl in place.  */
3390 rtx vl = NULL_RTX;
3391 if (vsetvl_insn_p (
3392 src_block_info.reaching_out.get_insn ()->rtl ())
3393 && vsetvl_dominated_by_p (eg->src, expr,
3394 src_block_info.reaching_out,
3395 true))
3397 else if (!demands_can_be_fused_p (src_block_info.reaching_out,
3398 expr))
3399 continue;
3400 else if (!earliest_pred_can_be_fused_p (
3401 crtl->ssa->bb (eg->src),
3402 src_block_info.reaching_out, expr, &vl))
3403 continue;
3405 vector_insn_info new_info
3406 = src_block_info.reaching_out.global_merge (expr,
3407 eg->src->index);
3409 prob = std::max (dest_block_info.probability,
3410 src_block_info.probability);
3411 change_vsetvl_insn (new_info.get_insn (), new_info, vl);
3412 update_block_info (eg->src->index, prob, new_info);
3413 changed_p = true;
3418 return changed_p;
3421 /* Fuse VSETVL demand info according LCM computed location. */
3422 void
3423 pass_vsetvl::vsetvl_fusion (void)
3425 /* Fuse VSETVL demand info until VSETVL CFG fixed. */
3426 bool changed_p = true;
3427 int fusion_no = 0;
3428 while (changed_p)
3430 changed_p = false;
3431 fusion_no++;
3432 prune_expressions ();
3433 m_vector_manager->create_bitmap_vectors ();
3434 compute_local_properties ();
3435 /* Compute global availability. */
3436 compute_available (m_vector_manager->vector_comp,
3437 m_vector_manager->vector_kill,
3438 m_vector_manager->vector_avout,
3439 m_vector_manager->vector_avin);
3440 /* Compute global anticipatability. */
3441 compute_antinout_edge (m_vector_manager->vector_antic,
3442 m_vector_manager->vector_transp,
3443 m_vector_manager->vector_antin,
3444 m_vector_manager->vector_antout);
3445 /* Compute earliestness. */
3446 compute_earliest (m_vector_manager->vector_edge_list,
3447 m_vector_manager->vector_exprs.length (),
3448 m_vector_manager->vector_antin,
3449 m_vector_manager->vector_antout,
3450 m_vector_manager->vector_avout,
3451 m_vector_manager->vector_kill,
3452 m_vector_manager->vector_earliest);
3453 changed_p |= earliest_fusion ();
3454 if (dump_file && (dump_flags & TDF_DETAILS))
3456 fprintf (dump_file, "\nEARLIEST fusion %d\n", fusion_no);
3457 m_vector_manager->dump (dump_file);
3459 m_vector_manager->free_bitmap_vectors ();
3460 if (!m_vector_manager->vector_exprs.is_empty ())
3461 m_vector_manager->vector_exprs.release ();
3465 /* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
3466 bool
3467 pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
3468 const vector_insn_info &info) const
3470 if (!m_vector_manager->all_same_ratio_p (
3471 m_vector_manager->vector_avin[cfg_bb->index]))
3472 return false;
3474 if (!m_vector_manager->all_same_avl_p (
3475 cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
3476 return false;
3478 size_t expr_id
3479 = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
3480 if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
3481 return false;
3482 if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
3483 return false;
3485 edge e;
3486 edge_iterator ei;
3487 bool all_valid_p = true;
3488 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3490 if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
3492 all_valid_p = false;
3493 break;
3497 if (!all_valid_p)
3498 return false;
3499 return true;
3502 /* Optimize the case like this:
3504 bb 0:
3505 vsetvl 0 a5,zero,e8,mf8
3506 insn 0 (demand SEW + LMUL)
3507 bb 1:
3508 vsetvl 1 a5,zero,e16,mf4
3509 insn 1 (demand SEW + LMUL)
3511 In this case, we should be able to refine
3512 vsetvl 1 into vsetvl zero, zero according AVIN. */
3513 void
3514 pass_vsetvl::refine_vsetvls (void) const
3516 basic_block cfg_bb;
3517 FOR_EACH_BB_FN (cfg_bb, cfun)
/* Only blocks whose local demand is a concrete (VALID) vsetvl demand
   and that pass can_refine_vsetvl_p are candidates.  */
3519 auto info = get_block_info (cfg_bb).local_dem;
3520 insn_info *insn = info.get_insn ();
3521 if (!info.valid_p ())
3522 continue;
3524 rtx_insn *rinsn = insn->rtl ();
3525 if (!can_refine_vsetvl_p (cfg_bb, info))
3526 continue;
3528 /* We can't refine user vsetvl into vsetvl zero,zero since the dest
3529 will be used by the following instructions.  Record it so the
3530 DF-based post optimization can refine it once the dest is proven
3531 dead (see df_post_optimization).  */
3530 if (vector_config_insn_p (rinsn))
3532 m_vector_manager->to_refine_vsetvls.add (rinsn);
3533 continue;
3536 /* If all incoming edges to a block have a vector state that is compatible
3537 with the block. In such a case we need not emit a vsetvl in the current
3538 block. */
3540 gcc_assert (has_vtype_op (insn->rtl ()));
/* The vsetvl emitted in Phase 2 sits immediately before the RVV insn.  */
3541 rinsn = PREV_INSN (insn->rtl ());
3542 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
3543 if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
/* Don't delete it here if LCM already scheduled it for deletion
   (vector_del); cleanup_vsetvls will handle that case.  */
3545 size_t id = m_vector_manager->get_expr_id (info);
3546 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
3547 continue;
3548 eliminate_insn (rinsn);
3550 else
3552 rtx new_pat
3553 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
3554 change_insn (rinsn, new_pat);
/* Delete the vsetvls LCM marked redundant (vector_del): for each block
   with a deletable expression, either mark a DIRTY reaching-out as
   UNKNOWN, or remove the Phase-2-emitted vsetvl preceding the RVV insn.
   User vsetvls are only recorded for the later DF post optimization
   since their destination register may still be used.  */
3559 void
3560 pass_vsetvl::cleanup_vsetvls ()
3562 basic_block cfg_bb;
3563 FOR_EACH_BB_FN (cfg_bb, cfun)
3565 auto &info = get_block_info (cfg_bb).reaching_out;
/* LCM can schedule at most one deletion per block.  */
3566 gcc_assert (m_vector_manager->expr_set_num (
3567 m_vector_manager->vector_del[cfg_bb->index])
3568 <= 1);
3569 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3571 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
3573 if (info.dirty_p ())
3574 info.set_unknown ();
3575 else
3577 const auto dem = get_block_info (cfg_bb).local_dem;
3578 gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
3579 insn_info *insn = dem.get_insn ();
3580 gcc_assert (insn && insn->rtl ());
3581 rtx_insn *rinsn;
3582 /* We can't eliminate user vsetvl since the dest will be used
3583 * by the following instructions. */
3584 if (vector_config_insn_p (insn->rtl ()))
3586 m_vector_manager->to_delete_vsetvls.add (insn->rtl ());
3587 continue;
3590 gcc_assert (has_vtype_op (insn->rtl ()));
/* Remove the vsetvl inserted right before the RVV insn in Phase 2.  */
3591 rinsn = PREV_INSN (insn->rtl ());
3592 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
3593 eliminate_insn (rinsn);
/* Materialize the LCM result: emit vsetvl insns on the edges chosen by
   LCM (vector_insert), then emit vsetvls on the successor edges of every
   block whose reaching-out info is DIRTY.  Returns true if any insn was
   queued on an edge so the caller must run commit_edge_insertions.  */
3600 bool
3601 pass_vsetvl::commit_vsetvls (void)
3603 bool need_commit = false;
3605 for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
3607 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3609 edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
3610 if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
3612 const vector_insn_info *require
3613 = m_vector_manager->vector_exprs[i];
3614 gcc_assert (require->valid_or_dirty_p ());
3615 rtl_profile_for_edge (eg);
3616 start_sequence ();
3618 insn_info *insn = require->get_insn ();
3619 vector_insn_info prev_info = vector_insn_info ();
/* If all statuses available out of the source block agree on ratio
   and AVL, use that common status as the previous VL/VTYPE so
   insert_vsetvl can emit a cheaper (possibly vtype-only) form.  */
3620 sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
3621 if (m_vector_manager->all_same_ratio_p (bitdata)
3622 && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
3624 size_t first = bitmap_first_set_bit (bitdata);
3625 prev_info = *m_vector_manager->vector_exprs[first];
3628 insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
3629 rtx_insn *rinsn = get_insns ();
3630 end_sequence ();
3631 default_rtl_profile ();
3633 /* We should not get an abnormal edge here. */
3634 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3635 need_commit = true;
3636 insert_insn_on_edge (rinsn, eg);
3638 if (dump_file)
3640 fprintf (dump_file,
3641 "\nInsert vsetvl insn %d at edge %d from <bb %d> to "
3642 "<bb %d>:\n",
3643 INSN_UID (rinsn), ed, eg->src->index,
3644 eg->dest->index);
3645 print_rtl_single (dump_file, rinsn);
/* Second part: blocks left DIRTY by fusion need a concrete vsetvl on
   each outgoing edge; build the pattern once per block.  */
3651 for (const bb_info *bb : crtl->ssa->bbs ())
3653 basic_block cfg_bb = bb->cfg_bb ();
3654 const auto reaching_out = get_block_info (cfg_bb).reaching_out;
3655 if (!reaching_out.dirty_p ())
3656 continue;
3658 rtx new_pat;
3659 if (!reaching_out.demand_p (DEMAND_AVL))
/* AVL not demanded: any AVL works, use 0 and discard the result.  */
3661 vl_vtype_info new_info = reaching_out;
3662 new_info.set_avl_info (avl_info (const0_rtx, nullptr));
3663 new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
3665 else if (can_refine_vsetvl_p (cfg_bb, reaching_out))
3666 new_pat
3667 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
3668 else if (vlmax_avl_p (reaching_out.get_avl ()))
3670 rtx vl = reaching_out.get_avl_or_vl_reg ();
3671 new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, vl);
3673 else
3674 new_pat
3675 = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);
3677 edge eg;
3678 edge_iterator eg_iterator;
3679 FOR_EACH_EDGE (eg, eg_iterator, cfg_bb->succs)
3681 /* We should not get an abnormal edge here. */
3682 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3683 /* We failed to optimize this case in Phase 3 (earliest fusion):
3685 bb 2: vsetvl a5, a3 ...
3686 goto bb 4
3687 bb 3: vsetvl a5, a2 ...
3688 goto bb 4
3689 bb 4: vsetvli zero, a5 ---> Redundant, should be elided.
3691 Since "a5" value can come from either bb 2 or bb 3, we can't make
3692 it optimized in Phase 3 which will make phase 3 so complicated.
3693 Now, we do post optimization here to elide the redundant VSETVL
3694 insn in bb4. */
3695 if (m_vector_manager->vsetvl_dominated_by_all_preds_p (cfg_bb,
3696 reaching_out))
3697 continue;
3699 start_sequence ();
3700 emit_insn (copy_rtx (new_pat));
3701 rtx_insn *rinsn = get_insns ();
3702 end_sequence ();
3704 insert_insn_on_edge (rinsn, eg);
3705 need_commit = true;
3706 if (dump_file)
3708 fprintf (dump_file,
3709 "\nInsert vsetvl insn %d from <bb %d> to <bb %d>:\n",
3710 INSN_UID (rinsn), cfg_bb->index, eg->dest->index);
3711 print_rtl_single (dump_file, rinsn);
3716 return need_commit;
3719 void
3720 pass_vsetvl::pre_vsetvl (void)
3722 /* Compute entity list. */
3723 prune_expressions ();
3725 m_vector_manager->create_bitmap_vectors ();
3726 compute_local_properties ();
3727 m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
3728 m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
3729 m_vector_manager->vector_comp, m_vector_manager->vector_antic,
3730 m_vector_manager->vector_kill, m_vector_manager->vector_avin,
3731 m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
3732 &m_vector_manager->vector_del);
3734 /* We should dump the information before CFG is changed. Otherwise it will
3735 produce ICE (internal compiler error). */
3736 if (dump_file && (dump_flags & TDF_DETAILS))
3737 m_vector_manager->dump (dump_file);
3739 refine_vsetvls ();
3740 cleanup_vsetvls ();
3741 bool need_commit = commit_vsetvls ();
3742 if (need_commit)
3743 commit_edge_insertions ();
3746 /* Some instruction can not be accessed in RTL_SSA when we don't re-init
3747 the new RTL_SSA framework but it is definetely at the END of the block.
3749 Here we optimize the VSETVL is hoisted by LCM:
3751 Before LCM:
3752 bb 1:
3753 vsetvli a5,a2,e32,m1,ta,mu
3754 bb 2:
3755 vsetvli zero,a5,e32,m1,ta,mu
3758 After LCM:
3759 bb 1:
3760 vsetvli a5,a2,e32,m1,ta,mu
3761 LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
3762 bb 2:
3765 rtx_insn *
3766 pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const
3768 rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ());
3769 if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl))
3771 if (JUMP_P (end_vsetvl))
3772 end_vsetvl = PREV_INSN (end_vsetvl);
3774 if (NONDEBUG_INSN_P (end_vsetvl)
3775 && vsetvl_discard_result_insn_p (end_vsetvl))
3777 /* Only handle single succ. here, multiple succ. is much
3778 more complicated. */
3779 if (single_succ_p (bb->cfg_bb ()))
3781 edge e = single_succ_edge (bb->cfg_bb ());
3782 *dem = get_block_info (e->dest).local_dem;
3783 return end_vsetvl;
3787 return nullptr;
3790 /* This predicator should only used within same basic block. */
3791 static bool
3792 local_avl_compatible_p (rtx avl1, rtx avl2)
3794 if (!REG_P (avl1) || !REG_P (avl2))
3795 return false;
3797 return REGNO (avl1) == REGNO (avl2);
3800 /* Local user vsetvl optimizaiton:
3802 Case 1:
3803 vsetvl a5,a4,e8,mf8
3805 vsetvl zero,a5,e8,mf8 --> Eliminate directly.
3807 Case 2:
3808 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3810 vsetvl zero,a5,e32,mf2 --> Eliminate directly. */
/* Scan BB backwards; whenever two adjacent vsetvl configurations are
   compatible (same VLMAX-relevant demand, same AVL register), merge the
   later one's demand into the earlier vsetvl and delete the later insn.  */
3811 void
3812 pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const
3814 rtx_insn *prev_vsetvl = nullptr;
3815 rtx_insn *curr_vsetvl = nullptr;
/* RVV_VLMAX is used as a placeholder meaning "no AVL tracked yet".  */
3816 rtx vl_placeholder = RVV_VLMAX;
3817 rtx prev_avl = vl_placeholder;
3818 rtx curr_avl = vl_placeholder;
3819 vector_insn_info prev_dem;
3821 /* Instruction inserted by LCM is not appeared in RTL-SSA yet, try to
3822 found those instruciton. */
3823 if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem))
3825 prev_avl = get_avl (end_vsetvl);
3826 prev_vsetvl = end_vsetvl;
3829 bool skip_one = false;
3830 /* Backward propgate vsetvl info, drop the later one (prev_vsetvl) if it's
3831 compatible with current vsetvl (curr_avl), and merge the vtype and avl
3832 info. into current vsetvl. */
3833 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
3835 rtx_insn *rinsn = insn->rtl ();
3836 const auto &curr_dem = get_vector_info (insn);
3837 bool need_invalidate = false;
3839 /* Skip if this insn already handled in last iteration. */
3840 if (skip_one)
3842 skip_one = false;
3843 continue;
3846 if (vsetvl_insn_p (rinsn))
3848 curr_vsetvl = rinsn;
3849 /* vsetvl are using vl rather than avl since it will try to merge
3850 with other vsetvl_discard_result.
3852 v--- avl
3853 vsetvl a5,a4,e8,mf8 # vsetvl
3854 ... ^--- vl
3855 vsetvl zero,a5,e8,mf8 # vsetvl_discard_result
3856 ^--- avl
3858 curr_avl = get_vl (rinsn);
3859 /* vsetvl is a cut point of local backward vsetvl elimination. */
3860 need_invalidate = true;
3862 else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn))
3863 && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn))
3864 || vsetvl_insn_p (PREV_INSN (rinsn))))
/* An RVV insn preceded by its configuring vsetvl: treat the pair
   as one unit whose AVL is the RVV insn's demanded AVL.  */
3866 curr_vsetvl = PREV_INSN (rinsn);
3868 if (vsetvl_insn_p (PREV_INSN (rinsn)))
3870 /* Need invalidate and skip if it's vsetvl. */
3871 need_invalidate = true;
3872 /* vsetvl_discard_result_insn_p won't appeared in RTL-SSA,
3873 * so only need to skip for vsetvl. */
3874 skip_one = true;
3877 curr_avl = curr_dem.get_avl ();
3879 /* Some instrucion like pred_extract_first<mode> don't reqruie avl, so
3880 the avl is null, use vl_placeholder for unify the handling
3881 logic. */
3882 if (!curr_avl)
3883 curr_avl = vl_placeholder;
3885 else if (insn->is_call () || insn->is_asm ()
3886 || find_access (insn->defs (), VL_REGNUM)
3887 || find_access (insn->defs (), VTYPE_REGNUM)
3888 || (REG_P (prev_avl)
3889 && find_access (insn->defs (), REGNO (prev_avl))))
3891 /* Invalidate if this insn can't propagate vl, vtype or avl. */
3892 need_invalidate = true;
3893 prev_dem = vector_insn_info ();
3895 else
3896 /* Not interested instruction. */
3897 continue;
3899 /* Local AVL compatibility checking is simpler than global, we only
3900 need to check the REGNO is same. */
3901 if (prev_dem.valid_or_dirty_p ()
3902 && prev_dem.skip_avl_compatible_p (curr_dem)
3903 && local_avl_compatible_p (prev_avl, curr_avl))
3905 /* curr_dem and prev_dem is compatible! */
3906 /* Update avl info since we need to make sure they are fully
3907 compatible before merge. */
3908 prev_dem.set_avl_info (curr_dem.get_avl_info ());
3909 /* Merge both and update into curr_vsetvl. */
3910 prev_dem = curr_dem.local_merge (prev_dem);
3911 change_vsetvl_insn (curr_dem.get_insn (), prev_dem);
3912 /* Then we can drop prev_vsetvl. */
3913 eliminate_insn (prev_vsetvl);
/* Either restart tracking after a cut point, or advance the sliding
   (prev, curr) window one step backwards.  */
3916 if (need_invalidate)
3918 prev_vsetvl = nullptr;
3919 curr_vsetvl = nullptr;
3920 prev_avl = vl_placeholder;
3921 curr_avl = vl_placeholder;
3922 prev_dem = vector_insn_info ();
3924 else
3926 prev_vsetvl = curr_vsetvl;
3927 prev_avl = curr_avl;
3928 prev_dem = curr_dem;
3933 /* Return the first vsetvl instruction in CFG_BB or NULL if
3934 none exists or if a user RVV instruction is enountered
3935 prior to any vsetvl. */
3936 static rtx_insn *
3937 get_first_vsetvl_before_rvv_insns (basic_block cfg_bb,
3938 enum vsetvl_type insn_type)
3940 gcc_assert (insn_type == VSETVL_DISCARD_RESULT
3941 || insn_type == VSETVL_VTYPE_CHANGE_ONLY);
3942 rtx_insn *rinsn;
3943 FOR_BB_INSNS (cfg_bb, rinsn)
3945 if (!NONDEBUG_INSN_P (rinsn))
3946 continue;
3947 /* If we don't find any inserted vsetvli before user RVV instructions,
3948 we don't need to optimize the vsetvls in this block. */
3949 if (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn))
3950 return nullptr;
3952 if (insn_type == VSETVL_DISCARD_RESULT
3953 && vsetvl_discard_result_insn_p (rinsn))
3954 return rinsn;
3955 if (insn_type == VSETVL_VTYPE_CHANGE_ONLY
3956 && vsetvl_vtype_change_only_p (rinsn))
3957 return rinsn;
3959 return nullptr;
3962 /* Global user vsetvl optimizaiton:
3964 Case 1:
3965 bb 1:
3966 vsetvl a5,a4,e8,mf8
3968 bb 2:
3970 vsetvl zero,a5,e8,mf8 --> Eliminate directly.
3972 Case 2:
3973 bb 1:
3974 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3976 bb 2:
3978 vsetvl zero,a5,e32,mf2 --> Eliminate directly.
3980 Case 3:
3981 bb 1:
3982 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3984 bb 2:
3986 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
3987 goto bb 3
3988 bb 3:
3990 vsetvl zero,a5,e32,mf2 --> Eliminate directly.
/* Returns true if the vsetvl at the head (or LCM-inserted end) of BB was
   eliminated by reshaping the user vsetvls in all predecessors.  */
3992 bool
3993 pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info *bb) const
3995 rtx_insn *vsetvl_rinsn = NULL;
3996 vector_insn_info dem = vector_insn_info ();
3997 const auto &block_info = get_block_info (bb);
3998 basic_block cfg_bb = bb->cfg_bb ();
4000 if (block_info.local_dem.valid_or_dirty_p ())
4002 /* Optimize the local vsetvl. */
4003 dem = block_info.local_dem;
4004 vsetvl_rinsn
4005 = get_first_vsetvl_before_rvv_insns (cfg_bb, VSETVL_DISCARD_RESULT);
4007 if (!vsetvl_rinsn)
4008 /* Optimize the global vsetvl inserted by LCM. */
4009 vsetvl_rinsn = get_vsetvl_at_end (bb, &dem);
4011 /* No need to optimize if block doesn't have vsetvl instructions. */
4012 if (!dem.valid_or_dirty_p () || !vsetvl_rinsn || !dem.get_avl_source ()
4013 || !dem.has_avl_reg ())
4014 return false;
4016 /* Condition 1: Check it has preds. */
4017 if (EDGE_COUNT (cfg_bb->preds) == 0)
4018 return false;
4020 /* If all preds has VL/VTYPE status setted by user vsetvls, and these
4021 user vsetvls are all skip_avl_compatible_p with the vsetvl in this
4022 block, we can eliminate this vsetvl instruction. */
4023 sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
4025 unsigned int bb_index;
4026 sbitmap_iterator sbi;
4027 rtx avl = dem.get_avl ();
/* All reaching definitions of the demanded AVL register; each incoming
   status must define AVL through one of these sets.  */
4028 hash_set<set_info *> sets
4029 = get_all_sets (dem.get_avl_source (), true, false, false);
4030 /* Condition 2: All VL/VTYPE available in are all compatible. */
4031 EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
4033 const auto &expr = m_vector_manager->vector_exprs[bb_index];
4034 const auto &insn = expr->get_insn ();
4035 def_info *def = find_access (insn->defs (), REGNO (avl));
4036 set_info *set = safe_dyn_cast<set_info *> (def);
4037 if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
4038 || !sets.contains (set))
4039 return false;
4042 /* Condition 3: We don't do the global optimization for the block
4043 has a pred is entry block or exit block. */
4044 /* Condition 4: All preds have available VL/VTYPE out. */
4045 edge e;
4046 edge_iterator ei;
4047 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
4049 sbitmap avout = m_vector_manager->vector_avout[e->src->index];
4050 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
4051 || e->src == EXIT_BLOCK_PTR_FOR_FN (cfun)
4052 || (unsigned int) e->src->index
4053 >= m_vector_manager->vector_block_infos.length ()
4054 || bitmap_empty_p (avout))
4055 return false;
4057 EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
4059 const auto &expr = m_vector_manager->vector_exprs[bb_index];
4060 const auto &insn = expr->get_insn ();
4061 def_info *def = find_access (insn->defs (), REGNO (avl));
4062 set_info *set = safe_dyn_cast<set_info *> (def);
4063 if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
4064 || !sets.contains (set) || !expr->skip_avl_compatible_p (dem))
4065 return false;
4069 /* Step1: Reshape the VL/VTYPE status to make sure everything compatible. */
/* Only predecessor vsetvls post-dominated by this block are reshaped
   (rewritten with the merged demand); see get_dominated_by below.  */
4070 auto_vec<basic_block> pred_cfg_bbs
4071 = get_dominated_by (CDI_POST_DOMINATORS, cfg_bb);
4072 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
4074 sbitmap avout = m_vector_manager->vector_avout[e->src->index];
4075 EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
4077 vector_insn_info prev_dem = *m_vector_manager->vector_exprs[bb_index];
4078 vector_insn_info curr_dem = dem;
4079 insn_info *insn = prev_dem.get_insn ();
4080 if (!pred_cfg_bbs.contains (insn->bb ()->cfg_bb ()))
4081 continue;
4082 /* Update avl info since we need to make sure they are fully
4083 compatible before merge. */
4084 curr_dem.set_avl_info (prev_dem.get_avl_info ());
4085 /* Merge both and update into curr_vsetvl. */
4086 prev_dem = curr_dem.local_merge (prev_dem);
4087 change_vsetvl_insn (insn, prev_dem);
4091 /* Step2: eliminate the vsetvl instruction. */
4092 eliminate_insn (vsetvl_rinsn);
4093 return true;
4096 /* This function does the following post optimization base on RTL_SSA:
4098 1. Local user vsetvl optimizations.
4099 2. Global user vsetvl optimizations.
4100 3. AVL dependencies removal:
4101 Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
4102 implicitly. Since we will emit VSETVL instruction and make RVV
4103 instructions depending on VL/VTYPE global status registers, we remove the
4104 such AVL operand in the RVV instructions pattern here in order to remove
4105 AVL dependencies when AVL operand is a register operand.
4107 Before the VSETVL PASS:
4108 li a5,32
4110 vadd.vv (..., a5)
4111 After the VSETVL PASS:
4112 li a5,32
4113 vsetvli zero, a5, ...
4115 vadd.vv (..., const_int 0). */
4116 void
4117 pass_vsetvl::ssa_post_optimization (void) const
4119 for (const bb_info *bb : crtl->ssa->bbs ())
4121 local_eliminate_vsetvl_insn (bb);
/* Each successful global elimination may expose another; iterate
   until nothing changes.  */
4122 bool changed_p = true;
4123 while (changed_p)
4125 changed_p = false;
4126 changed_p |= global_eliminate_vsetvl_insn (bb);
4128 for (insn_info *insn : bb->real_nondebug_insns ())
4130 rtx_insn *rinsn = insn->rtl ();
/* Drop insns that merely compute the VLMAX AVL placeholder.  */
4131 if (vlmax_avl_insn_p (rinsn))
4133 eliminate_insn (rinsn);
4134 continue;
4137 /* Erase the AVL operand from the instruction. */
4138 if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
4139 continue;
4140 rtx avl = get_vl (rinsn);
/* Only safe when the AVL register appears exactly once in the
   pattern; otherwise another operand would be clobbered too.  */
4141 if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
4143 /* Get the list of uses for the new instruction. */
4144 auto attempt = crtl->ssa->new_change_attempt ();
4145 insn_change change (insn);
4146 /* Remove the use of the substituted value. */
4147 access_array_builder uses_builder (attempt);
4148 uses_builder.reserve (insn->num_uses () - 1);
4149 for (use_info *use : insn->uses ())
4150 if (use != find_access (insn->uses (), REGNO (avl)))
4151 uses_builder.quick_push (use);
4152 use_array new_uses = use_array (uses_builder.finish ());
4153 change.new_uses = new_uses;
4154 change.move_range = insn->ebb ()->insn_range ();
4155 rtx pat;
4156 if (fault_first_load_p (rinsn))
4157 pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
4158 else
4160 rtx set = single_set (rinsn);
4161 rtx src
4162 = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
4163 pat = gen_rtx_SET (SET_DEST (set), src);
4165 bool ok = change_insn (crtl->ssa, change, insn, pat);
4166 gcc_assert (ok);
4172 /* Return true if the SET result is not used by any instructions. */
4173 static bool
4174 has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
4176 /* Handle the following case that can not be detected in RTL_SSA. */
4177 /* E.g.
4178 li a5, 100
4179 vsetvli a6, a5...
4181 vadd (use a6)
4183 The use of "a6" is removed from "vadd" but the information is
4184 not updated in RTL_SSA framework. We don't want to re-new
4185 a new RTL_SSA which is expensive, instead, we use data-flow
4186 analysis to check whether "a6" has no uses. */
4187 if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
4188 return false;
4190 rtx_insn *iter;
4191 for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
4192 iter = NEXT_INSN (iter))
4193 if (df_find_use (iter, regno_reg_rtx[regno]))
4194 return false;
4196 return true;
4199 /* This function does the following post optimization base on dataflow
4200 analysis:
4202 1. Change vsetvl rd, rs1 --> vsevl zero, rs1, if rd is not used by any
4203 nondebug instructions. Even though this PASS runs after RA and it doesn't
4204 help for reduce register pressure, it can help instructions scheduling since
4205 we remove the dependencies.
4207 2. Remove redundant user vsetvls base on outcome of Phase 4 (LCM) && Phase 5
4208 (AVL dependencies removal). */
4209 void
4210 pass_vsetvl::df_post_optimization (void) const
/* Re-run DF so liveness reflects the AVL-operand removals done in
   ssa_post_optimization; has_no_uses relies on it.  */
4212 df_analyze ();
4213 hash_set<rtx_insn *> to_delete;
4214 basic_block cfg_bb;
4215 rtx_insn *rinsn;
4216 FOR_ALL_BB_FN (cfg_bb, cfun)
4218 FOR_BB_INSNS (cfg_bb, rinsn)
4220 if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
4222 rtx vl = get_vl (rinsn);
4223 vector_insn_info info;
4224 info.parse_insn (rinsn);
/* Phase 4 recorded user vsetvls it wanted to delete/refine but
   could not touch while their dest might still be used; now that
   we can prove the dest register VL is dead, apply them.  */
4225 bool to_delete_p = m_vector_manager->to_delete_p (rinsn);
4226 bool to_refine_p = m_vector_manager->to_refine_p (rinsn);
4227 if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
4229 if (to_delete_p)
4230 to_delete.add (rinsn);
4231 else if (to_refine_p)
4233 rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
4234 info, NULL_RTX);
4235 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
4236 false);
4238 else if (!vlmax_avl_p (info.get_avl ()))
/* Dest is dead and AVL is not VLMAX: discard the result to
   drop the register dependency (helps scheduling).  */
4240 rtx new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info,
4241 NULL_RTX);
4242 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
4243 false);
/* Delete after the scan so iteration over the insn chain stays valid.  */
4249 for (rtx_insn *rinsn : to_delete)
4250 eliminate_insn (rinsn);
4253 void
4254 pass_vsetvl::init (void)
4256 if (optimize > 0)
4258 /* Initialization of RTL_SSA. */
4259 calculate_dominance_info (CDI_DOMINATORS);
4260 calculate_dominance_info (CDI_POST_DOMINATORS);
4261 df_analyze ();
4262 crtl->ssa = new function_info (cfun);
4265 m_vector_manager = new vector_infos_manager ();
4266 compute_probabilities ();
4268 if (dump_file && (dump_flags & TDF_DETAILS))
4270 fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
4271 m_vector_manager->dump (dump_file);
4275 void
4276 pass_vsetvl::done (void)
4278 if (optimize > 0)
4280 /* Finalization of RTL_SSA. */
4281 free_dominance_info (CDI_DOMINATORS);
4282 free_dominance_info (CDI_POST_DOMINATORS);
4283 if (crtl->ssa->perform_pending_updates ())
4284 cleanup_cfg (0);
4285 delete crtl->ssa;
4286 crtl->ssa = nullptr;
4288 m_vector_manager->release ();
4289 delete m_vector_manager;
4290 m_vector_manager = nullptr;
4293 /* Compute probability for each block. */
4294 void
4295 pass_vsetvl::compute_probabilities (void)
4297 /* Don't compute it in -O0 since we don't need it. */
4298 if (!optimize)
4299 return;
4300 edge e;
4301 edge_iterator ei;
4303 for (const bb_info *bb : crtl->ssa->bbs ())
4305 basic_block cfg_bb = bb->cfg_bb ();
4306 auto &curr_prob = get_block_info (cfg_bb).probability;
4308 /* GCC assume entry block (bb 0) are always so
4309 executed so set its probability as "always". */
4310 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4311 curr_prob = profile_probability::always ();
4312 /* Exit block (bb 1) is the block we don't need to process. */
4313 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4314 continue;
4316 gcc_assert (curr_prob.initialized_p ());
4317 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
4319 auto &new_prob = get_block_info (e->dest).probability;
4320 /* Normally, the edge probability should be initialized.
4321 However, some special testing code which is written in
4322 GIMPLE IR style force the edge probility uninitialized,
4323 we conservatively set it as never so that it will not
4324 affect PRE (Phase 3 && Phse 4). */
4325 if (!e->probability.initialized_p ())
4326 new_prob = profile_probability::never ();
4327 else if (!new_prob.initialized_p ())
4328 new_prob = curr_prob * e->probability;
4329 else if (new_prob == profile_probability::always ())
4330 continue;
4331 else
4332 new_prob += curr_prob * e->probability;
4337 /* Lazy vsetvl insertion for optimize > 0. */
4338 void
4339 pass_vsetvl::lazy_vsetvl (void)
4341 if (dump_file)
4342 fprintf (dump_file,
4343 "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
4344 "function:%s\n",
4345 n_basic_blocks_for_fn (cfun), function_name (cfun));
4347 /* Phase 1 - Compute the local dems within each block.
4348 The data-flow analysis within each block is backward analysis. */
4349 if (dump_file)
4350 fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
4351 for (const bb_info *bb : crtl->ssa->bbs ())
4352 compute_local_backward_infos (bb);
4353 if (dump_file && (dump_flags & TDF_DETAILS))
4354 m_vector_manager->dump (dump_file);
4356 /* Phase 2 - Emit vsetvl instructions within each basic block according to
4357 demand, compute and save ANTLOC && AVLOC of each block. */
4358 if (dump_file)
4359 fprintf (dump_file,
4360 "\nPhase 2: Emit vsetvl instruction within each block\n");
4361 for (const bb_info *bb : crtl->ssa->bbs ())
4362 emit_local_forward_vsetvls (bb);
4363 if (dump_file && (dump_flags & TDF_DETAILS))
4364 m_vector_manager->dump (dump_file);
4366 /* Phase 3 - Propagate demanded info across blocks. */
4367 if (dump_file)
4368 fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
4369 vsetvl_fusion ();
4371 /* Phase 4 - Lazy code motion. */
4372 if (dump_file)
4373 fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
4374 pre_vsetvl ();
4376 /* Phase 5 - Post optimization base on RTL_SSA. */
4377 if (dump_file)
4378 fprintf (dump_file, "\nPhase 5: Post optimization base on RTL_SSA\n");
4379 ssa_post_optimization ();
4381 /* Phase 6 - Post optimization base on data-flow analysis. */
4382 if (dump_file)
4383 fprintf (dump_file,
4384 "\nPhase 6: Post optimization base on data-flow analysis\n");
4385 df_post_optimization ();
4388 /* Main entry point for this pass. */
4389 unsigned int
4390 pass_vsetvl::execute (function *)
4392 if (n_basic_blocks_for_fn (cfun) <= 0)
4393 return 0;
4395 /* The RVV instruction may change after split which is not a stable
4396 instruction. We need to split it here to avoid potential issue
4397 since the VSETVL PASS is insert before split PASS. */
4398 split_all_insns ();
4400 /* Early return for there is no vector instructions. */
4401 if (!has_vector_insn (cfun))
4402 return 0;
4404 init ();
4406 if (!optimize)
4407 simple_vsetvl ();
4408 else
4409 lazy_vsetvl ();
4411 done ();
4412 return 0;
4415 rtl_opt_pass *
4416 make_pass_vsetvl (gcc::context *ctxt)
4418 return new pass_vsetvl (ctxt);