/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
48 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
51 vect_free_slp_tree (slp_tree node
)
56 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
57 vect_free_slp_tree (child
);
60 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
61 /* After transform some stmts are removed and thus their vinfo is gone. */
62 if (vinfo_for_stmt (stmt
))
64 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
65 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
68 SLP_TREE_CHILDREN (node
).release ();
69 SLP_TREE_SCALAR_STMTS (node
).release ();
70 SLP_TREE_VEC_STMTS (node
).release ();
71 SLP_TREE_LOAD_PERMUTATION (node
).release ();
77 /* Free the memory allocated for the SLP instance. */
80 vect_free_slp_instance (slp_instance instance
)
82 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
83 SLP_INSTANCE_LOADS (instance
).release ();
88 /* Create an SLP node for SCALAR_STMTS. */
91 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
94 gimple
*stmt
= scalar_stmts
[0];
97 if (is_gimple_call (stmt
))
98 nops
= gimple_call_num_args (stmt
);
99 else if (is_gimple_assign (stmt
))
101 nops
= gimple_num_ops (stmt
) - 1;
102 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
105 else if (gimple_code (stmt
) == GIMPLE_PHI
)
110 node
= XNEW (struct _slp_tree
);
111 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
112 SLP_TREE_VEC_STMTS (node
).create (0);
113 SLP_TREE_CHILDREN (node
).create (nops
);
114 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
115 SLP_TREE_TWO_OPERATORS (node
) = false;
116 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
119 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
120 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
126 /* This structure is used in creation of an SLP tree. Each instance
127 corresponds to the same operand in a group of scalar stmts in an SLP
129 typedef struct _slp_oprnd_info
131 /* Def-stmts for the operands. */
132 vec
<gimple
*> def_stmts
;
133 /* Information about the first statement, its vector def-type, type, the
134 operand itself in case it's constant, and an indication if it's a pattern
137 enum vect_def_type first_dt
;
143 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
145 static vec
<slp_oprnd_info
>
146 vect_create_oprnd_info (int nops
, int group_size
)
149 slp_oprnd_info oprnd_info
;
150 vec
<slp_oprnd_info
> oprnds_info
;
152 oprnds_info
.create (nops
);
153 for (i
= 0; i
< nops
; i
++)
155 oprnd_info
= XNEW (struct _slp_oprnd_info
);
156 oprnd_info
->def_stmts
.create (group_size
);
157 oprnd_info
->first_dt
= vect_uninitialized_def
;
158 oprnd_info
->first_op_type
= NULL_TREE
;
159 oprnd_info
->first_pattern
= false;
160 oprnd_info
->second_pattern
= false;
161 oprnds_info
.quick_push (oprnd_info
);
168 /* Free operands info. */
171 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
174 slp_oprnd_info oprnd_info
;
176 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
178 oprnd_info
->def_stmts
.release ();
179 XDELETE (oprnd_info
);
182 oprnds_info
.release ();
186 /* Find the place of the data-ref in STMT in the interleaving chain that starts
187 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
190 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
192 gimple
*next_stmt
= first_stmt
;
195 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
200 if (next_stmt
== stmt
)
202 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
204 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
212 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
213 they are of a valid type and that they match the defs of the first stmt of
214 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
215 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
216 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
217 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
218 and code of comparison needs to be inverted. If there is any operand swap
219 in this function, *SWAP is set to non-zero value.
220 If there was a fatal error return -1; if the error could be corrected by
221 swapping operands of father node of this one, return 1; if everything is
225 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
226 gimple
*stmt
, unsigned stmt_num
,
227 vec
<slp_oprnd_info
> *oprnds_info
)
230 unsigned int i
, number_of_oprnds
;
232 enum vect_def_type dt
= vect_uninitialized_def
;
233 bool pattern
= false;
234 slp_oprnd_info oprnd_info
;
235 int first_op_idx
= 1;
236 bool commutative
= false;
237 bool first_op_cond
= false;
238 bool first
= stmt_num
== 0;
239 bool second
= stmt_num
== 1;
241 if (is_gimple_call (stmt
))
243 number_of_oprnds
= gimple_call_num_args (stmt
);
246 else if (is_gimple_assign (stmt
))
248 enum tree_code code
= gimple_assign_rhs_code (stmt
);
249 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
250 /* Swap can only be done for cond_expr if asked to, otherwise we
251 could result in different comparison code to the first stmt. */
252 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
253 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
255 first_op_cond
= true;
259 commutative
= commutative_tree_code (code
);
264 bool swapped
= (*swap
!= 0);
265 gcc_assert (!swapped
|| first_op_cond
);
266 for (i
= 0; i
< number_of_oprnds
; i
++)
271 /* Map indicating how operands of cond_expr should be swapped. */
272 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
273 int *map
= maps
[*swap
];
276 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
278 oprnd
= gimple_op (stmt
, map
[i
]);
281 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
283 oprnd_info
= (*oprnds_info
)[i
];
285 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
287 if (dump_enabled_p ())
289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
290 "Build SLP failed: can't analyze def for ");
291 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
292 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
298 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
299 from the pattern. Check that all the stmts of the node are in the
301 if (def_stmt
&& gimple_bb (def_stmt
)
302 && vect_stmt_in_region_p (vinfo
, def_stmt
)
303 && vinfo_for_stmt (def_stmt
)
304 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
305 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
306 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
309 if (!first
&& !oprnd_info
->first_pattern
310 /* Allow different pattern state for the defs of the
311 first stmt in reduction chains. */
312 && (oprnd_info
->first_dt
!= vect_reduction_def
313 || (!second
&& !oprnd_info
->second_pattern
)))
323 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
326 "Build SLP failed: some of the stmts"
327 " are in a pattern, and others are not ");
328 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
329 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
335 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
336 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
338 if (dt
== vect_unknown_def_type
)
340 if (dump_enabled_p ())
341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
342 "Unsupported pattern.\n");
346 switch (gimple_code (def_stmt
))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
355 "unsupported defining stmt:\n");
361 oprnd_info
->second_pattern
= pattern
;
365 oprnd_info
->first_dt
= dt
;
366 oprnd_info
->first_pattern
= pattern
;
367 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
371 /* Not first stmt of the group, check that the def-stmt/s match
372 the def-stmt/s of the first stmt. Allow different definition
373 types for reduction chains: the first stmt must be a
374 vect_reduction_def (a phi node), and the rest
375 vect_internal_def. */
376 if (((oprnd_info
->first_dt
!= dt
377 && !(oprnd_info
->first_dt
== vect_reduction_def
378 && dt
== vect_internal_def
)
379 && !((oprnd_info
->first_dt
== vect_external_def
380 || oprnd_info
->first_dt
== vect_constant_def
)
381 && (dt
== vect_external_def
382 || dt
== vect_constant_def
)))
383 || !types_compatible_p (oprnd_info
->first_op_type
,
386 /* Try swapping operands if we got a mismatch. */
395 if (dump_enabled_p ())
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
397 "Build SLP failed: different types\n");
403 /* Check the types of the definitions. */
406 case vect_constant_def
:
407 case vect_external_def
:
410 case vect_reduction_def
:
411 case vect_induction_def
:
412 case vect_internal_def
:
413 oprnd_info
->def_stmts
.quick_push (def_stmt
);
417 /* FORNOW: Not supported. */
418 if (dump_enabled_p ())
420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
421 "Build SLP failed: illegal type of def ");
422 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
423 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
433 /* If there are already uses of this stmt in a SLP instance then
434 we've committed to the operand order and can't swap it. */
435 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
437 if (dump_enabled_p ())
439 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
440 "Build SLP failed: cannot swap operands of "
442 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
449 tree cond
= gimple_assign_rhs1 (stmt
);
450 enum tree_code code
= TREE_CODE (cond
);
455 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
456 &TREE_OPERAND (cond
, 1));
457 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
462 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
463 gimple_assign_rhs3_ptr (stmt
));
464 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
465 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
466 gcc_assert (code
!= ERROR_MARK
);
467 TREE_SET_CODE (cond
, code
);
471 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
472 gimple_assign_rhs2_ptr (stmt
));
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE
, vect_location
,
476 "swapped operands to match def types in ");
477 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
485 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
486 caller's attempt to find the vector type in STMT with the narrowest
487 element type. Return true if VECTYPE is nonnull and if it is valid
488 for VINFO. When returning true, update MAX_NUNITS to reflect the
489 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
490 as for vect_build_slp_tree. */
493 vect_record_max_nunits (vec_info
*vinfo
, gimple
*stmt
, unsigned int group_size
,
494 tree vectype
, poly_uint64
*max_nunits
)
498 if (dump_enabled_p ())
500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
501 "Build SLP failed: unsupported data-type in ");
502 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
503 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
505 /* Fatal mismatch. */
509 /* If populating the vector type requires unrolling then fail
510 before adjusting *max_nunits for basic-block vectorization. */
511 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
512 unsigned HOST_WIDE_INT const_nunits
;
513 if (is_a
<bb_vec_info
> (vinfo
)
514 && (!nunits
.is_constant (&const_nunits
)
515 || const_nunits
> group_size
))
517 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
518 "Build SLP failed: unrolling required "
519 "in basic block SLP\n");
520 /* Fatal mismatch. */
524 /* In case of multiple types we need to detect the smallest type. */
525 vect_update_max_nunits (max_nunits
, vectype
);
529 /* Verify if the scalar stmts STMTS are isomorphic, require data
530 permutation or are of unsupported types of operation. Return
531 true if they are, otherwise return false and indicate in *MATCHES
532 which stmts are not isomorphic to the first one. If MATCHES[0]
533 is false then this indicates the comparison could not be
534 carried out or the stmts will never be vectorized by SLP.
536 Note COND_EXPR is possibly ismorphic to another one after swapping its
537 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
538 the first stmt by swapping the two operands of comparison; set SWAP[i]
539 to 2 if stmt I is isormorphic to the first stmt by inverting the code
540 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
541 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
544 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
545 vec
<gimple
*> stmts
, unsigned int group_size
,
546 unsigned nops
, poly_uint64
*max_nunits
,
547 bool *matches
, bool *two_operators
)
550 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
551 enum tree_code first_stmt_code
= ERROR_MARK
;
552 enum tree_code alt_stmt_code
= ERROR_MARK
;
553 enum tree_code rhs_code
= ERROR_MARK
;
554 enum tree_code first_cond_code
= ERROR_MARK
;
556 bool need_same_oprnds
= false;
557 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
560 machine_mode optab_op2_mode
;
561 machine_mode vec_mode
;
563 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
565 /* For every stmt in NODE find its def stmt/s. */
566 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
571 if (dump_enabled_p ())
573 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
574 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
577 /* Fail to vectorize statements marked as unvectorizable. */
578 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
580 if (dump_enabled_p ())
582 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
583 "Build SLP failed: unvectorizable statement ");
584 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
586 /* Fatal mismatch. */
591 lhs
= gimple_get_lhs (stmt
);
592 if (lhs
== NULL_TREE
)
594 if (dump_enabled_p ())
596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
597 "Build SLP failed: not GIMPLE_ASSIGN nor "
599 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
601 /* Fatal mismatch. */
606 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
607 vectype
= get_vectype_for_scalar_type (scalar_type
);
608 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
611 /* Fatal mismatch. */
616 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
618 rhs_code
= CALL_EXPR
;
619 if (gimple_call_internal_p (call_stmt
)
620 || gimple_call_tail_p (call_stmt
)
621 || gimple_call_noreturn_p (call_stmt
)
622 || !gimple_call_nothrow_p (call_stmt
)
623 || gimple_call_chain (call_stmt
))
625 if (dump_enabled_p ())
627 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
628 "Build SLP failed: unsupported call type ");
629 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
632 /* Fatal mismatch. */
638 rhs_code
= gimple_assign_rhs_code (stmt
);
640 /* Check the operation. */
643 first_stmt_code
= rhs_code
;
645 /* Shift arguments should be equal in all the packed stmts for a
646 vector shift with scalar shift operand. */
647 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
648 || rhs_code
== LROTATE_EXPR
649 || rhs_code
== RROTATE_EXPR
)
651 vec_mode
= TYPE_MODE (vectype
);
653 /* First see if we have a vector/vector shift. */
654 optab
= optab_for_tree_code (rhs_code
, vectype
,
658 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
660 /* No vector/vector shift, try for a vector/scalar shift. */
661 optab
= optab_for_tree_code (rhs_code
, vectype
,
666 if (dump_enabled_p ())
667 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
668 "Build SLP failed: no optab.\n");
669 /* Fatal mismatch. */
673 icode
= (int) optab_handler (optab
, vec_mode
);
674 if (icode
== CODE_FOR_nothing
)
676 if (dump_enabled_p ())
677 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
679 "op not supported by target.\n");
680 /* Fatal mismatch. */
684 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
685 if (!VECTOR_MODE_P (optab_op2_mode
))
687 need_same_oprnds
= true;
688 first_op1
= gimple_assign_rhs2 (stmt
);
692 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
694 need_same_oprnds
= true;
695 first_op1
= gimple_assign_rhs2 (stmt
);
700 if (first_stmt_code
!= rhs_code
701 && alt_stmt_code
== ERROR_MARK
)
702 alt_stmt_code
= rhs_code
;
703 if (first_stmt_code
!= rhs_code
704 && (first_stmt_code
!= IMAGPART_EXPR
705 || rhs_code
!= REALPART_EXPR
)
706 && (first_stmt_code
!= REALPART_EXPR
707 || rhs_code
!= IMAGPART_EXPR
)
708 /* Handle mismatches in plus/minus by computing both
709 and merging the results. */
710 && !((first_stmt_code
== PLUS_EXPR
711 || first_stmt_code
== MINUS_EXPR
)
712 && (alt_stmt_code
== PLUS_EXPR
713 || alt_stmt_code
== MINUS_EXPR
)
714 && rhs_code
== alt_stmt_code
)
715 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
716 && (first_stmt_code
== ARRAY_REF
717 || first_stmt_code
== BIT_FIELD_REF
718 || first_stmt_code
== INDIRECT_REF
719 || first_stmt_code
== COMPONENT_REF
720 || first_stmt_code
== MEM_REF
)))
722 if (dump_enabled_p ())
724 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
725 "Build SLP failed: different operation "
727 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
728 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
730 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
738 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
740 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
743 "Build SLP failed: different shift "
745 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
751 if (rhs_code
== CALL_EXPR
)
753 gimple
*first_stmt
= stmts
[0];
754 if (gimple_call_num_args (stmt
) != nops
755 || !operand_equal_p (gimple_call_fn (first_stmt
),
756 gimple_call_fn (stmt
), 0)
757 || gimple_call_fntype (first_stmt
)
758 != gimple_call_fntype (stmt
))
760 if (dump_enabled_p ())
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
763 "Build SLP failed: different calls in ");
764 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
773 /* Grouped store or load. */
774 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
776 if (REFERENCE_CLASS_P (lhs
))
784 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
787 /* Check that there are no loads from different interleaving
788 chains in the same node. */
789 if (prev_first_load
!= first_load
)
791 if (dump_enabled_p ())
793 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
795 "Build SLP failed: different "
796 "interleaving chains in one node ");
797 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
805 prev_first_load
= first_load
;
807 } /* Grouped access. */
810 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
812 /* Not grouped load. */
813 if (dump_enabled_p ())
815 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
816 "Build SLP failed: not grouped load ");
817 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
820 /* FORNOW: Not grouped loads are not supported. */
821 /* Fatal mismatch. */
826 /* Not memory operation. */
827 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
828 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
829 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
830 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
831 && rhs_code
!= CALL_EXPR
)
833 if (dump_enabled_p ())
835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
836 "Build SLP failed: operation");
837 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
838 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
840 /* Fatal mismatch. */
845 if (rhs_code
== COND_EXPR
)
847 tree cond_expr
= gimple_assign_rhs1 (stmt
);
848 enum tree_code cond_code
= TREE_CODE (cond_expr
);
849 enum tree_code swap_code
= ERROR_MARK
;
850 enum tree_code invert_code
= ERROR_MARK
;
853 first_cond_code
= TREE_CODE (cond_expr
);
854 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
856 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
857 swap_code
= swap_tree_comparison (cond_code
);
858 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
861 if (first_cond_code
== cond_code
)
863 /* Isomorphic can be achieved by swapping. */
864 else if (first_cond_code
== swap_code
)
866 /* Isomorphic can be achieved by inverting. */
867 else if (first_cond_code
== invert_code
)
871 if (dump_enabled_p ())
873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
874 "Build SLP failed: different"
876 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
888 for (i
= 0; i
< group_size
; ++i
)
892 /* If we allowed a two-operation SLP node verify the target can cope
893 with the permute we are going to use. */
894 if (alt_stmt_code
!= ERROR_MARK
895 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
897 unsigned int count
= TYPE_VECTOR_SUBPARTS (vectype
);
898 vec_perm_builder
sel (count
, count
, 1);
899 for (i
= 0; i
< count
; ++i
)
901 unsigned int elt
= i
;
902 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
904 sel
.quick_push (elt
);
906 vec_perm_indices
indices (sel
, 2, count
);
907 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
909 for (i
= 0; i
< group_size
; ++i
)
910 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
913 if (dump_enabled_p ())
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
916 "Build SLP failed: different operation "
918 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
922 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
928 *two_operators
= true;
934 /* Traits for the hash_set to record failed SLP builds for a stmt set.
935 Note we never remove apart from at destruction time so we do not
936 need a special value for deleted that differs from empty. */
939 typedef vec
<gimple
*> value_type
;
940 typedef vec
<gimple
*> compare_type
;
941 static inline hashval_t
hash (value_type
);
942 static inline bool equal (value_type existing
, value_type candidate
);
943 static inline bool is_empty (value_type x
) { return !x
.exists (); }
944 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
945 static inline void mark_empty (value_type
&x
) { x
.release (); }
946 static inline void mark_deleted (value_type
&x
) { x
.release (); }
947 static inline void remove (value_type
&x
) { x
.release (); }
950 bst_traits::hash (value_type x
)
953 for (unsigned i
= 0; i
< x
.length (); ++i
)
954 h
.add_int (gimple_uid (x
[i
]));
958 bst_traits::equal (value_type existing
, value_type candidate
)
960 if (existing
.length () != candidate
.length ())
962 for (unsigned i
= 0; i
< existing
.length (); ++i
)
963 if (existing
[i
] != candidate
[i
])
968 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
969 static scalar_stmts_set_t
*bst_fail
;
972 vect_build_slp_tree_2 (vec_info
*vinfo
,
973 vec
<gimple
*> stmts
, unsigned int group_size
,
974 poly_uint64
*max_nunits
,
975 vec
<slp_tree
> *loads
,
976 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
977 unsigned max_tree_size
);
980 vect_build_slp_tree (vec_info
*vinfo
,
981 vec
<gimple
*> stmts
, unsigned int group_size
,
982 poly_uint64
*max_nunits
, vec
<slp_tree
> *loads
,
983 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
984 unsigned max_tree_size
)
986 if (bst_fail
->contains (stmts
))
988 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
989 loads
, matches
, npermutes
, tree_size
,
991 /* When SLP build fails for stmts record this, otherwise SLP build
992 can be exponential in time when we allow to construct parts from
993 scalars, see PR81723. */
997 x
.create (stmts
.length ());
1004 /* Recursively build an SLP tree starting from NODE.
1005 Fail (and return a value not equal to zero) if def-stmts are not
1006 isomorphic, require data permutation or are of unsupported types of
1007 operation. Otherwise, return 0.
1008 The value returned is the depth in the SLP tree where a mismatch
1012 vect_build_slp_tree_2 (vec_info
*vinfo
,
1013 vec
<gimple
*> stmts
, unsigned int group_size
,
1014 poly_uint64
*max_nunits
,
1015 vec
<slp_tree
> *loads
,
1016 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1017 unsigned max_tree_size
)
1019 unsigned nops
, i
, this_tree_size
= 0;
1020 poly_uint64 this_max_nunits
= *max_nunits
;
1027 if (is_gimple_call (stmt
))
1028 nops
= gimple_call_num_args (stmt
);
1029 else if (is_gimple_assign (stmt
))
1031 nops
= gimple_num_ops (stmt
) - 1;
1032 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1035 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1040 /* If the SLP node is a PHI (induction or reduction), terminate
1042 if (gimple_code (stmt
) == GIMPLE_PHI
)
1044 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1045 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1046 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
1050 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
1051 /* Induction from different IVs is not supported. */
1052 if (def_type
== vect_induction_def
)
1054 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1055 if (stmt
!= stmts
[0])
1060 /* Else def types have to match. */
1061 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1063 /* But for reduction chains only check on the first stmt. */
1064 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1065 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
1067 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
1071 node
= vect_create_new_slp_node (stmts
);
1076 bool two_operators
= false;
1077 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1078 if (!vect_build_slp_tree_1 (vinfo
, swap
,
1079 stmts
, group_size
, nops
,
1080 &this_max_nunits
, matches
, &two_operators
))
1083 /* If the SLP node is a load, terminate the recursion. */
1084 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1085 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1087 *max_nunits
= this_max_nunits
;
1088 node
= vect_create_new_slp_node (stmts
);
1089 loads
->safe_push (node
);
1093 /* Get at the operands, verifying they are compatible. */
1094 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1095 slp_oprnd_info oprnd_info
;
1096 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1098 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1099 stmt
, i
, &oprnds_info
);
1101 matches
[(res
== -1) ? 0 : i
] = false;
1105 for (i
= 0; i
< group_size
; ++i
)
1108 vect_free_oprnd_info (oprnds_info
);
1112 auto_vec
<slp_tree
, 4> children
;
1113 auto_vec
<slp_tree
> this_loads
;
1118 max_tree_size
-= *tree_size
;
1120 /* Create SLP_TREE nodes for the definition node/s. */
1121 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1124 unsigned old_nloads
= this_loads
.length ();
1125 unsigned old_tree_size
= this_tree_size
;
1128 if (oprnd_info
->first_dt
!= vect_internal_def
1129 && oprnd_info
->first_dt
!= vect_reduction_def
1130 && oprnd_info
->first_dt
!= vect_induction_def
)
1133 if (++this_tree_size
> max_tree_size
)
1135 FOR_EACH_VEC_ELT (children
, j
, child
)
1136 vect_free_slp_tree (child
);
1137 vect_free_oprnd_info (oprnds_info
);
1141 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1142 group_size
, &this_max_nunits
,
1143 &this_loads
, matches
, npermutes
,
1145 max_tree_size
)) != NULL
)
1147 /* If we have all children of child built up from scalars then just
1148 throw that away and build it up this node from scalars. */
1149 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1150 /* ??? Rejecting patterns this way doesn't work. We'd have to
1151 do extra work to cancel the pattern so the uses see the
1153 && !is_pattern_stmt_p
1154 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1156 slp_tree grandchild
;
1158 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1159 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1164 this_loads
.truncate (old_nloads
);
1165 this_tree_size
= old_tree_size
;
1166 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1167 vect_free_slp_tree (grandchild
);
1168 SLP_TREE_CHILDREN (child
).truncate (0);
1170 dump_printf_loc (MSG_NOTE
, vect_location
,
1171 "Building parent vector operands from "
1172 "scalars instead\n");
1173 oprnd_info
->def_stmts
= vNULL
;
1174 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1175 children
.safe_push (child
);
1180 oprnd_info
->def_stmts
= vNULL
;
1181 children
.safe_push (child
);
1185 /* If the SLP build failed fatally and we analyze a basic-block
1186 simply treat nodes we fail to build as externally defined
1187 (and thus build vectors from the scalar defs).
1188 The cost model will reject outright expensive cases.
1189 ??? This doesn't treat cases where permutation ultimatively
1190 fails (or we don't try permutation below). Ideally we'd
1191 even compute a permutation that will end up with the maximum
1193 if (is_a
<bb_vec_info
> (vinfo
)
1195 /* ??? Rejecting patterns this way doesn't work. We'd have to
1196 do extra work to cancel the pattern so the uses see the
1198 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1200 dump_printf_loc (MSG_NOTE
, vect_location
,
1201 "Building vector operands from scalars\n");
1202 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1203 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1204 children
.safe_push (child
);
1205 oprnd_info
->def_stmts
= vNULL
;
1209 /* If the SLP build for operand zero failed and operand zero
1210 and one can be commutated try that for the scalar stmts
1211 that failed the match. */
1213 /* A first scalar stmt mismatch signals a fatal mismatch. */
1215 /* ??? For COND_EXPRs we can swap the comparison operands
1216 as well as the arms under some constraints. */
1218 && oprnds_info
[1]->first_dt
== vect_internal_def
1219 && is_gimple_assign (stmt
)
1220 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1222 /* Do so only if the number of not successful permutes was nor more
1223 than a cut-ff as re-trying the recursive match on
1224 possibly each level of the tree would expose exponential
1228 /* Verify if we can safely swap or if we committed to a specific
1229 operand order already. */
1230 for (j
= 0; j
< group_size
; ++j
)
1233 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1238 "Build SLP failed: cannot swap operands "
1240 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1246 /* Swap mismatched definition stmts. */
1247 dump_printf_loc (MSG_NOTE
, vect_location
,
1248 "Re-trying with swapped operands of stmts ");
1249 for (j
= 0; j
< group_size
; ++j
)
1252 std::swap (oprnds_info
[0]->def_stmts
[j
],
1253 oprnds_info
[1]->def_stmts
[j
]);
1254 dump_printf (MSG_NOTE
, "%d ", j
);
1256 dump_printf (MSG_NOTE
, "\n");
1257 /* And try again with scratch 'matches' ... */
1258 bool *tem
= XALLOCAVEC (bool, group_size
);
1259 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1260 group_size
, &this_max_nunits
,
1261 &this_loads
, tem
, npermutes
,
1263 max_tree_size
)) != NULL
)
1265 /* ... so if successful we can apply the operand swapping
1266 to the GIMPLE IL. This is necessary because for example
1267 vect_get_slp_defs uses operand indexes and thus expects
1268 canonical operand order. This is also necessary even
1269 if we end up building the operand from scalars as
1270 we'll continue to process swapped operand two. */
1271 for (j
= 0; j
< group_size
; ++j
)
1273 gimple
*stmt
= stmts
[j
];
1274 gimple_set_plf (stmt
, GF_PLF_1
, false);
1276 for (j
= 0; j
< group_size
; ++j
)
1278 gimple
*stmt
= stmts
[j
];
1281 /* Avoid swapping operands twice. */
1282 if (gimple_plf (stmt
, GF_PLF_1
))
1284 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1285 gimple_assign_rhs2_ptr (stmt
));
1286 gimple_set_plf (stmt
, GF_PLF_1
, true);
1289 /* Verify we swap all duplicates or none. */
1291 for (j
= 0; j
< group_size
; ++j
)
1293 gimple
*stmt
= stmts
[j
];
1294 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1297 /* If we have all children of child built up from scalars then
1298 just throw that away and build it up this node from scalars. */
1299 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1300 /* ??? Rejecting patterns this way doesn't work. We'd have
1301 to do extra work to cancel the pattern so the uses see the
1303 && !is_pattern_stmt_p
1304 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1307 slp_tree grandchild
;
1309 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1310 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1315 this_loads
.truncate (old_nloads
);
1316 this_tree_size
= old_tree_size
;
1317 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1318 vect_free_slp_tree (grandchild
);
1319 SLP_TREE_CHILDREN (child
).truncate (0);
1321 dump_printf_loc (MSG_NOTE
, vect_location
,
1322 "Building parent vector operands from "
1323 "scalars instead\n");
1324 oprnd_info
->def_stmts
= vNULL
;
1325 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1326 children
.safe_push (child
);
1331 oprnd_info
->def_stmts
= vNULL
;
1332 children
.safe_push (child
);
1340 gcc_assert (child
== NULL
);
1341 FOR_EACH_VEC_ELT (children
, j
, child
)
1342 vect_free_slp_tree (child
);
1343 vect_free_oprnd_info (oprnds_info
);
1347 vect_free_oprnd_info (oprnds_info
);
1350 *tree_size
+= this_tree_size
;
1351 *max_nunits
= this_max_nunits
;
1352 loads
->safe_splice (this_loads
);
1354 node
= vect_create_new_slp_node (stmts
);
1355 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1356 SLP_TREE_CHILDREN (node
).splice (children
);
1360 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1363 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1369 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1370 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1371 ? " (external)" : "");
1372 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1374 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1375 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1377 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1378 vect_print_slp_tree (dump_kind
, loc
, child
);
1382 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1383 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1384 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1385 stmts in NODE are to be marked. */
1388 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1394 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1397 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1398 if (j
< 0 || i
== j
)
1399 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1401 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1402 vect_mark_slp_stmts (child
, mark
, j
);
1406 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1409 vect_mark_slp_stmts_relevant (slp_tree node
)
1413 stmt_vec_info stmt_info
;
1416 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1419 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1421 stmt_info
= vinfo_for_stmt (stmt
);
1422 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1423 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1424 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1427 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1428 vect_mark_slp_stmts_relevant (child
);
1432 /* Rearrange the statements of NODE according to PERMUTATION. */
1435 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1436 vec
<unsigned> permutation
)
1439 vec
<gimple
*> tmp_stmts
;
1443 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1444 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1446 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1447 tmp_stmts
.create (group_size
);
1448 tmp_stmts
.quick_grow_cleared (group_size
);
1450 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1451 tmp_stmts
[permutation
[i
]] = stmt
;
1453 SLP_TREE_SCALAR_STMTS (node
).release ();
1454 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1458 /* Attempt to reorder stmts in a reduction chain so that we don't
1459 require any load permutation. Return true if that was possible,
1460 otherwise return false. */
1463 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1465 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1468 slp_tree node
, load
;
1470 /* Compare all the permutation sequences to the first one. We know
1471 that at least one load is permuted. */
1472 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1473 if (!node
->load_permutation
.exists ())
1475 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1477 if (!load
->load_permutation
.exists ())
1479 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1480 if (lidx
!= node
->load_permutation
[j
])
1484 /* Check that the loads in the first sequence are different and there
1485 are no gaps between them. */
1486 auto_sbitmap
load_index (group_size
);
1487 bitmap_clear (load_index
);
1488 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1490 if (lidx
>= group_size
)
1492 if (bitmap_bit_p (load_index
, lidx
))
1495 bitmap_set_bit (load_index
, lidx
);
1497 for (i
= 0; i
< group_size
; i
++)
1498 if (!bitmap_bit_p (load_index
, i
))
1501 /* This permutation is valid for reduction. Since the order of the
1502 statements in the nodes is not important unless they are memory
1503 accesses, we can rearrange the statements in all the nodes
1504 according to the order of the loads. */
1505 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1506 node
->load_permutation
);
1508 /* We are done, no actual permutations need to be generated. */
1509 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1510 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1512 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1513 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1514 /* But we have to keep those permutations that are required because
1515 of handling of gaps. */
1516 if (known_eq (unrolling_factor
, 1U)
1517 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1518 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1519 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1521 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1522 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1528 /* Check if the required load permutations in the SLP instance
1529 SLP_INSTN are supported. */
1532 vect_supported_load_permutation_p (slp_instance slp_instn
)
1534 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1535 unsigned int i
, j
, k
, next
;
1537 gimple
*stmt
, *load
, *next_load
;
1539 if (dump_enabled_p ())
1541 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1542 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1543 if (node
->load_permutation
.exists ())
1544 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1545 dump_printf (MSG_NOTE
, "%d ", next
);
1547 for (k
= 0; k
< group_size
; ++k
)
1548 dump_printf (MSG_NOTE
, "%d ", k
);
1549 dump_printf (MSG_NOTE
, "\n");
1552 /* In case of reduction every load permutation is allowed, since the order
1553 of the reduction statements is not important (as opposed to the case of
1554 grouped stores). The only condition we need to check is that all the
1555 load nodes are of the same size and have the same permutation (and then
1556 rearrange all the nodes of the SLP instance according to this
1559 /* Check that all the load nodes are of the same size. */
1560 /* ??? Can't we assert this? */
1561 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1562 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1565 node
= SLP_INSTANCE_TREE (slp_instn
);
1566 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1568 /* Reduction (there are no data-refs in the root).
1569 In reduction chain the order of the loads is not important. */
1570 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1571 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1572 vect_attempt_slp_rearrange_stmts (slp_instn
);
1574 /* In basic block vectorization we allow any subchain of an interleaving
1576 FORNOW: not supported in loop SLP because of realignment compications. */
1577 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1579 /* Check whether the loads in an instance form a subchain and thus
1580 no permutation is necessary. */
1581 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1583 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1585 bool subchain_p
= true;
1587 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1590 && (next_load
!= load
1591 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1596 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1599 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1602 stmt_vec_info group_info
1603 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1604 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1606 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info
));
1607 unsigned k
, maxk
= 0;
1608 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1611 /* In BB vectorization we may not actually use a loaded vector
1612 accessing elements in excess of GROUP_SIZE. */
1613 if (maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1615 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1616 "BB vectorization with gaps at the end of "
1617 "a load is not supported\n");
1621 /* Verify the permutation can be generated. */
1624 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1625 1, slp_instn
, true, &n_perms
))
1627 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1629 "unsupported load permutation\n");
1637 /* For loop vectorization verify we can generate the permutation. Be
1638 conservative about the vectorization factor, there are permutations
1639 that will use three vector inputs only starting from a specific factor
1640 and the vectorization factor is not yet final.
1641 ??? The SLP instance unrolling factor might not be the maximum one. */
1644 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1645 LOOP_VINFO_VECT_FACTOR
1646 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
))));
1647 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1648 if (node
->load_permutation
.exists ()
1649 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1650 slp_instn
, true, &n_perms
))
1657 /* Find the last store in SLP INSTANCE. */
1660 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1662 gimple
*last
= NULL
, *stmt
;
1664 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1666 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1667 if (is_pattern_stmt_p (stmt_vinfo
))
1668 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1670 last
= get_later_stmt (stmt
, last
);
1676 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1679 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1680 stmt_vector_for_cost
*prologue_cost_vec
,
1681 stmt_vector_for_cost
*body_cost_vec
,
1682 unsigned ncopies_for_cost
,
1683 scalar_stmts_set_t
* visited
)
1688 stmt_vec_info stmt_info
;
1691 /* If we already costed the exact same set of scalar stmts we're done.
1692 We share the generated vector stmts for those. */
1693 if (visited
->contains (SLP_TREE_SCALAR_STMTS (node
)))
1696 visited
->add (SLP_TREE_SCALAR_STMTS (node
).copy ());
1698 /* Recurse down the SLP tree. */
1699 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1700 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1701 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1702 body_cost_vec
, ncopies_for_cost
, visited
);
1704 /* Look at the first scalar stmt to determine the cost. */
1705 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1706 stmt_info
= vinfo_for_stmt (stmt
);
1707 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1709 vect_memory_access_type memory_access_type
1710 = (STMT_VINFO_STRIDED_P (stmt_info
)
1713 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1714 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1715 memory_access_type
, vect_uninitialized_def
,
1716 node
, prologue_cost_vec
, body_cost_vec
);
1719 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1720 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1722 /* If the load is permuted then the alignment is determined by
1723 the first group element not by the first scalar stmt DR. */
1724 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1725 stmt_info
= vinfo_for_stmt (stmt
);
1726 /* Record the cost for the permutation. */
1728 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1729 ncopies_for_cost
, instance
, true,
1731 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1732 stmt_info
, 0, vect_body
);
1733 unsigned assumed_nunits
1734 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info
));
1735 /* And adjust the number of loads performed. This handles
1736 redundancies as well as loads that are later dead. */
1737 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1738 bitmap_clear (perm
);
1739 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1740 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1741 ncopies_for_cost
= 0;
1742 bool load_seen
= false;
1743 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1745 if (i
% assumed_nunits
== 0)
1751 if (bitmap_bit_p (perm
, i
))
1756 gcc_assert (ncopies_for_cost
1757 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1758 + assumed_nunits
- 1) / assumed_nunits
);
1759 poly_uint64 uf
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1760 ncopies_for_cost
*= estimated_poly_value (uf
);
1762 /* Record the cost for the vector loads. */
1763 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1764 memory_access_type
, node
, prologue_cost_vec
,
1769 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1771 /* ncopies_for_cost is the number of IVs we generate. */
1772 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1773 stmt_info
, 0, vect_body
);
1775 /* Prologue cost for the initial values and step vector. */
1776 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1778 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1780 ? vector_load
: vec_construct
,
1781 stmt_info
, 0, vect_prologue
);
1782 record_stmt_cost (prologue_cost_vec
, 1,
1784 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1785 ? vector_load
: vec_construct
,
1786 stmt_info
, 0, vect_prologue
);
1788 /* ??? No easy way to get at the actual number of vector stmts
1789 to be geneated and thus the derived IVs. */
1793 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1794 stmt_info
, 0, vect_body
);
1795 if (SLP_TREE_TWO_OPERATORS (node
))
1797 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1798 stmt_info
, 0, vect_body
);
1799 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1800 stmt_info
, 0, vect_body
);
1804 /* Push SLP node def-type to stmts. */
1805 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1806 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1807 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1808 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1810 /* Scan operands and account for prologue cost of constants/externals.
1811 ??? This over-estimates cost for multiple uses and should be
1813 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1814 lhs
= gimple_get_lhs (stmt
);
1815 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1817 tree op
= gimple_op (stmt
, i
);
1819 enum vect_def_type dt
;
1820 if (!op
|| op
== lhs
)
1822 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1824 /* Without looking at the actual initializer a vector of
1825 constants can be implemented as load from the constant pool.
1826 ??? We need to pass down stmt_info for a vector type
1827 even if it points to the wrong stmt. */
1828 if (dt
== vect_constant_def
)
1829 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1830 stmt_info
, 0, vect_prologue
);
1831 else if (dt
== vect_external_def
)
1832 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1833 stmt_info
, 0, vect_prologue
);
1837 /* Restore stmt def-types. */
1838 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1839 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1840 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1841 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
1844 /* Compute the cost for the SLP instance INSTANCE. */
1847 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1849 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1850 unsigned ncopies_for_cost
;
1851 stmt_info_for_cost
*si
;
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_NOTE
, vect_location
,
1856 "=== vect_analyze_slp_cost ===\n");
1858 /* Calculate the number of vector stmts to create based on the unrolling
1859 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1860 GROUP_SIZE / NUNITS otherwise. */
1861 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1862 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1863 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1864 /* Get the estimated vectorization factor, which is always one for
1865 basic-block vectorization. */
1866 unsigned int assumed_vf
;
1867 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1868 assumed_vf
= vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info
));
1871 /* For reductions look at a reduction operand in case the reduction
1872 operation is widening like DOT_PROD or SAD. */
1873 tree vectype_for_cost
= STMT_VINFO_VECTYPE (stmt_info
);
1874 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1876 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1877 switch (gimple_assign_rhs_code (stmt
))
1881 vectype_for_cost
= get_vectype_for_scalar_type
1882 (TREE_TYPE (gimple_assign_rhs1 (stmt
)));
1887 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype_for_cost
);
1888 ncopies_for_cost
= (least_common_multiple (assumed_nunits
,
1889 group_size
* assumed_vf
)
1892 prologue_cost_vec
.create (10);
1893 body_cost_vec
.create (10);
1894 scalar_stmts_set_t
*visited
= new scalar_stmts_set_t ();
1895 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1896 &prologue_cost_vec
, &body_cost_vec
,
1897 ncopies_for_cost
, visited
);
1900 /* Record the prologue costs, which were delayed until we were
1901 sure that SLP was successful. */
1902 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1904 struct _stmt_vec_info
*stmt_info
1905 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1906 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1907 si
->misalign
, vect_prologue
);
1910 /* Record the instance's instructions in the target cost model. */
1911 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1913 struct _stmt_vec_info
*stmt_info
1914 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1915 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1916 si
->misalign
, vect_body
);
1919 prologue_cost_vec
.release ();
1920 body_cost_vec
.release ();
1923 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1924 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1925 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1926 containing the remainder.
1927 Return the first stmt in the second group. */
1930 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1932 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
1933 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
1934 gcc_assert (group1_size
> 0);
1935 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
1936 gcc_assert (group2_size
> 0);
1937 GROUP_SIZE (first_vinfo
) = group1_size
;
1939 gimple
*stmt
= first_stmt
;
1940 for (unsigned i
= group1_size
; i
> 1; i
--)
1942 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1943 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1945 /* STMT is now the last element of the first group. */
1946 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1947 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
1949 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
1950 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
1952 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
1953 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1956 /* For the second group, the GROUP_GAP is that before the original group,
1957 plus skipping over the first vector. */
1958 GROUP_GAP (vinfo_for_stmt (group2
)) =
1959 GROUP_GAP (first_vinfo
) + group1_size
;
1961 /* GROUP_GAP of the first group now has to skip over the second group too. */
1962 GROUP_GAP (first_vinfo
) += group2_size
;
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1966 group1_size
, group2_size
);
1971 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1972 statements and a vector of NUNITS elements. */
1975 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1977 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1980 /* Analyze an SLP instance starting from a group of grouped stores. Call
1981 vect_build_slp_tree to build a tree of packed stmts if possible.
1982 Return FALSE if it's impossible to SLP any stmt in the loop. */
1985 vect_analyze_slp_instance (vec_info
*vinfo
,
1986 gimple
*stmt
, unsigned max_tree_size
)
1988 slp_instance new_instance
;
1990 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1991 tree vectype
, scalar_type
= NULL_TREE
;
1994 vec
<slp_tree
> loads
;
1995 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1996 vec
<gimple
*> scalar_stmts
;
1998 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2002 scalar_type
= TREE_TYPE (DR_REF (dr
));
2003 vectype
= get_vectype_for_scalar_type (scalar_type
);
2007 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2008 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2011 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2015 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2016 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2017 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2022 if (dump_enabled_p ())
2024 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2025 "Build SLP failed: unsupported data-type ");
2026 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
2027 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2032 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2034 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2035 scalar_stmts
.create (group_size
);
2037 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2039 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2042 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
2043 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
2044 scalar_stmts
.safe_push (
2045 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
2047 scalar_stmts
.safe_push (next
);
2048 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2050 /* Mark the first element of the reduction chain as reduction to properly
2051 transform the node. In the reduction analysis phase only the last
2052 element of the chain is marked as reduction. */
2053 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2054 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
2058 /* Collect reduction statements. */
2059 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2060 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
2061 scalar_stmts
.safe_push (next
);
2064 loads
.create (group_size
);
2066 /* Build the tree for the SLP instance. */
2067 bool *matches
= XALLOCAVEC (bool, group_size
);
2068 unsigned npermutes
= 0;
2069 bst_fail
= new scalar_stmts_set_t ();
2070 poly_uint64 max_nunits
= nunits
;
2071 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2072 &max_nunits
, &loads
, matches
, &npermutes
,
2073 NULL
, max_tree_size
);
2077 /* Calculate the unrolling factor based on the smallest type. */
2078 poly_uint64 unrolling_factor
2079 = calculate_unrolling_factor (max_nunits
, group_size
);
2081 if (maybe_ne (unrolling_factor
, 1U)
2082 && is_a
<bb_vec_info
> (vinfo
))
2084 unsigned HOST_WIDE_INT const_max_nunits
;
2085 if (!max_nunits
.is_constant (&const_max_nunits
)
2086 || const_max_nunits
> group_size
)
2088 if (dump_enabled_p ())
2089 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2090 "Build SLP failed: store group "
2091 "size not a multiple of the vector size "
2092 "in basic block SLP\n");
2093 vect_free_slp_tree (node
);
2097 /* Fatal mismatch. */
2098 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2099 vect_free_slp_tree (node
);
2104 /* Create a new SLP instance. */
2105 new_instance
= XNEW (struct _slp_instance
);
2106 SLP_INSTANCE_TREE (new_instance
) = node
;
2107 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2108 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2109 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2111 /* Compute the load permutation. */
2113 bool loads_permuted
= false;
2114 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2116 vec
<unsigned> load_permutation
;
2118 gimple
*load
, *first_stmt
;
2119 bool this_load_permuted
= false;
2120 load_permutation
.create (group_size
);
2121 first_stmt
= GROUP_FIRST_ELEMENT
2122 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2123 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
2125 int load_place
= vect_get_place_in_interleaving_chain
2127 gcc_assert (load_place
!= -1);
2128 if (load_place
!= j
)
2129 this_load_permuted
= true;
2130 load_permutation
.safe_push (load_place
);
2132 if (!this_load_permuted
2133 /* The load requires permutation when unrolling exposes
2134 a gap either because the group is larger than the SLP
2135 group-size or because there is a gap between the groups. */
2136 && (known_eq (unrolling_factor
, 1U)
2137 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2138 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2140 load_permutation
.release ();
2143 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2144 loads_permuted
= true;
2149 if (!vect_supported_load_permutation_p (new_instance
))
2151 if (dump_enabled_p ())
2153 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2154 "Build SLP failed: unsupported load "
2156 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2159 vect_free_slp_instance (new_instance
);
2164 /* If the loads and stores can be handled with load/store-lan
2165 instructions do not generate this SLP instance. */
2166 if (is_a
<loop_vec_info
> (vinfo
)
2168 && dr
&& vect_store_lanes_supported (vectype
, group_size
))
2171 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2173 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2174 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2175 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2176 /* Use SLP for strided accesses (or if we
2177 can't load-lanes). */
2178 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2179 || ! vect_load_lanes_supported
2180 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2181 GROUP_SIZE (stmt_vinfo
)))
2184 if (i
== loads
.length ())
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2188 "Built SLP cancelled: can use "
2189 "load/store-lanes\n");
2190 vect_free_slp_instance (new_instance
);
2195 vinfo
->slp_instances
.safe_push (new_instance
);
2197 if (dump_enabled_p ())
2199 dump_printf_loc (MSG_NOTE
, vect_location
,
2200 "Final SLP tree for instance:\n");
2201 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2209 /* Failed to SLP. */
2210 /* Free the allocated memory. */
2211 scalar_stmts
.release ();
2215 /* For basic block SLP, try to break the group up into multiples of the
2217 unsigned HOST_WIDE_INT const_nunits
;
2218 if (is_a
<bb_vec_info
> (vinfo
)
2219 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2220 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
2221 && nunits
.is_constant (&const_nunits
))
2223 /* We consider breaking the group only on VF boundaries from the existing
2225 for (i
= 0; i
< group_size
; i
++)
2226 if (!matches
[i
]) break;
2228 if (i
>= const_nunits
&& i
< group_size
)
2230 /* Split into two groups at the first vector boundary before i. */
2231 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2232 unsigned group1_size
= i
& ~(const_nunits
- 1);
2234 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2235 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2236 /* If the first non-match was in the middle of a vector,
2237 skip the rest of that vector. */
2238 if (group1_size
< i
)
2240 i
= group1_size
+ const_nunits
;
2242 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2245 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2248 /* Even though the first vector did not all match, we might be able to SLP
2249 (some) of the remainder. FORNOW ignore this possibility. */
2256 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2257 trees of packed scalar stmts if SLP is possible. */
/* NOTE (review): this region is a line-mangled extraction; the embedded numbers
   are stale source-line markers and some lines (braces, trailing call
   arguments) are missing from view.  Restore from upstream before editing.  */
/* Entry point for SLP discovery: builds SLP instances from grouped stores,
   then (for loops) from reduction chains and from groups of reductions.  */
2260 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2263 gimple
*first_element
;
2265 if (dump_enabled_p ())
2266 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2268 /* Find SLP sequences starting from groups of grouped stores. */
2269 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2270 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
/* Loop-only opportunities below: only a loop_vec_info has reduction info.  */
2272 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2274 if (loop_vinfo
->reduction_chains
.length () > 0)
2276 /* Find SLP sequences starting from reduction chains. */
2277 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2278 if (! vect_analyze_slp_instance (vinfo
, first_element
,
/* On failure, the chain's group links are cleared below so the stmts
   can still be handled as individual (non-SLP) reductions.  */
2281 /* Dissolve reduction chain group. */
2282 gimple
*next
, *stmt
= first_element
;
2285 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2286 next
= GROUP_NEXT_ELEMENT (vinfo
);
2287 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2288 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2291 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2292 = vect_internal_def
;
2296 /* Find SLP sequences starting from groups of reductions. */
2297 if (loop_vinfo
->reductions
.length () > 1)
2298 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2306 /* For each possible SLP instance decide whether to SLP it and calculate overall
2307 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2308 least one instance. */
/* NOTE (review): line-mangled extraction -- embedded numbers are stale line
   markers; brace-only lines and e.g. the decided_to_slp increment are not
   visible here.  Restore from upstream before editing.  */
2311 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2314 poly_uint64 unrolling_factor
= 1;
2315 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2316 slp_instance instance
;
2317 int decided_to_slp
= 0;
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2323 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2325 /* FORNOW: SLP if you can. */
2326 /* All unroll factors have the form current_vector_size * X for some
2327 rational X, so they must have a common multiple. */
2329 = force_common_multiple (unrolling_factor
,
2330 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2332 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2333 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2334 loop-based vectorization. Such stmts will be marked as HYBRID. */
2335 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
/* Record the combined unrolling factor on the loop_vec_info.  */
2339 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2341 if (decided_to_slp
&& dump_enabled_p ())
2343 dump_printf_loc (MSG_NOTE
, vect_location
,
2344 "Decided to SLP %d instances. Unrolling factor ",
2346 dump_dec (MSG_NOTE
, unrolling_factor
);
2347 dump_printf (MSG_NOTE
, "\n");
2350 return (decided_to_slp
> 0);
2354 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2355 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* NOTE (review): line-mangled extraction -- embedded numbers are stale line
   markers and brace-only/omitted lines are missing from view.  */
/* Walks scalar stmt I of NODE, checks its immediate uses for non-SLP
   consumers, marks the stmt HYBRID if found, and recurses into children.  */
2358 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2360 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2361 imm_use_iterator imm_iter
;
2363 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2365 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2366 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2369 /* Propagate hybrid down the SLP tree. */
2370 if (stype
== hybrid
)
2372 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2376 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2377 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2378 /* If we get a pattern stmt here we have to use the LHS of the
2379 original stmt for immediate uses. */
2380 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2381 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2382 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
/* Obtain the single SSA def of STMT (PHI result for PHIs).  */
2384 if (gimple_code (stmt
) == GIMPLE_PHI
)
2385 def
= gimple_phi_result (stmt
);
2387 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2389 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
/* Uses outside the loop are not interesting for hybrid detection.  */
2391 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2393 use_vinfo
= vinfo_for_stmt (use_stmt
);
2394 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2395 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2396 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
/* A relevant non-SLP use (other than a reduction PHI) makes STMT hybrid.  */
2397 if (!STMT_SLP_TYPE (use_vinfo
)
2398 && (STMT_VINFO_RELEVANT (use_vinfo
)
2399 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2400 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2401 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2403 if (dump_enabled_p ())
2405 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2406 "def in non-SLP stmt: ");
2407 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2415 && !HYBRID_SLP_STMT (stmt_vinfo
))
2417 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2420 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2422 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
/* Recurse into internal children, propagating the (possibly updated)
   slp_vect_type.  */
2425 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2426 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2427 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2430 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
/* NOTE (review): line-mangled extraction; embedded numbers are stale line
   markers and brace-only lines are missing from view.  */
/* Tree-walk callback: if *TP is a non-default-def SSA name whose defining
   stmt is inside the loop (passed via DATA->info) and is pure-SLP, demote
   that def to hybrid, since a pattern stmt uses it invisibly.  */
2433 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2435 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2436 struct loop
*loopp
= (struct loop
*)wi
->info
;
2441 if (TREE_CODE (*tp
) == SSA_NAME
2442 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2444 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2445 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2446 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2448 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2451 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2453 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
/* NOTE (review): line-mangled extraction; the trailing parameter and the
   body's then-clause are not visible here.  */
/* Stmt-walk callback paired with vect_detect_hybrid_slp_1: decides whether
   the stmt at *GSI is a relevant non-SLP use that should trigger marking
   of its operands' defs as hybrid (reduction PHIs are exempt).  */
2461 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2464 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2465 /* If the stmt is in a SLP instance then this isn't a reason
2466 to mark use definitions in other SLP instances as hybrid. */
2467 if (! STMT_SLP_TYPE (use_vinfo
)
2468 && (STMT_VINFO_RELEVANT (use_vinfo
)
2469 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2470 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2471 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2478 /* Find stmts that must be both vectorized and SLPed. */
/* NOTE (review): line-mangled extraction; embedded numbers are stale line
   markers and brace-only lines are missing from view.  */
/* Two passes: (1) walk all pattern stmts in the loop marking pure-SLP defs
   used by patterns as hybrid (immediate uses inside patterns are not in the
   SSA use lists); (2) walk every SLP instance tree with
   vect_detect_hybrid_slp_stmts to find and propagate hybrid stmts.  */
2481 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2484 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2485 slp_instance instance
;
2487 if (dump_enabled_p ())
2488 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2491 /* First walk all pattern stmt in the loop and mark defs of uses as
2492 hybrid because immediate uses in them are not recorded. */
2493 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2495 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2496 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2499 gimple
*stmt
= gsi_stmt (gsi
);
2500 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2501 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
/* Walk the pattern stmt and its def sequence, passing the loop via
   wi.info so the callbacks can do the containment check.  */
2504 memset (&wi
, 0, sizeof (wi
));
2505 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2506 gimple_stmt_iterator gsi2
2507 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2508 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2509 vect_detect_hybrid_slp_1
, &wi
);
2510 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2511 vect_detect_hybrid_slp_2
,
2512 vect_detect_hybrid_slp_1
, &wi
);
2517 /* Then walk the SLP instance trees marking stmts with uses in
2518 non-SLP stmts as hybrid, also propagating hybrid down the
2519 SLP tree, collecting the above info on-the-fly. */
2520 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2522 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2523 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2529 /* Initialize a bb_vec_info struct for the statements between
2530 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
/* NOTE (review): line-mangled extraction; brace-only lines and the loop's
   gsi_next call are missing from view.  */
/* Constructor: records the region bounds, then gives every stmt in the
   region uid 0 (region marker) and a fresh stmt_vec_info owned by this.  */
2532 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2533 gimple_stmt_iterator region_end_in
)
2534 : vec_info (vec_info::bb
, init_cost (NULL
)),
2535 bb (gsi_bb (region_begin_in
)),
2536 region_begin (region_begin_in
),
2537 region_end (region_end_in
)
2539 gimple_stmt_iterator gsi
;
2541 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2544 gimple
*stmt
= gsi_stmt (gsi
);
2545 gimple_set_uid (stmt
, 0);
2546 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
2553 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2554 stmts in the basic block. */
/* NOTE (review): line-mangled extraction; brace-only lines are missing.  */
/* Destructor: releases each stmt's stmt_vec_info and resets its uid to -1
   so the stmt is no longer considered part of a vectorization region.  */
2556 _bb_vec_info::~_bb_vec_info ()
2558 for (gimple_stmt_iterator si
= region_begin
;
2559 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2561 gimple
*stmt
= gsi_stmt (si
);
2562 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2565 /* Free stmt_vec_info. */
2566 free_stmt_vec_info (stmt
);
2568 /* Reset region marker. */
2569 gimple_set_uid (stmt
, -1);
2576 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2577 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2579 Return true if the operations are supported. */
/* NOTE (review): line-mangled extraction -- embedded numbers are stale line
   markers; brace-only lines, early returns and some conditions are missing
   from view.  Restore from upstream before editing.  */
2582 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2583 slp_instance node_instance
)
/* External/constant nodes need no analysis.  */
2590 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Analyze children first; any failure propagates up.  */
2593 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2594 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
))
2597 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2598 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2599 gcc_assert (stmt_info
);
2600 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2602 /* For BB vectorization vector types are assigned here.
2603 Memory accesses already got their vector type assigned
2604 in vect_analyze_data_refs. */
2605 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2607 && ! STMT_VINFO_DATA_REF (stmt_info
))
2609 gcc_assert (PURE_SLP_STMT (stmt_info
));
2611 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2612 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_NOTE
, vect_location
,
2615 "get vectype for scalar type: ");
2616 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2617 dump_printf (MSG_NOTE
, "\n");
2620 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Unsupported scalar type: report and (presumably) fail this node.  */
2623 if (dump_enabled_p ())
2625 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2626 "not SLPed: unsupported data-type ");
2627 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2629 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2634 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2637 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2638 dump_printf (MSG_NOTE
, "\n");
/* Assign the computed vectype to every scalar stmt in this node.  */
2642 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2643 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2646 /* Calculate the number of vector statements to be created for the
2647 scalar stmts in this node. For SLP reductions it is equal to the
2648 number of vector statements in the children (which has already been
2649 calculated by the recursive call). Otherwise it is the number of
2650 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2651 VF divided by the number of elements in a vector. */
2652 if (GROUP_FIRST_ELEMENT (stmt_info
)
2653 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2654 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2655 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2659 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2660 vf
= loop_vinfo
->vectorization_factor
;
2663 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2664 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2665 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2666 = vect_get_num_vectors (vf
* group_size
, vectype
);
2669 /* Push SLP node def-type to stmt operands. */
2670 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2671 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2672 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2673 = SLP_TREE_DEF_TYPE (child
);
/* Analyze the representative stmt with the temporary def-types in place.  */
2674 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
);
2675 /* Restore def-types. */
2676 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2677 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2678 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2679 = vect_internal_def
;
2687 /* Analyze statements in SLP instances of VINFO. Return true if the
2688 operations are supported. */
/* NOTE (review): line-mangled extraction; brace-only lines and the loop's
   increment/else structure are missing from view.  */
/* Analyzes each SLP instance; failing instances are dumped, freed and
   removed, surviving ones get their costs computed.  Returns whether any
   instance remains.  */
2691 vect_slp_analyze_operations (vec_info
*vinfo
)
2693 slp_instance instance
;
2696 if (dump_enabled_p ())
2697 dump_printf_loc (MSG_NOTE
, vect_location
,
2698 "=== vect_slp_analyze_operations ===\n");
/* No increment in the for-header: removal of the current element keeps
   the index valid for the next iteration.  */
2700 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2702 if (!vect_slp_analyze_node_operations (vinfo
,
2703 SLP_INSTANCE_TREE (instance
),
2706 dump_printf_loc (MSG_NOTE
, vect_location
,
2707 "removing SLP instance operations starting from: ");
2708 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2709 SLP_TREE_SCALAR_STMTS
2710 (SLP_INSTANCE_TREE (instance
))[0], 0);
2711 vect_free_slp_instance (instance
);
2712 vinfo
->slp_instances
.ordered_remove (i
);
2716 /* Compute the costs of the SLP instance. */
2717 vect_analyze_slp_cost (instance
, vinfo
->target_cost_data
);
2722 return !vinfo
->slp_instances
.is_empty ();
2726 /* Compute the scalar cost of the SLP node NODE and its children
2727 and return it. Do not account defs that are marked in LIFE and
2728 update LIFE according to uses of NODE. */
/* NOTE (review): line-mangled extraction -- brace-only lines, the LIFE
   check/update and continue statements are missing from view.  */
2731 vect_bb_slp_scalar_cost (basic_block bb
,
2732 slp_tree node
, vec
<bool, va_heap
> *life
)
2734 unsigned scalar_cost
= 0;
2739 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2742 ssa_op_iter op_iter
;
2743 def_operand_p def_p
;
2744 stmt_vec_info stmt_info
;
2749 /* If there is a non-vectorized use of the defs then the scalar
2750 stmt is kept live in which case we do not account it or any
2751 required defs in the SLP children in the scalar cost. This
2752 way we make the vectorization more costly when compared to
2754 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2756 imm_use_iterator use_iter
;
2758 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
/* A use by a debug stmt does not count; a use outside the region or by a
   non-pure-SLP stmt keeps the scalar stmt live.  */
2759 if (!is_gimple_debug (use_stmt
)
2760 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2762 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2765 BREAK_FROM_IMM_USE_STMT (use_iter
);
2771 /* Count scalar stmts only once. */
2772 if (gimple_visited_p (stmt
))
2774 gimple_set_visited (stmt
, true);
/* Classify the stmt as load / store / generic for the cost model.  */
2776 stmt_info
= vinfo_for_stmt (stmt
);
2777 if (STMT_VINFO_DATA_REF (stmt_info
))
2779 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2780 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2782 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2785 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2787 scalar_cost
+= stmt_cost
;
2790 auto_vec
<bool, 20> subtree_life
;
2791 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2793 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2795 /* Do not directly pass LIFE to the recursive call, copy it to
2796 confine changes in the callee to the current child/subtree. */
2797 subtree_life
.safe_splice (*life
);
2798 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
);
2799 subtree_life
.truncate (0);
2806 /* Check if vectorization of the basic block is profitable. */
/* NOTE (review): line-mangled extraction; brace-only lines, the
   vec_inside_cost dump argument and the final returns are missing from
   view.  */
2809 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2811 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2812 slp_instance instance
;
2814 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2815 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2817 /* Calculate scalar cost. */
2818 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2820 auto_vec
<bool, 20> life
;
2821 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2822 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2823 SLP_INSTANCE_TREE (instance
),
2827 /* Unset visited flag. */
/* vect_bb_slp_scalar_cost sets the visited marker; clear it again over
   the whole region.  */
2828 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2829 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2830 gimple_set_visited (gsi_stmt (gsi
), false);
2832 /* Complete the target-specific cost calculation. */
2833 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2834 &vec_inside_cost
, &vec_epilogue_cost
);
2836 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2838 if (dump_enabled_p ())
2840 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2841 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2843 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2844 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2845 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2848 /* Vectorization is profitable if its cost is more than the cost of scalar
2849 version. Note that we err on the vector side for equal cost because
2850 the cost estimate is otherwise quite pessimistic (constant uses are
2851 free on the scalar side but cost a load on the vector side for
/* The comparison below fails the vector version only when it is strictly
   more expensive than the scalar one.  */
2853 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2859 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2860 if so and sets fatal to true if failure is independent of
2861 current_vector_size. */
/* NOTE (review): line-mangled extraction -- the trailing parameter (fatal
   flag), brace-only lines, the failure cleanup/returns between checks and
   parts of several dump strings are missing from view.  Restore from
   upstream before editing.  */
2864 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2865 gimple_stmt_iterator region_end
,
2866 vec
<data_reference_p
> datarefs
, int n_stmts
,
2869 bb_vec_info bb_vinfo
;
2870 slp_instance instance
;
2872 poly_uint64 min_vf
= 2;
2874 /* The first group of checks is independent of the vector size. */
/* Bail out early on regions too large for SLP analysis.  */
2877 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2879 if (dump_enabled_p ())
2880 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2881 "not vectorized: too many instructions in "
2883 free_data_refs (datarefs
);
2887 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
);
2891 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2893 /* Analyze the data references. */
2895 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2897 if (dump_enabled_p ())
2898 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2899 "not vectorized: unhandled data-ref in basic "
/* SLP needs at least two data refs to build a group.  */
2906 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2908 if (dump_enabled_p ())
2909 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2910 "not vectorized: not enough data-refs in "
2917 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2919 if (dump_enabled_p ())
2920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2921 "not vectorized: unhandled data access in "
2928 /* If there are no grouped stores in the region there is no need
2929 to continue with pattern recog as vect_analyze_slp will fail
2931 if (bb_vinfo
->grouped_stores
.is_empty ())
2933 if (dump_enabled_p ())
2934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2935 "not vectorized: no grouped stores in "
2942 /* While the rest of the analysis below depends on it in some way. */
2945 vect_pattern_recog (bb_vinfo
);
2947 /* Check the SLP opportunities in the basic block, analyze and build SLP
2949 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2951 if (dump_enabled_p ())
2953 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2954 "Failed to SLP the basic block.\n");
2955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2956 "not vectorized: failed to find SLP opportunities "
2957 "in basic block.\n");
2964 vect_record_base_alignments (bb_vinfo
);
2966 /* Analyze and verify the alignment of data references and the
2967 dependence in the SLP instances. */
/* No increment in the for-header: removal keeps the index valid.  */
2968 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2970 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2971 || ! vect_slp_analyze_instance_dependence (instance
))
2973 dump_printf_loc (MSG_NOTE
, vect_location
,
2974 "removing SLP instance operations starting from: ");
2975 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2976 SLP_TREE_SCALAR_STMTS
2977 (SLP_INSTANCE_TREE (instance
))[0], 0);
2978 vect_free_slp_instance (instance
);
2979 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2983 /* Mark all the statements that we want to vectorize as pure SLP and
2985 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2986 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2990 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2996 if (!vect_slp_analyze_operations (bb_vinfo
))
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3000 "not vectorized: bad operation in basic block.\n")
;
3006 /* Cost model: check if the vectorization is worthwhile. */
3007 if (!unlimited_cost_model (NULL
)
3008 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3010 if (dump_enabled_p ())
3011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3012 "not vectorized: vectorization is not "
3019 if (dump_enabled_p ())
3020 dump_printf_loc (MSG_NOTE
, vect_location
,
3021 "Basic block will be vectorized using SLP\n");
3027 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3028 true if anything in the basic-block was vectorized. */
/* NOTE (review): line-mangled extraction -- brace-only lines, the outer
   while/if structure, `insns`/`fatal` declarations and several break/continue
   statements are missing from view.  Restore from upstream before editing.  */
/* Splits BB into regions at stmts with no analyzable data references, tries
   vect_slp_analyze_bb_1 + vect_schedule_slp on each region, and retries each
   region with every target-supported vector size.  */
3031 vect_slp_bb (basic_block bb
)
3033 bb_vec_info bb_vinfo
;
3034 gimple_stmt_iterator gsi
;
3035 bool any_vectorized
= false;
3036 auto_vector_sizes vector_sizes
;
3038 if (dump_enabled_p ())
3039 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
3041 /* Autodetect first vector size we try. */
3042 current_vector_size
= 0;
3043 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
3044 unsigned int next_size
= 0;
3046 gsi
= gsi_start_bb (bb
);
3048 poly_uint64 autodetected_vector_size
= 0;
3051 if (gsi_end_p (gsi
))
3054 gimple_stmt_iterator region_begin
= gsi
;
3055 vec
<data_reference_p
> datarefs
= vNULL
;
/* Grow the region while stmts have analyzable data references.  */
3058 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3060 gimple
*stmt
= gsi_stmt (gsi
);
3061 if (is_gimple_debug (stmt
))
3065 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3066 vect_location
= gimple_location (stmt
);
3068 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
3072 /* Skip leading unhandled stmts. */
3073 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3079 gimple_stmt_iterator region_end
= gsi
;
3081 bool vectorized
= false;
3083 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3084 datarefs
, insns
, fatal
);
3086 && dbg_cnt (vect_slp
))
3088 if (dump_enabled_p ())
3089 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3091 vect_schedule_slp (bb_vinfo
);
3093 if (dump_enabled_p ())
3094 dump_printf_loc (MSG_NOTE
, vect_location
,
3095 "basic block part vectorized\n");
3101 any_vectorized
|= vectorized
;
3104 autodetected_vector_size
= current_vector_size
;
/* Skip the size that was already tried via autodetection.  */
3106 if (next_size
< vector_sizes
.length ()
3107 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3111 || next_size
== vector_sizes
.length ()
3112 || known_eq (current_vector_size
, 0U)
3113 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3114 vector sizes will fail do not bother iterating. */
3117 if (gsi_end_p (region_end
))
3120 /* Skip the unhandled stmt. */
3123 /* And reset vector sizes. */
3124 current_vector_size
= 0;
3129 /* Try the next biggest vector size. */
3130 current_vector_size
= vector_sizes
[next_size
++];
3131 if (dump_enabled_p ())
3133 dump_printf_loc (MSG_NOTE
, vect_location
,
3134 "***** Re-trying analysis with "
3136 dump_dec (MSG_NOTE
, current_vector_size
);
3137 dump_printf (MSG_NOTE
, "\n");
3145 return any_vectorized
;
3149 /* Return 1 if vector type of boolean constant which is OPNUM
3150 operand in statement STMT is a boolean vector. */
/* NOTE (review): line-mangled extraction; the trailing vect_is_simple_use
   arguments and some else-branches are missing from view.  */
3153 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3155 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3156 enum tree_code code
= gimple_expr_code (stmt
);
3159 enum vect_def_type dt
;
3161 /* For comparison and COND_EXPR type is chosen depending
3162 on the other comparison operand. */
3163 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
/* Pick the sibling operand of OPNUM and query its vectype.  */
3166 op
= gimple_assign_rhs1 (stmt
);
3168 op
= gimple_assign_rhs2 (stmt
);
3170 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
/* No vectype for the sibling counts as a boolean-vector context.  */
3174 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3177 if (code
== COND_EXPR
)
3179 tree cond
= gimple_assign_rhs1 (stmt
);
3181 if (TREE_CODE (cond
) == SSA_NAME
)
3184 op
= TREE_OPERAND (cond
, 1);
3186 op
= TREE_OPERAND (cond
, 0);
3188 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3192 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
/* Otherwise the stmt's own vectype decides.  */
3195 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3199 /* For constant and loop invariant defs of SLP_NODE this function returns
3200 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3201 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3202 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3203 REDUC_INDEX is the index of the reduction operand in the statements, unless
/* NOTE (review): this function is a line-mangled extraction -- brace-only
   lines, several declarations (vector_type, nunits, voprnds, orig_op,
   vec_cst, init, case labels of the operand-selection switch, etc.) and
   whole conditions are missing from view.  Restore from upstream before
   editing; comments below annotate only the visible fragments.  */
3207 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3208 vec
<tree
> *vec_oprnds
,
3209 unsigned int op_num
, unsigned int number_of_vectors
)
3211 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3212 gimple
*stmt
= stmts
[0];
3213 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3216 unsigned j
, number_of_places_left_in_vector
;
3219 int group_size
= stmts
.length ();
3220 unsigned int vec_num
, i
;
3221 unsigned number_of_copies
= 1;
3223 voprnds
.create (number_of_vectors
);
3224 bool constant_p
, is_store
;
3225 tree neutral_op
= NULL
;
3226 enum tree_code code
= gimple_expr_code (stmt
);
3227 gimple_seq ctor_seq
= NULL
;
3229 /* Check if vector type is a boolean vector. */
3230 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3231 && vect_mask_constant_operand_p (stmt
, op_num
))
3233 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3235 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3236 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
3238 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3241 op
= gimple_assign_rhs1 (stmt
);
3248 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3249 created vectors. It is greater than 1 if unrolling is performed.
3251 For example, we have two scalar operands, s1 and s2 (e.g., group of
3252 strided accesses of size two), while NUNITS is four (i.e., four scalars
3253 of this type can be packed in a vector). The output vector will contain
3254 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3257 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3258 containing the operands.
3260 For example, NUNITS is four as before, and the group size is 8
3261 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3262 {s5, s6, s7, s8}. */
3264 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3266 number_of_places_left_in_vector
= nunits
;
3268 tree_vector_builder
elts (vector_type
, nunits
, 1);
3269 elts
.quick_grow (nunits
);
3270 bool place_after_defs
= false;
/* Iterate scalars from last to first so vector elements are filled from the
   highest index down (see the reversal of VOPRNDS below).  */
3271 for (j
= 0; j
< number_of_copies
; j
++)
3273 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3276 op
= gimple_assign_rhs1 (stmt
);
/* COND_EXPR: select the requested operand of the condition or the
   then/else arms depending on op_num.  */
3283 tree cond
= gimple_assign_rhs1 (stmt
);
3284 if (TREE_CODE (cond
) == SSA_NAME
)
3285 op
= gimple_op (stmt
, op_num
+ 1);
3286 else if (op_num
== 0 || op_num
== 1)
3287 op
= TREE_OPERAND (cond
, op_num
);
3291 op
= gimple_assign_rhs2 (stmt
);
3293 op
= gimple_assign_rhs3 (stmt
);
/* Calls: the operand is the op_num'th call argument.  */
3299 op
= gimple_call_arg (stmt
, op_num
);
3306 op
= gimple_op (stmt
, op_num
+ 1);
3307 /* Unlike the other binary operators, shifts/rotates have
3308 the shift count being int, instead of the same type as
3309 the lhs, so make sure the scalar is the right type if
3310 we are dealing with vectors of
3311 long long/long/short/char. */
3312 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3313 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3317 op
= gimple_op (stmt
, op_num
+ 1);
3322 /* Create 'vect_ = {op0,op1,...,opn}'. */
3323 number_of_places_left_in_vector
--;
/* If the scalar's type differs from the vector element type, convert the
   constant or emit a conversion stmt into CTOR_SEQ.  */
3325 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3327 if (CONSTANT_CLASS_P (op
))
3329 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3331 /* Can't use VIEW_CONVERT_EXPR for booleans because
3332 of possibly different sizes of scalar value and
3334 if (integer_zerop (op
))
3335 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3336 else if (integer_onep (op
))
3337 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3342 op
= fold_unary (VIEW_CONVERT_EXPR
,
3343 TREE_TYPE (vector_type
), op
);
3344 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
/* Non-constant mismatched operand: build an SSA temp holding the
   converted value.  */
3348 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3350 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3353 = build_all_ones_cst (TREE_TYPE (vector_type
));
3355 = build_zero_cst (TREE_TYPE (vector_type
));
3356 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3357 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3363 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3366 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3369 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3373 elts
[number_of_places_left_in_vector
] = op
;
3374 if (!CONSTANT_CLASS_P (op
))
/* A non-constant element defined in this BB forces placement of the
   init after the defs (BB vectorization only).  */
3376 if (TREE_CODE (orig_op
) == SSA_NAME
3377 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3378 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3379 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3380 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3381 place_after_defs
= true;
/* Vector filled up: materialize it as VECTOR_CST or CONSTRUCTOR.  */
3383 if (number_of_places_left_in_vector
== 0)
3386 vec_cst
= elts
.build ();
3389 vec
<constructor_elt
, va_gc
> *v
;
3391 vec_alloc (v
, nunits
);
3392 for (k
= 0; k
< nunits
; ++k
)
3393 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
3394 vec_cst
= build_constructor (vector_type
, v
);
3397 gimple_stmt_iterator gsi
;
3398 if (place_after_defs
)
3401 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3402 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3405 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3406 if (ctor_seq
!= NULL
)
/* Insert the accumulated conversion stmts just before the init.  */
3408 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3409 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
3413 voprnds
.quick_push (init
);
3414 place_after_defs
= false;
3415 number_of_places_left_in_vector
= nunits
;
3417 elts
.new_vector (vector_type
, nunits
, 1);
3418 elts
.quick_grow (nunits
);
3423 /* Since the vectors are created in the reverse order, we should invert
3425 vec_num
= voprnds
.length ();
3426 for (j
= vec_num
; j
!= 0; j
--)
3428 vop
= voprnds
[j
- 1];
3429 vec_oprnds
->quick_push (vop
);
3434 /* In case that VF is greater than the unrolling factor needed for the SLP
3435 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3436 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3437 to replicate the vectors. */
3438 while (number_of_vectors
> vec_oprnds
->length ())
3440 tree neutral_vec
= NULL
;
3445 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3447 vec_oprnds
->quick_push (neutral_vec
);
/* Without a neutral op, replicate the already-created vectors.  */
3451 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3452 vec_oprnds
->quick_push (vop
);
3458 /* Get vectorized definitions from SLP_NODE that contains corresponding
3459 vectorized def-stmts. */
/* NOTE (review): line-mangled extraction; brace-only lines and the
   vec_oprnd declaration are missing from view.  */
/* Pushes the LHS (or PHI result) of each vectorized stmt of SLP_NODE onto
   VEC_OPRNDS.  */
3462 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3465 gimple
*vec_def_stmt
;
3468 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3470 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3472 gcc_assert (vec_def_stmt
);
3473 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3474 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3476 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3477 vec_oprnds
->quick_push (vec_oprnd
);
3482 /* Get vectorized definitions for SLP_NODE.
3483 If the scalar definitions are loop invariants or constants, collect them and
3484 call vect_get_constant_vectors() to create vector stmts.
3485 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3486 must be stored in the corresponding child of SLP_NODE, and we call
3487 vect_get_slp_vect_defs () to retrieve them. */
/* NOTE (review): line-mangled extraction -- brace-only lines, several
   declarations (first_stmt, oprnd, related, first_def_op, vec_defs) and
   trailing call arguments are missing from view.  Restore from upstream
   before editing.  */
3490 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3491 vec
<vec
<tree
> > *vec_oprnds
)
3494 int number_of_vects
= 0, i
;
3495 unsigned int child_index
= 0;
3496 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3497 slp_tree child
= NULL
;
3500 bool vectorized_defs
;
3502 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3503 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3505 /* For each operand we check if it has vectorized definitions in a child
3506 node or we need to create them (for invariants and constants). We
3507 check if the LHS of the first stmt of the next child matches OPRND.
3508 If it does, we found the correct child. Otherwise, we call
3509 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3510 to check this child node for the next operand. */
3511 vectorized_defs
= false;
3512 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3514 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3516 /* We have to check both pattern and original def, if available. */
3517 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3519 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3521 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3524 if (gimple_code (first_def
) == GIMPLE_PHI
)
3525 first_def_op
= gimple_phi_result (first_def
);
3527 first_def_op
= gimple_get_lhs (first_def
);
/* Match OPRND against the child's first def (or its pattern-related
   stmt's LHS).  */
3528 if (operand_equal_p (oprnd
, first_def_op
, 0)
3530 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3532 /* The number of vector defs is determined by the number of
3533 vector statements in the node from which we get those
3535 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3536 vectorized_defs
= true;
3544 if (!vectorized_defs
)
3548 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3549 /* Number of vector stmts was calculated according to LHS in
3550 vect_schedule_slp_instance (), fix it by replacing LHS with
3551 RHS, if necessary. See vect_get_smallest_scalar_type () for
3553 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3555 if (rhs_size_unit
!= lhs_size_unit
)
/* Scale the vector count by the RHS/LHS element-size ratio.  */
3557 number_of_vects
*= rhs_size_unit
;
3558 number_of_vects
/= lhs_size_unit
;
3563 /* Allocate memory for vectorized defs. */
3565 vec_defs
.create (number_of_vects
);
3567 /* For reduction defs we call vect_get_constant_vectors (), since we are
3568 looking for initial loop invariant values. */
3569 if (vectorized_defs
)
3570 /* The defs are already vectorized. */
3571 vect_get_slp_vect_defs (child
, &vec_defs
);
3573 /* Build vectors from scalar defs. */
3574 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3577 vec_oprnds
->quick_push (vec_defs
);
3581 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3582 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3583 permute statements for the SLP node NODE of the SLP instance
3584 SLP_NODE_INSTANCE. */
3587 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3588 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3589 slp_instance slp_node_instance
, bool analyze_only
,
3592 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3593 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3594 tree mask_element_type
= NULL_TREE
, mask_type
;
3595 int nunits
, vec_index
= 0;
3596 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3597 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3600 unsigned HOST_WIDE_INT const_vf
;
3602 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3605 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3607 mode
= TYPE_MODE (vectype
);
3609 /* At the moment, all permutations are represented using per-element
3610 indices, so we can't cope with variable vectorization factors. */
3611 if (!vf
.is_constant (&const_vf
))
3614 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3615 same size as the vector element being permuted. */
3616 mask_element_type
= lang_hooks
.types
.type_for_mode
3617 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3618 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3619 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3620 vec_perm_builder
mask (nunits
, nunits
, 1);
3621 mask
.quick_grow (nunits
);
3622 vec_perm_indices indices
;
3624 /* Initialize the vect stmts of NODE to properly insert the generated
3627 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3628 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3629 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3631 /* Generate permutation masks for every NODE. Number of masks for each NODE
3632 is equal to GROUP_SIZE.
3633 E.g., we have a group of three nodes with three loads from the same
3634 location in each node, and the vector size is 4. I.e., we have a
3635 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3636 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3637 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3640 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3641 The last mask is illegal since we assume two operands for permute
3642 operation, and the mask element values can't be outside that range.
3643 Hence, the last mask must be converted into {2,5,5,5}.
3644 For the first two permutations we need the first and the second input
3645 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3646 we need the second and the third vectors: {b1,c1,a2,b2} and
3649 int vect_stmts_counter
= 0;
3651 int first_vec_index
= -1;
3652 int second_vec_index
= -1;
3656 for (unsigned int j
= 0; j
< const_vf
; j
++)
3658 for (int k
= 0; k
< group_size
; k
++)
3660 int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3661 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3662 vec_index
= i
/ nunits
;
3663 mask_element
= i
% nunits
;
3664 if (vec_index
== first_vec_index
3665 || first_vec_index
== -1)
3667 first_vec_index
= vec_index
;
3669 else if (vec_index
== second_vec_index
3670 || second_vec_index
== -1)
3672 second_vec_index
= vec_index
;
3673 mask_element
+= nunits
;
3677 if (dump_enabled_p ())
3679 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3680 "permutation requires at "
3681 "least three vectors ");
3682 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3685 gcc_assert (analyze_only
);
3689 gcc_assert (mask_element
>= 0
3690 && mask_element
< 2 * nunits
);
3691 if (mask_element
!= index
)
3693 mask
[index
++] = mask_element
;
3695 if (index
== nunits
&& !noop_p
)
3697 indices
.new_vector (mask
, 2, nunits
);
3698 if (!can_vec_perm_const_p (mode
, indices
))
3700 if (dump_enabled_p ())
3702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3704 "unsupported vect permute { ");
3705 for (i
= 0; i
< nunits
; ++i
)
3706 dump_printf (MSG_MISSED_OPTIMIZATION
,
3707 HOST_WIDE_INT_PRINT_DEC
" ", mask
[i
]);
3708 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3710 gcc_assert (analyze_only
);
3717 if (index
== nunits
)
3721 tree mask_vec
= NULL_TREE
;
3724 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
3726 if (second_vec_index
== -1)
3727 second_vec_index
= first_vec_index
;
3729 /* Generate the permute statement if necessary. */
3730 tree first_vec
= dr_chain
[first_vec_index
];
3731 tree second_vec
= dr_chain
[second_vec_index
];
3736 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3738 perm_dest
= make_ssa_name (perm_dest
);
3739 perm_stmt
= gimple_build_assign (perm_dest
,
3741 first_vec
, second_vec
,
3743 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3746 /* If mask was NULL_TREE generate the requested
3747 identity transform. */
3748 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3750 /* Store the vector statement in NODE. */
3751 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3755 first_vec_index
= -1;
3756 second_vec_index
= -1;
3765 typedef hash_map
<vec
<gimple
*>, slp_tree
,
3766 simple_hashmap_traits
<bst_traits
, slp_tree
> >
3767 scalar_stmts_to_slp_tree_map_t
;
3769 /* Vectorize SLP instance tree in postorder. */
3772 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3773 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3776 bool grouped_store
, is_store
;
3777 gimple_stmt_iterator si
;
3778 stmt_vec_info stmt_info
;
3779 unsigned int group_size
;
3784 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3787 /* See if we have already vectorized the same set of stmts and reuse their
3788 vectorized stmts. */
3790 = bst_map
->get_or_insert (SLP_TREE_SCALAR_STMTS (node
).copy ());
3793 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (leader
));
3798 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3799 vect_schedule_slp_instance (child
, instance
, bst_map
);
3801 /* Push SLP node def-type to stmts. */
3802 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3803 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3804 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3805 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
3807 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3808 stmt_info
= vinfo_for_stmt (stmt
);
3810 /* VECTYPE is the type of the destination. */
3811 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3812 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3814 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3815 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3817 if (dump_enabled_p ())
3819 dump_printf_loc (MSG_NOTE
,vect_location
,
3820 "------>vectorizing SLP node starting from: ");
3821 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3824 /* Vectorized stmts go before the last scalar stmt which is where
3825 all uses are ready. */
3826 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3828 /* Mark the first element of the reduction chain as reduction to properly
3829 transform the node. In the analysis phase only the last element of the
3830 chain is marked as reduction. */
3831 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3832 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3834 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3835 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3838 /* Handle two-operation SLP nodes by vectorizing the group with
3839 both operations and then performing a merge. */
3840 if (SLP_TREE_TWO_OPERATORS (node
))
3842 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3843 enum tree_code ocode
= ERROR_MARK
;
3845 vec_perm_builder
mask (group_size
, group_size
, 1);
3846 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3847 if (gimple_assign_rhs_code (ostmt
) != code0
)
3849 mask
.quick_push (1);
3850 ocode
= gimple_assign_rhs_code (ostmt
);
3853 mask
.quick_push (0);
3854 if (ocode
!= ERROR_MARK
)
3859 tree tmask
= NULL_TREE
;
3860 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3861 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3862 SLP_TREE_VEC_STMTS (node
).truncate (0);
3863 gimple_assign_set_rhs_code (stmt
, ocode
);
3864 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3865 gimple_assign_set_rhs_code (stmt
, code0
);
3866 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3867 SLP_TREE_VEC_STMTS (node
).truncate (0);
3868 tree meltype
= build_nonstandard_integer_type
3869 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3870 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3872 for (j
= 0; j
< v0
.length (); ++j
)
3874 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3875 tree_vector_builder
melts (mvectype
, nunits
, 1);
3876 for (l
= 0; l
< nunits
; ++l
)
3878 if (k
>= group_size
)
3880 tree t
= build_int_cst (meltype
, mask
[k
++] * nunits
+ l
);
3881 melts
.quick_push (t
);
3883 tmask
= melts
.build ();
3885 /* ??? Not all targets support a VEC_PERM_EXPR with a
3886 constant mask that would translate to a vec_merge RTX
3887 (with their vec_perm_const_ok). We can either not
3888 vectorize in that case or let veclower do its job.
3889 Unfortunately that isn't too great and at least for
3890 plus/minus we'd eventually like to match targets
3891 vector addsub instructions. */
3893 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3895 gimple_assign_lhs (v0
[j
]),
3896 gimple_assign_lhs (v1
[j
]), tmask
);
3897 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3898 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
3905 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3907 /* Restore stmt def-types. */
3908 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3909 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3910 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3911 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3916 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3917 For loop vectorization this is done in vectorizable_call, but for SLP
3918 it needs to be deferred until end of vect_schedule_slp, because multiple
3919 SLP instances may refer to the same scalar stmt. */
3922 vect_remove_slp_scalar_calls (slp_tree node
)
3924 gimple
*stmt
, *new_stmt
;
3925 gimple_stmt_iterator gsi
;
3929 stmt_vec_info stmt_info
;
3931 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3934 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3935 vect_remove_slp_scalar_calls (child
);
3937 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3939 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3941 stmt_info
= vinfo_for_stmt (stmt
);
3942 if (stmt_info
== NULL
3943 || is_pattern_stmt_p (stmt_info
)
3944 || !PURE_SLP_STMT (stmt_info
))
3946 lhs
= gimple_call_lhs (stmt
);
3947 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3948 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3949 set_vinfo_for_stmt (stmt
, NULL
);
3950 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3951 gsi
= gsi_for_stmt (stmt
);
3952 gsi_replace (&gsi
, new_stmt
, false);
3953 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3957 /* Generate vector code for all SLP instances in the loop/basic block. */
3960 vect_schedule_slp (vec_info
*vinfo
)
3962 vec
<slp_instance
> slp_instances
;
3963 slp_instance instance
;
3965 bool is_store
= false;
3967 slp_instances
= vinfo
->slp_instances
;
3968 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3970 /* Schedule the tree of INSTANCE. */
3971 scalar_stmts_to_slp_tree_map_t
*bst_map
3972 = new scalar_stmts_to_slp_tree_map_t ();
3973 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3976 if (dump_enabled_p ())
3977 dump_printf_loc (MSG_NOTE
, vect_location
,
3978 "vectorizing stmts using SLP.\n");
3981 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3983 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3986 gimple_stmt_iterator gsi
;
3988 /* Remove scalar call stmts. Do not do this for basic-block
3989 vectorization as not all uses may be vectorized.
3990 ??? Why should this be necessary? DCE should be able to
3991 remove the stmts itself.
3992 ??? For BB vectorization we can as well remove scalar
3993 stmts starting from the SLP tree root if they have no
3995 if (is_a
<loop_vec_info
> (vinfo
))
3996 vect_remove_slp_scalar_calls (root
);
3998 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3999 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4001 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
4004 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
4005 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
4006 /* Free the attached stmt_vec_info and remove the stmt. */
4007 gsi
= gsi_for_stmt (store
);
4008 unlink_stmt_vdef (store
);
4009 gsi_remove (&gsi
, true);
4010 release_defs (store
);
4011 free_stmt_vec_info (store
);